In [5]:
import pandas as pd
import joblib

In [6]:
def preprocess_data(individual_path='processed_individual_monitoring.csv',
                    real_scenario_path='processed_real_scenario.csv'):
    """Build the per-timestamp feature table from the two monitoring CSV files.

    The real-scenario rows are inner-joined to the individual-monitoring rows
    on ``Identifier``. The categorical columns are then dropped and every
    remaining column is aggregated per ``Timestamp`` with sum, mean, max and
    min.

    Parameters
    ----------
    individual_path : str or path-like, optional
        CSV holding the individual monitoring measurements. Defaults to the
        file name this notebook reads.
    real_scenario_path : str or path-like, optional
        CSV holding the real-scenario measurements. Defaults to the file name
        this notebook reads.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``Timestamp`` (in sorted group order, with a
        fresh 0..n-1 index). Columns are named
        ``<column>_<sum|mean|max|min>``; ``Timestamp`` itself is not included.

    Raises
    ------
    FileNotFoundError
        If either CSV file does not exist.
    KeyError
        If ``Identifier``, ``Timestamp``, ``URI``, ``Method`` or ``Pricing``
        is missing where the pipeline needs it.
    """
    # Read the individual and collective monitoring CSV files
    individual_df = pd.read_csv(individual_path)
    real_scenario_df = pd.read_csv(real_scenario_path)

    # Merge on Identifier. Columns present in both inputs keep their plain
    # name for the real-scenario side and get '_Individual' for the other.
    merged_df = pd.merge(
        real_scenario_df,
        individual_df,
        on='Identifier',
        suffixes=('', '_Individual'),
    )

    # A load column that exists in only one input comes out of the merge
    # unsuffixed, so the suffix is added here. rename() skips labels that are
    # absent, which matches the previous "if column in columns" guards, and it
    # returns a new frame instead of mutating in place.
    # NOTE(review): if BOTH inputs carry 'CPU Load' / 'Memory Load', this
    # produces two columns with the same '..._Individual' label - confirm the
    # input schemas before relying on that case.
    merged_df = merged_df.rename(columns={
        'CPU Load': 'CPU Load_Individual',
        'Memory Load': 'Memory Load_Individual',
    })

    # Remove the categorical columns
    numerical_df = merged_df.drop(columns=['URI', 'Method', 'Pricing', 'Identifier'])

    # Group by Timestamp and aggregate the individual numerical values
    aggregated_df = numerical_df.groupby('Timestamp').agg(['sum', 'mean', 'max', 'min'])

    # Flatten the (column, statistic) MultiIndex into 'column_statistic'
    aggregated_df.columns = ['_'.join(col).strip() for col in aggregated_df.columns.values]

    # Timestamp is the group index at this point; discard it
    return aggregated_df.reset_index(drop=True)

In [7]:
def load_and_predict(target_column, model_type):
    """Run a saved model on the freshly preprocessed monitoring data.

    Parameters
    ----------
    target_column : str
        Name of the predicted column (e.g. ``'CPU Load_mean'``). It is part
        of both artifact file names and is removed from the feature table if
        present.
    model_type : str
        Model tag used in the artifact file names (e.g. ``'linear'``).

    Returns
    -------
    Whatever the loaded model's ``predict`` returns for the selected features.
    """
    selector_file = f'feature_selector_{target_column}_{model_type}.joblib'
    model_file = f'{model_type}_model_{target_column}.joblib'

    # Build the feature table first, then load the persisted artifacts.
    # joblib files are pickle-based: only load artifacts you trust.
    features = preprocess_data()
    fitted_selector = joblib.load(selector_file)
    estimator = joblib.load(model_file)

    # The target must never be fed to the model as an input feature.
    has_target = target_column in features.columns
    if has_target:
        features = features.drop(columns=[target_column])

    # Apply the loaded feature selector, then predict.
    selected = fitted_selector.transform(features)
    return estimator.predict(selected)

In [8]:
# Predict CPU Load using the 'knn' model.
# The result gets its own name so that running this cell no longer overwrites
# cpu_load_linear_predictions, which belongs to the linear-model cell.
cpu_load_knn_predictions = load_and_predict('CPU Load_mean', 'knn')
print(f'CPU Load predictions using knn model: {cpu_load_knn_predictions}')

CPU Load predictions using knn model: [85.678 90.432 90.432 86.438 86.958 87.578 85.678 86.958 51.79  90.432
 86.438 85.678 85.678 85.678 84.986 86.438 90.432 89.748 84.29  84.29
 84.986 85.678 75.186 64.364 79.398 65.964 50.122 50.122 47.064 48.632
 46.484 64.364 48.632 70.268 67.77  60.088 67.092 77.65  66.922 46.484
 70.268 75.944 58.074 76.024 64.364 69.208 44.848 48.632 58.074 46.484
 54.222 70.268 48.632 54.222 79.398 48.632]


In [4]:
# --- Linear model -----------------------------------------------------------
# CPU Load
cpu_load_linear_predictions = load_and_predict(
    target_column='CPU Load_mean',
    model_type='linear',
)
print(f'CPU Load predictions using linear model: {cpu_load_linear_predictions}')

# Memory Load
memory_load_linear_predictions = load_and_predict(
    target_column='Memory Load_mean',
    model_type='linear',
)
print(f'Memory Load predictions using linear model: {memory_load_linear_predictions}')

# --- Random forest model ----------------------------------------------------
# CPU Load
cpu_load_rf_predictions = load_and_predict(
    target_column='CPU Load_mean',
    model_type='random_forest',
)
print(f'CPU Load predictions using random forest model: {cpu_load_rf_predictions}')

# Memory Load
memory_load_rf_predictions = load_and_predict(
    target_column='Memory Load_mean',
    model_type='random_forest',
)
print(f'Memory Load predictions using random forest model: {memory_load_rf_predictions}')

# --- SVM model --------------------------------------------------------------
# CPU Load
cpu_load_svm_predictions = load_and_predict(
    target_column='CPU Load_mean',
    model_type='svm',
)
print(f'CPU Load predictions using SVM model: {cpu_load_svm_predictions}')

# Memory Load
memory_load_svm_predictions = load_and_predict(
    target_column='Memory Load_mean',
    model_type='svm',
)
print(f'Memory Load predictions using SVM model: {memory_load_svm_predictions}')

CPU Load predictions using linear model: [ 87.81547689 149.2637535  110.5420533  102.08244909  88.25740479
 102.33048298  71.34529221  98.19695694  61.03456621  99.68734928
  88.03671303  87.95094213  90.150906    77.02407501  82.79500821
 116.09585256  79.67088834  79.10102416  86.48588402  89.89343212
  67.94403548  90.55078562  64.45032825  62.23188775  62.32134893
  66.29608321  62.05309734  65.0877596   63.52629511  64.48055455
  62.00626883  63.67355705  64.68015514  59.96838666  62.3313466
  66.1997587   61.3174933   65.55724205  66.5723525   62.37299408
  60.31294902  61.01603687  64.04829898  62.79274283  65.25446034
  63.15394121  54.39182714  67.16782207  63.18481171  63.16499573
  59.79575203  62.83605009  64.34037269  61.6500181   62.31608122
  64.78222614]
Memory Load predictions using linear model: [3.53752549 3.56639682 3.57845533 3.57716543 3.57177353 3.57171993
 3.57610779 3.57512287 3.53933676 3.55900839 3.56455594 3.56727285
 3.57287232 3.57004672 3.56874195 3.53437