In [1]:
# Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

In [2]:
# Read the four per-machine CSV files (paths are relative to the notebook's
# working directory) into one DataFrame each.
# NOTE(review): 'RasberryPi.csv' is spelled differently from the 'RaspberryPi'
# label used later — confirm the file name on disk.
df_macbook_pro1, df_macbook_pro2, df_raspberry_pi, df_vm = (
    pd.read_csv(csv_path)
    for csv_path in (
        'datasets/MacBookPro1.csv',
        'datasets/MacBookPro2.csv',
        'datasets/RasberryPi.csv',
        'datasets/VM.csv',
    )
)

FileNotFoundError: [Errno 2] No such file or directory: 'datasets/MacBookPro1.csv'

In [None]:
# Pre-processing: prepare the data before model training

In [None]:
# Parse 'Time' as datetime, derive hour-of-day and day-of-week features from it,
# and tag every row with the name of the machine it came from.
server_names = ['MacBookPro1', 'MacBookPro2', 'RaspberryPi', 'VM']
datasets = [df_macbook_pro1, df_macbook_pro2, df_raspberry_pi, df_vm]

for frame, label in zip(datasets, server_names):
    # Parse once and reuse the parsed series for both derived columns
    timestamps = pd.to_datetime(frame['Time'])
    frame['Time'] = timestamps
    frame['Hour'] = timestamps.dt.hour
    frame['DayOfWeek'] = timestamps.dt.dayofweek
    frame['ServerType'] = label

In [None]:
# Stack the per-machine frames into a single frame with a fresh 0..n-1 index
df_combined = pd.concat(datasets, ignore_index=True)

In [None]:
# Replace 'ServerType' with indicator columns; drop_first=True omits the
# first category's column
df_combined = pd.get_dummies(
    data=df_combined,
    columns=['ServerType'],
    drop_first=True,
)

In [None]:
# Drop the raw 'Time' column now that 'Hour' and 'DayOfWeek' have been derived.
# Rebinding (rather than inplace=True) plus errors='ignore' keeps this cell
# safe to re-run: a second run is a no-op instead of raising KeyError.
df_combined = df_combined.drop(columns='Time', errors='ignore')

In [None]:
# Separate the target column from the feature matrix
y = df_combined['Execution Time']
X = df_combined.drop(columns=['Execution Time'])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build a 100-tree random forest regressor with a fixed seed, then fit it on
# the training split
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

In [None]:
# Score the held-out split: generate predictions, then derive the error metrics
predictions = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
# RMSE is the square root of MSE, so it is in the same units as the target
rmse = np.sqrt(mse)

In [None]:
# Report the three error metrics, one per line
print(
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    f'Root Mean Squared Error: {rmse}',
    sep='\n',
)