In [25]:
import io

import pandas as pd
import xgboost as xgb
from google.colab import files
from scipy.stats import uniform
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler


# Ask the user for the dataset CSV through the Colab upload widget.
# files.upload() returns a dict mapping each file name to its raw bytes.
uploaded = files.upload()

# Cancelling the dialog yields an empty dict; fail with a clear message
# instead of the bare StopIteration that next(iter(...)) would raise.
if not uploaded:
    raise ValueError("No file was uploaded; please select the dataset CSV.")

# Parse the bytes that were just uploaded rather than re-opening a file by
# name: Colab may store the upload under a de-duplicated name
# (e.g. "name (11).csv"), so a path lookup could hit an older copy on disk.
uploaded_name = next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))


# Encode the categorical 'Task Type' column as integer codes. The fitted
# encoder is kept so new tasks can be encoded with the same mapping.
label_encoder = LabelEncoder()
data['Task Type'] = label_encoder.fit_transform(data['Task Type'])

# Encode the target; label_encoder_y is kept to map predicted codes back to
# the original 'Processor ID' values.
y = data['Processor ID']
label_encoder_y = LabelEncoder()
y_encoded = label_encoder_y.fit_transform(y)

feature_columns = ['Task Weight', 'Task Type', 'Processor Load', 'Task Execution Time']
numeric_columns = ['Task Weight', 'Processor Load', 'Task Execution Time']

# Explicit copy: X must be an independent frame, not a slice of `data`,
# so that writing scaled values into it cannot trigger SettingWithCopyWarning
# or touch the raw data.
X = data[feature_columns].copy()

# Split BEFORE fitting the scaler so that no statistic of the held-out rows
# leaks into preprocessing. Same test_size / random_state as before, so the
# row assignment to train and test is unchanged.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
X_train, X_test = X_train.copy(), X_test.copy()

# Fit the scaler on the training rows only ('Task Type' is a category code
# and is intentionally left unscaled).
scaler = StandardScaler()
scaler.fit(X_train[numeric_columns])

# Apply the train-fitted scaling everywhere. X is transformed as well so it
# remains the scaled full feature matrix for any later cell that reads it.
for frame in (X, X_train, X_test):
    frame[numeric_columns] = scaler.transform(frame[numeric_columns])

# Search space for the randomized hyper-parameter search.
# scipy.stats.uniform(loc, scale) draws from the interval [loc, loc + scale].
param_dist = {
    'n_estimators': [100, 200, 300],
    'learning_rate': uniform(loc=0.01, scale=0.1),    # [0.01, 0.11]
    'max_depth': [3, 5, 7, 9],
    'min_child_weight': [1, 3, 5],
    'subsample': uniform(loc=0.7, scale=0.3),         # [0.7, 1.0]
    'colsample_bytree': uniform(loc=0.7, scale=0.3),  # [0.7, 1.0]
    'gamma': uniform(loc=0.1, scale=0.5),             # [0.1, 0.6]
}

# Base classifier whose hyper-parameters are tuned.
xg_model = xgb.XGBClassifier(random_state=42)

# 100 sampled candidates x 3 folds = 300 fits, run on all available cores.
random_search = RandomizedSearchCV(
    estimator=xg_model,
    param_distributions=param_dist,
    n_iter=100,
    cv=3,
    n_jobs=-1,
    random_state=42,
    verbose=2,
)
random_search.fit(X_train, y_train)

print("Best Parameters from RandomizedSearchCV:", random_search.best_params_)

# Score the estimator selected by the search on the held-out test split.
best_xg_model = random_search.best_estimator_
y_pred = best_xg_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


# Describe one hypothetical task using the same columns the model was trained
# on; the task type goes through the encoder fitted on the training data.
io_intensive_code = label_encoder.transform(['I/O-intensive'])[0]
new_task = pd.DataFrame({
    'Task Weight': [1],
    'Task Type': [io_intensive_code],
    'Processor Load': [4],
    'Task Execution Time': [5]
})

# Scale only the numeric columns, reusing the already-fitted scaler.
scaled_columns = ['Task Weight', 'Processor Load', 'Task Execution Time']
new_task_scaled = scaler.transform(new_task[scaled_columns])

# Rebuild the feature frame and slot 'Task Type' back in at position 1 so the
# column order is: Task Weight, Task Type, Processor Load, Task Execution Time.
new_task_scaled_with_type = pd.DataFrame(new_task_scaled, columns=scaled_columns)
new_task_scaled_with_type.insert(1, 'Task Type', new_task['Task Type'].values)

# Predict the class code and translate it back to the original processor ID.
predicted_processor = best_xg_model.predict(new_task_scaled_with_type)
predicted_processor_label = label_encoder_y.inverse_transform(predicted_processor)
print(f"The predicted processor for the new task is: Processor {predicted_processor_label[0]}")



Saving load_balancing_data_synthetic.csv to load_balancing_data_synthetic (11).csv
Fitting 3 folds for each of 100 candidates, totalling 300 fits
Best Parameters from RandomizedSearchCV: {'colsample_bytree': np.float64(0.7354494482864968), 'gamma': np.float64(0.44836858268207536), 'learning_rate': np.float64(0.07289428467798839), 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': np.float64(0.9902741654560742)}
Accuracy: 85.00%
The predicted processor for the new task is: Processor 1
