In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load data from Excel file
file_path = "C:\\Python312\\output_core_data.xlsx"  # Replace with the actual file path
df = pd.read_excel(file_path)

# Columns used both to derive the 'migration' label and as model inputs.
# Kept in one place so the labelling rule and the feature matrix cannot drift apart.
feature_columns = ['core_value', 'vir_mem_value', 'phy_mem_value', 'data_net_bytes_total']

# Fail early, naming every absent column, instead of stopping at the first
# failed lookup further down.
missing_columns = [col for col in feature_columns if col not in df.columns]
if missing_columns:
    raise KeyError(f"Missing required columns in {file_path}: {missing_columns}")

# Define migration thresholds for each feature
cpu_threshold = 1.2
vir_mem_threshold = 5_000_000_000  # 5 billion
phy_mem_threshold = 1_000_000_000  # 1 billion
net_bytes_threshold = 300

# Create binary 'migration' target variable based on thresholds.
# A row is labelled 1 only when ALL four metrics strictly exceed their
# threshold; otherwise it is labelled 0.
df['migration'] = (
    (df['core_value'] > cpu_threshold) &
    (df['vir_mem_value'] > vir_mem_threshold) &
    (df['phy_mem_value'] > phy_mem_threshold) &
    (df['data_net_bytes_total'] > net_bytes_threshold)
).astype(int)

print(df['migration'])

# Features and target
# NOTE: 'migration' is computed directly from these same four columns, so the
# classifier is learning to reproduce the threshold rule above. Very high
# scores on the held-out split are therefore expected and do not, on their
# own, show predictive power on independently labelled data.
X = df[feature_columns]
y = df['migration']

# Split data (80% train / 20% test, seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForestClassifier
# Seeded like the split: without random_state the forest's bootstrap and
# feature sampling differ on every run, so results would not be reproducible.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predictions
predictions = model.predict(X_test)

# Evaluate model
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

print("Accuracy:", accuracy)
print("Classification Report:\n", report)


0      1
1      0
2      1
3      0
4      1
      ..
171    0
172    0
173    0
174    0
175    0
Name: migration, Length: 176, dtype: int32
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        31
           1       1.00      1.00      1.00         5

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

