In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report  # Add these imports
import joblib

# Load the raw dataset ('data.csv' is resolved relative to the current working directory)
df = pd.read_csv('data.csv')

# Prepare features (X) and target variable (y).
# Rows with a missing 'Grade' are dropped first: `.cat.codes` encodes NaN as -1,
# which the classifier would otherwise learn as a bogus extra class.
# When no grade is missing this is a no-op and X / y are the same as before.
labeled_df = df.dropna(subset=['Grade'])
X = labeled_df.drop(columns=['Grade'])

# Encode each distinct grade as an integer code; pandas infers the categories
# from the observed values.
grade_categorical = labeled_df['Grade'].astype('category')
y = grade_categorical.cat.codes

# Keep the code -> original grade lookup so integer predictions can be decoded later.
grade_labels = dict(enumerate(grade_categorical.cat.categories))

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree Random Forest on the training portion (fit() returns the estimator itself)
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Persist the fitted forest to disk so it can be reloaded without retraining
joblib.dump(rf_model, 'random_forest_model.pkl')

# Score the model on the held-out test split
y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
classification_report_rf = classification_report(
    y_true=y_test,
    y_pred=y_pred_rf,
)

# Report overall accuracy followed by the per-class precision / recall / F1 table
print(f"\nRandom Forest Accuracy: {accuracy_rf}")
print("Random Forest Classification Report:\n", classification_report_rf)



Random Forest Accuracy: 0.9433962264150944
Random Forest Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99        78
           1       0.96      0.90      0.93        86
           2       0.84      0.96      0.89        48

    accuracy                           0.94       212
   macro avg       0.93      0.95      0.94       212
weighted avg       0.95      0.94      0.94       212

