In [None]:
import os

# Ask for an upload only when the dataset is not already in the working
# directory, so that re-running the notebook does not prompt again.
# `uploaded` is always defined so later references to it stay valid.
uploaded = {}
if not os.path.exists("StudentsPerformance.csv"):
    try:
        # google.colab only exists inside a Colab runtime.
        from google.colab import files
    except ImportError:
        print("google.colab is not available; place StudentsPerformance.csv in the working directory.")
    else:
        uploaded = files.upload()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset from the working directory.
df = pd.read_csv("StudentsPerformance.csv")

# Row-wise mean of the three subject scores, stored as a new column.
subject_scores = df[['math score', 'reading score', 'writing score']]
df['average_score'] = subject_scores.mean(axis=1)

# convert average score to grades
def convert_to_grade(score):
    """Map a numeric score to a letter grade.

    Lower bounds are inclusive: 'A' from 70, 'B' from 60, 'C' from 50,
    'D' from 45, 'E' from 40. Anything that reaches none of these
    bounds is graded 'F'.
    """
    # Ordered from the highest floor down, so the first match wins.
    grade_floors = ((70, 'A'), (60, 'B'), (50, 'C'), (45, 'D'), (40, 'E'))
    for floor, letter in grade_floors:
        if score >= floor:
            return letter
    # No floor reached. A NaN score also lands here, because every
    # comparison against NaN is False.
    return 'F'

# Letter grade derived from the average score; this is the prediction target.
df['grade'] = df['average_score'].map(convert_to_grade)

# Engineered features: spread between a student's best and worst subject,
# and a 0/1 flag for any subject score below 40.
subject_scores = df[['math score', 'reading score', 'writing score']]
df['score_range'] = subject_scores.max(axis=1) - subject_scores.min(axis=1)
df['has_failed_subject'] = subject_scores.lt(40).any(axis=1).astype(int)

# 'average_score' maps directly onto the grade, so it must not be a feature.
# NOTE(review): the three subject scores remain in X and the grade is a
# function of their mean, so the label is still recoverable from the
# features -- confirm this is intended.
df = df.drop(columns=['average_score'])

# Split into target (y) and features (X); one-hot encode the categorical
# text columns, dropping the first level of each.
y = df['grade']
X = pd.get_dummies(df.drop(columns=['grade']), drop_first=True)


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with balanced class weights, fitted on the training split.
model = RandomForestClassifier(class_weight='balanced', random_state=42).fit(X_train, y_train)

# Score the held-out split and print the metrics.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

Accuracy: 0.925
Classification Report:
               precision    recall  f1-score   support

           A       0.95      0.98      0.96        90
           B       0.95      0.79      0.86        48
           C       0.81      1.00      0.90        35
           D       1.00      0.79      0.88        14
           E       1.00      1.00      1.00         3
           F       1.00      1.00      1.00        10

    accuracy                           0.93       200
   macro avg       0.95      0.93      0.93       200
weighted avg       0.93      0.93      0.92       200



In [None]:
# create data for a new student (same raw columns as the training CSV)
new_student = pd.DataFrame([{
    'gender': 'male',
    'race/ethnicity': 'group C',
    'parental level of education': "bachelor's degree",
    'lunch': 'standard',
    'test preparation course': 'completed',
    'math score': 75,
    'reading score': 80,
    'writing score': 77
}])

# create range and failed subject columns as done before training
score_cols = ['math score', 'reading score', 'writing score']
new_student['score_range'] = new_student[score_cols].max(axis=1) - new_student[score_cols].min(axis=1)
new_student['has_failed_subject'] = (new_student[score_cols] < 40).any(axis=1).astype(int)

# encode new student
# Do NOT pass drop_first=True here. With a single row every categorical
# column has exactly one level, so drop_first would remove all of its dummy
# columns and the student would be encoded as the baseline category of every
# feature regardless of the values above. Encode every level instead;
# reindexing to the training columns then drops the dummies the model never
# saw (the baseline levels) and zero-fills levels absent from this row.
new_student_encoded = pd.get_dummies(new_student)
new_student_final = new_student_encoded.reindex(columns=X.columns, fill_value=0)

# predict the grade
predicted_grade = model.predict(new_student_final)
print("The Student's Predicted grade is:", predicted_grade[0])

The Student's Predicted grade is: A
