In [17]:
pip uninstall -y scikit-learn imbalanced-learn

Found existing installation: scikit-learn 1.4.0
Uninstalling scikit-learn-1.4.0:
  Successfully uninstalled scikit-learn-1.4.0
Found existing installation: imbalanced-learn 0.14.1
Uninstalling imbalanced-learn-0.14.1:
  Successfully uninstalled imbalanced-learn-0.14.1
Note: you may need to restart the kernel to use updated packages.


In [18]:
!pip install --upgrade scikit-learn==1.4.0

Collecting scikit-learn==1.4.0
  Obtaining dependency information for scikit-learn==1.4.0 from https://files.pythonhosted.org/packages/a8/e9/3e4879974a7c4dcaca2a746dde3df08d0ae8f14c74b03591616ce5f0a8b1/scikit_learn-1.4.0-1-cp311-cp311-win_amd64.whl.metadata
  Using cached scikit_learn-1.4.0-1-cp311-cp311-win_amd64.whl.metadata (11 kB)
Using cached scikit_learn-1.4.0-1-cp311-cp311-win_amd64.whl (10.6 MB)
Installing collected packages: scikit-learn
Successfully installed scikit-learn-1.4.0


In [19]:

!pip install xg


Collecting imbalanced-learn
  Obtaining dependency information for imbalanced-learn from https://files.pythonhosted.org/packages/c7/b5/56f1ceb568676c0231d12b2fed17ebfd606dd1f627e7372aaed5dd56bd97/imbalanced_learn-0.14.1-py3-none-any.whl.metadata
  Using cached imbalanced_learn-0.14.1-py3-none-any.whl.metadata (8.9 kB)
Collecting scikit-learn<2,>=1.4.2 (from imbalanced-learn)
  Obtaining dependency information for scikit-learn<2,>=1.4.2 from https://files.pythonhosted.org/packages/89/3c/45c352094cfa60050bcbb967b1faf246b22e93cb459f2f907b600f2ceda5/scikit_learn-1.8.0-cp311-cp311-win_amd64.whl.metadata
  Using cached scikit_learn-1.8.0-cp311-cp311-win_amd64.whl.metadata (11 kB)
Using cached imbalanced_learn-0.14.1-py3-none-any.whl (235 kB)
Using cached scikit_learn-1.8.0-cp311-cp311-win_amd64.whl (8.1 MB)
Installing collected packages: scikit-learn, imbalanced-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.4.0
    Uninstalling scikit-learn-1.4.0:

In [12]:
!pip install xgboost
import sklearn
print(sklearn.__version__)

Collecting xgboost
  Downloading https://files.pythonhosted.org/packages/24/14/d9ecb9fa86727f51bfb35f1c2b0428ebc6cd5ffde24c5e2dc583d3575a6f/xgboost-1.6.2-py3-none-win_amd64.whl (125.4MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.6.2
1.0.2


In [19]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from collections import Counter
import seaborn as sns
from imblearn.over_sampling import SMOTE, ADASYN
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [20]:
file_path = 'AI_Data_CSE_final.csv'
df = pd.read_csv(file_path)

In [21]:
# Calculate the average of all semester marks for each student
semester_cols = ['Marks_sem1 ', 'Marks_sem2', 'Marks_sem3', 'Marks_sem4',
                 'Marks_sem5', 'Marks_sem6', 'Marks_sem7', 'Marks_sem8']
df['Aggregate_Marks'] = df[semester_cols].mean(axis=1)
# Calculate the Global Average (Mean of the entire class)
global_average = df['Aggregate_Marks'].mean()
print(f"Global Class Average: {global_average:.2f}%")

Global Class Average: 78.32%


In [22]:
# Define the classification function
def classify_learner(score):
    if score >= 90:
        return 'Advance'
    elif score < global_average:
        return 'Slow'
    else:
        return 'Moderate'
# Apply the classification
df['Learner_Segment'] = df['Aggregate_Marks'].apply(classify_learner)

# Check the distribution of segments
print("\nStudent Segments Count:")
print(df['Learner_Segment'].value_counts())


Student Segments Count:
Moderate    128
Slow         84
Advance       7
Name: Learner_Segment, dtype: int64


In [23]:
# 3. Prepare Data for Random Forest
# We select features that might predict the learner type (excluding the marks used to derive the label)
# Using features like previous academic history, attendance, skills, etc.
feature_columns = ['Marks_sem1 ', 'Marks_sem2', 'Marks_sem3', 'Marks_sem4',
                 'Marks_sem5', 'Marks_sem6', 'Marks_sem7', 'Marks_sem8']

X = df[feature_columns]
y = df['Learner_Segment']

# Handle any missing values (filling with mean for simplicity)
X = X.fillna(X.median())

# Encode the target labels (Slow, Moderate, Advance) into numbers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [24]:
X = df[feature_columns]
X.fillna(X.median(), inplace=True)
le = LabelEncoder()
y = le.fit_transform(df['Learner_Segment'])

# Adjust k_neighbors for SMOTE to be less than the number of minority samples (which is 5 for 'Advanced')
smote = SMOTE(sampling_strategy='minority', k_neighbors=4)
X_sm, y_sm = smote.fit_resample(X, y)

minmax = MinMaxScaler()
X_n = minmax.fit_transform(X_sm)

X_train, X_test, y_train, y_test = train_test_split(
    X_n, y_sm, test_size=0.2, random_state=16)

clf = RandomForestClassifier(n_estimators=500)
clf.fit(X_train, y_train)

# --- Metrics & report ---
pred = clf.predict(X_test)
print("Classification report:\n", classification_report(y_test, pred, target_names=le.classes_))

# --- Save model (pickle) ---
with open('Student_Grading_Model.pkl', 'wb') as f:
    pickle.dump({'model': clf, 'label_encoder': le}, f)

# --- Save updated dataset with LearnerType ---
df.to_csv('updated_with_learnerType.csv', index=False)
print("Saved: learner_model.pkl, updated_with_learnerType.csv")


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)


Classification report:
               precision    recall  f1-score   support

     Advance       1.00      1.00      1.00        25
    Moderate       0.97      1.00      0.98        28
        Slow       1.00      0.93      0.97        15

    accuracy                           0.99        68
   macro avg       0.99      0.98      0.98        68
weighted avg       0.99      0.99      0.99        68

Saved: learner_model.pkl, updated_with_learnerType.csv
