In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [19]:
#Loading the cleaned data
df = pd.read_csv('model_data.csv')
df.head()

Unnamed: 0,StudentID,Age,Gender,Ethnicity,ParentalEducation,StudyTimeWeekly,Absences,Tutoring,ParentalSupport,Extracurricular,Sports,Music,Volunteering,GPA,GradeClass
0,1001,17,1,0,2,19.833723,7,1,2,0,0,1,0,2.929196,2.0
1,1002,18,0,0,1,15.408756,0,0,1,0,0,0,0,3.042915,1.0
2,1003,15,0,2,3,4.21057,26,0,2,0,0,0,0,0.112602,4.0
3,1004,17,1,0,3,10.028829,14,0,3,1,0,0,0,2.054218,3.0
4,1005,17,1,0,2,4.672495,17,1,3,0,0,0,0,1.288061,4.0


In [20]:
df.drop(['StudentID', 'GPA'], axis=1, inplace=True)
print(df.head())

   Age  Gender  Ethnicity  ParentalEducation  StudyTimeWeekly  Absences  \
0   17       1          0                  2        19.833723         7   
1   18       0          0                  1        15.408756         0   
2   15       0          2                  3         4.210570        26   
3   17       1          0                  3        10.028829        14   
4   17       1          0                  2         4.672495        17   

   Tutoring  ParentalSupport  Extracurricular  Sports  Music  Volunteering  \
0         1                2                0       0      1             0   
1         0                1                0       0      0             0   
2         0                2                0       0      0             0   
3         0                3                1       0      0             0   
4         1                3                0       0      0             0   

   GradeClass  
0         2.0  
1         1.0  
2         4.0  
3         3.0  


In [21]:
#Split data is train and test

## DiVide the dataset into indepent and dependent features
X=df.drop('GradeClass',axis=1)
y=df['GradeClass']

## Split the data in training and tetsing sets
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

## Scale these features
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [22]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

def create_model(neurons=32, layers=1, dropout_rate=0.5, num_classes=5):
    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_shape=(X_train.shape[1],)))
    model.add(Dropout(dropout_rate))  # Add Dropout after the first Dense layer

    for _ in range(layers-1):
        model.add(Dense(neurons, activation='relu'))
        model.add(Dropout(dropout_rate))  # Add Dropout after each Dense layer

    model.add(Dense(num_classes, activation='softmax'))  # num_classes should match the number of classes
    model.compile(optimizer='adam', loss="sparse_categorical_crossentropy", metrics=['accuracy'])

    return model

In [23]:
model = KerasClassifier(build_fn=create_model, neurons=32, layers=1, dropout_rate=0.5, num_classes=5, verbose=1)



In [24]:
param_grid = {
    'neurons': [16, 32, 64, 128],
    'layers': [1, 2],
    'dropout_rate': [0.2, 0.5],
    'epochs': [50, 100],
    'batch_size': [16, 32]  # You might also want to tune the batch size
}


In [25]:
# Initialize GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, verbose=1)

In [26]:
# Fit GridSearchCV
grid_result = grid.fit(X_train, y_train)

# Print the best parameters and best score
print(f"Best: {grid_result.best_score_} using {grid_result.best_params_}")

# Optionally, you can also use the best model to evaluate on test data
best_model = grid_result.best_estimator_
score = best_model.score(X_test, y_test)
print(f"Test accuracy of best model: {score}")

Fitting 3 folds for each of 64 candidates, totalling 192 fits
Epoch 1/100


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 967us/step - accuracy: 0.2303 - loss: 1.6935  
Epoch 2/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 762us/step - accuracy: 0.4673 - loss: 1.3831
Epoch 3/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 909us/step - accuracy: 0.5204 - loss: 1.2433
Epoch 4/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 883us/step - accuracy: 0.5377 - loss: 1.1623
Epoch 5/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 864us/step - accuracy: 0.5845 - loss: 1.0804
Epoch 6/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 881us/step - accuracy: 0.5993 - loss: 1.0285
Epoch 7/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 866us/step - accuracy: 0.6266 - loss: 1.0058
Epoch 8/100
[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 888us/step - accuracy: 0.6195 - loss: 0.9632
Epoch 9/100
[1m120/120[0