# Load Dataset

In [1]:
from google.colab import files
uploaded = files.upload()

Saving AI_Dataset.csv to AI_Dataset (1).csv


# Clean the Dataset

In [2]:
import pandas as pd

# Load the dataset (uploaded is assumed to be a dict of uploaded files)
df = pd.read_csv(next(iter(uploaded)))

# Strip spaces from column names
df.columns = df.columns.str.strip()

# Select only required columns
df = df[['Course credit',
         'how many hours did you study each week for this subject?',
         'Attendance Rate (%) (Number)',
         'Assignment Score out of 30',
         'Final Exam Results']]

# Rename columns to shorter names
df.rename(columns={
    'Course credit': 'Credit',
    'how many hours did you study each week for this subject?': 'Weekly Study Hours',
    'Attendance Rate (%) (Number)': 'Attendance',
    'Assignment Score out of 30': 'Assignment Score',
    'Final Exam Results': 'Exam Grades'
}, inplace=True)

# Preview the cleaned and renamed columns
print("Cleaned & Renamed Column Names:\n", df.columns.tolist())

# Preview first 5 rows
print(df.head())


Cleaned & Renamed Column Names:
 ['Credit', 'Weekly Study Hours', 'Attendance', 'Assignment Score', 'Exam Grades']
   Credit  Weekly Study Hours Attendance  Assignment Score Exam Grades
0       2                   4    80 - 89                24          B+
1       3                   6    70 - 79                26          B+
2       2                  10    70 - 79                28           A
3       2                   6    90 - 99                12          C+
4       2                   4    90 - 99                22          B+


In [3]:
print("Missing values in each column:")
print(df.isnull().sum())


Missing values in each column:
Credit                0
Weekly Study Hours    0
Attendance            0
Assignment Score      0
Exam Grades           1
dtype: int64


# Data Preprocess

**Convert Attendance to Numeric**

In [4]:

def attendance_range_to_avg(value):
    if isinstance(value, str) and '-' in value:
        start, end = value.split('-')
        return (float(start.strip()) + float(end.strip())) / 2
    try:
        return float(value)
    except:
        return None  # or np.nan

# Apply the function to the Attendance column
df['Attendance'] = df['Attendance'].apply(attendance_range_to_avg)

# Check result
print(df['Attendance'].head())


0    84.5
1    74.5
2    74.5
3    94.5
4    94.5
Name: Attendance, dtype: float64


**Simplify Exam Grades**

In [5]:
def simplify_grades(grade):
    grade = str(grade).strip().upper()
    if grade in ['A+', 'A', 'A-']:
        return 'A'
    elif grade in ['B+', 'B', 'B-']:
        return 'B'
    elif grade in ['C+', 'C']:
        return 'C'
    elif grade in ['C-', 'D+', 'D']:
        return 'D'
    else:
        return 'E'

# Apply to your column
df['Exam Grades'] = df['Exam Grades'].apply(simplify_grades)

# Preview result
print(df['Exam Grades'].value_counts())


Exam Grades
B    330
A    321
C    194
D     92
E     65
Name: count, dtype: int64


In [6]:
df.head(10)


Unnamed: 0,Credit,Weekly Study Hours,Attendance,Assignment Score,Exam Grades
0,2,4,84.5,24,B
1,3,6,74.5,26,B
2,2,10,74.5,28,A
3,2,6,94.5,12,C
4,2,4,94.5,22,B
5,2,3,100.0,22,B
6,2,4,100.0,22,B
7,3,4,100.0,25,B
8,3,3,100.0,20,C
9,2,4,100.0,17,C


**Prepare Features and Target**

In [7]:
# Features and label
X = df[['Credit', 'Weekly Study Hours', 'Attendance', 'Assignment Score']]
y = df['Exam Grades']  # use grade labels like A, B, C


In [8]:
X = X.dropna()
y = y.loc[X.index]  # align y with X


**Encode Target Variable**

In [9]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # A=0, B=1, ...


In [10]:
print(dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))


{'A': np.int64(0), 'B': np.int64(1), 'C': np.int64(2), 'D': np.int64(3), 'E': np.int64(4)}


**Split Data into Training and Test Sets**

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)


**Normalize Features**

In [12]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Model Develop

In [13]:
pip install tensorflow==2.18.0 scikeras==0.13.0 scikit-learn==1.4.2




In [14]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV


**Neural Network Model**

In [15]:
def create_model(learning_rate=0.001, hidden1=128, hidden2=64, hidden3=32,
                 drop1=0.0, drop2=0.0, drop3=0.0):
    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(hidden1, activation='relu'),
        Dropout(drop1),
        Dense(hidden2, activation='relu'),
        Dropout(drop2),
        Dense(hidden3, activation='relu'),
        Dropout(drop3),
        Dense(5, activation='softmax')  # 5-class output
    ])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [16]:
model = KerasClassifier(
    model=create_model,
    verbose=0  # silent training
)


**Define Hyperparameter**

In [17]:
param_grid = {
    "epochs": [30, 50],                         # Number of training epochs
    "batch_size": [8, 16],                  # Batch sizes
    "model__learning_rate": [0.001, 0.0005],
    "model__drop1": [0.0, 0.2, 0.3], # Corrected parameter name
    "model__drop2": [0.0, 0.2, 0.3], # Corrected parameter name
    "model__drop3": [0.0, 0.2, 0.3] # Corrected parameter name
}

**Run Grid Search**

In [18]:
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,           # 3-fold CV
    verbose=2,      # shows progress
    n_jobs=1        # safer in notebooks; set to -1 on server/GPU
)

grid_result = grid.fit(X_train, y_train)

Fitting 3 folds for each of 216 candidates, totalling 648 fits
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.0, model__learning_rate=0.001; total time=  10.2s
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.0, model__learning_rate=0.001; total time=  10.0s
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.0, model__learning_rate=0.001; total time=   9.0s
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.0, model__learning_rate=0.0005; total time=   8.5s
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.0, model__learning_rate=0.0005; total time=  10.0s
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.0, model__learning_rate=0.0005; total time=   8.4s
[CV] END batch_size=8, epochs=30, model__drop1=0.0, model__drop2=0.0, model__drop3=0.2, model__learning_rate=0.001; total ti

In [19]:
print("Best parameters:", grid_result.best_params_)
print("Best CV accuracy:", grid_result.best_score_)


Best parameters: {'batch_size': 8, 'epochs': 50, 'model__drop1': 0.0, 'model__drop2': 0.2, 'model__drop3': 0.2, 'model__learning_rate': 0.001}
Best CV accuracy: 0.8539325842696629
