# Load Dataset

In [1]:
from google.colab import files
# Prompt for a file upload in Colab. The cleaning cell below treats the result
# as a mapping and passes its first key to pd.read_csv as the CSV path.
uploaded = files.upload()

Saving AI_Dataset.csv to AI_Dataset (2).csv


# Clean the Dataset

In [2]:
import pandas as pd

# `uploaded` comes from the Colab upload cell and is used here as a mapping
# keyed by file name. Fail with a clear message instead of a bare StopIteration
# when the upload was cancelled or the upload cell was skipped.
if not uploaded:
    raise ValueError("No file was uploaded; run the upload cell and pick the dataset CSV.")
csv_name = next(iter(uploaded))

# Original survey column -> short name used by the rest of the notebook.
# The dict order also fixes the column order of the cleaned frame.
COLUMN_RENAMES = {
    'Course credit': 'Credit',
    'how many hours did you study each week for this subject?': 'Weekly Study Hours',
    'Attendance Rate (%) (Number)': 'Attendance',
    'Assignment Score out of 30': 'Assignment Score',
    'Final Exam Results': 'Exam Grades'
}

# Load the dataset
raw_df = pd.read_csv(csv_name)

# Strip spaces from column names
raw_df.columns = raw_df.columns.str.strip()

# Name the missing columns up front so a schema mismatch is easy to diagnose.
missing_columns = [name for name in COLUMN_RENAMES if name not in raw_df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")

# Select only the required columns and rename them in one step. rename() returns
# a new frame here, so no column subset of the raw frame is mutated in place.
df = raw_df[list(COLUMN_RENAMES)].rename(columns=COLUMN_RENAMES)

# Preview the cleaned and renamed columns
print("Cleaned & Renamed Column Names:\n", df.columns.tolist())

# Preview first 5 rows
print(df.head())


Cleaned & Renamed Column Names:
 ['Credit', 'Weekly Study Hours', 'Attendance', 'Assignment Score', 'Exam Grades']
   Credit  Weekly Study Hours Attendance  Assignment Score Exam Grades
0       2                   4    80 - 89                24          B+
1       3                   6    70 - 79                26          B+
2       2                  10    70 - 79                28           A
3       2                   6    90 - 99                12          C+
4       2                   4    90 - 99                22          B+


In [3]:
# Count the null entries per column and show the tally under a heading.
missing_per_column = df.isnull().sum()
print("Missing values in each column:")
print(missing_per_column)


Missing values in each column:
Credit                0
Weekly Study Hours    0
Attendance            0
Assignment Score      0
Exam Grades           1
dtype: int64


# Data Preprocessing

**Convert Attendance to Numeric**

In [4]:

def attendance_range_to_avg(value):
    """Convert an attendance entry to a single number.

    A string of the form ``"low - high"`` (e.g. ``"80 - 89"``) is replaced by
    the midpoint of the two bounds. Anything else is passed to ``float``.

    Parameters
    ----------
    value : object
        Raw cell value; typically a range string, a numeric string, or a number.

    Returns
    -------
    float or None
        The midpoint or parsed number, or ``None`` when the value cannot be
        interpreted as either.
    """
    if isinstance(value, str) and '-' in value:
        bounds = value.split('-')
        # Only a clean two-part split is a range. Inputs such as "-5", "80-"
        # or "70-80-90" are not, and must not raise out of this function.
        if len(bounds) == 2:
            try:
                low = float(bounds[0].strip())
                high = float(bounds[1].strip())
            except ValueError:
                pass  # not a range after all; try it as a plain number below
            else:
                return (low + high) / 2
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Unparseable entry; callers treat None as a missing value.
        return None

# Convert every attendance entry to a number, then store it back on the frame
attendance_numeric = df['Attendance'].apply(attendance_range_to_avg)
df['Attendance'] = attendance_numeric

# Show the first converted values as a sanity check
print(attendance_numeric.head())


0    84.5
1    74.5
2    74.5
3    94.5
4    94.5
Name: Attendance, dtype: float64


**Simplify Exam Grades**

In [5]:
def simplify_grades(grade):
    """Collapse a detailed letter grade into one of five buckets.

    The input is converted to ``str``, trimmed and upper-cased before lookup.
    Any value that is not listed below maps to ``'E'``. That includes missing
    values: ``str(float('nan'))`` becomes ``'NAN'``, which is not a listed grade.
    """
    bucket_by_grade = {
        'A+': 'A', 'A': 'A', 'A-': 'A',
        'B+': 'B', 'B': 'B', 'B-': 'B',
        'C+': 'C', 'C': 'C',
        'C-': 'D', 'D+': 'D', 'D': 'D',
    }
    normalized = str(grade).strip().upper()
    return bucket_by_grade.get(normalized, 'E')

# Replace each detailed grade with its bucket
simplified_grades = df['Exam Grades'].apply(simplify_grades)
df['Exam Grades'] = simplified_grades

# Show how many rows fall into each bucket
print(simplified_grades.value_counts())


Exam Grades
B    330
A    321
C    194
D     92
E     65
Name: count, dtype: int64


In [6]:
# Display the first 10 rows to check the converted Attendance and Exam Grades columns.
df.head(10)


Unnamed: 0,Credit,Weekly Study Hours,Attendance,Assignment Score,Exam Grades
0,2,4,84.5,24,B
1,3,6,74.5,26,B
2,2,10,74.5,28,A
3,2,6,94.5,12,C
4,2,4,94.5,22,B
5,2,3,100.0,22,B
6,2,4,100.0,22,B
7,3,4,100.0,25,B
8,3,3,100.0,20,C
9,2,4,100.0,17,C


**Prepare Features and Target**

In [7]:
# Input columns for the model; the grade bucket is the prediction target
feature_columns = ['Credit', 'Weekly Study Hours', 'Attendance', 'Assignment Score']
target_column = 'Exam Grades'

X = df[feature_columns]
y = df[target_column]  # grade labels such as A, B, C


In [8]:
# Discard rows where any feature is missing, then keep only the matching labels
X = X.dropna(axis=0, how='any')
kept_rows = X.index
y = y.loc[kept_rows]


**Encode Target Variable**

In [9]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from y, then encode y with it (A=0, B=1, ...)
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)


In [10]:
# Show which integer code the encoder assigned to each grade label
encoded_classes = label_encoder.transform(label_encoder.classes_)
class_to_code = dict(zip(label_encoder.classes_, encoded_classes))
print(class_to_code)


{'A': np.int64(0), 'B': np.int64(1), 'C': np.int64(2), 'D': np.int64(3), 'E': np.int64(4)}


**Split Data into Training and Test Sets**

In [11]:
from sklearn.model_selection import train_test_split

# Hold out a fixed-seed test set whose class proportions follow y_encoded
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
    stratify=y_encoded,
)


**Normalize Features**

In [12]:
from sklearn.preprocessing import StandardScaler

# Fit the scaling statistics on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


# Model Development

In [13]:
!pip install keras-tuner --quiet

import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


**Neural Network Model**

In [14]:
import keras_tuner as kt

def model_builder(hp):
    """Build and compile the classifier for one tuner trial.

    The network is three ReLU Dense layers (128, 64, 32 units), each followed
    by a Dropout layer whose rate is a tunable hyperparameter, and a 5-way
    softmax output. The Adam learning rate is tunable too (log-sampled between
    1e-4 and 1e-2). The input width is read from the notebook-level ``X``.

    Parameters
    ----------
    hp : hyperparameter container supplied by the tuner; ``hp.Float`` is used
        to declare each tunable value.

    Returns
    -------
    The compiled Sequential model.
    """
    # (hyperparameter name, Dense units, upper bound of the dropout range)
    hidden_spec = [
        ('dropout_1', 128, 0.5),
        ('dropout_2', 64, 0.5),
        ('dropout_3', 32, 0.4),
    ]

    layer_stack = [Input(shape=(X.shape[1],))]
    for dropout_name, units, dropout_cap in hidden_spec:
        layer_stack.append(Dense(units, activation='relu'))
        dropout_rate = hp.Float(dropout_name, min_value=0.1, max_value=dropout_cap, step=0.1)
        layer_stack.append(Dropout(dropout_rate))
    layer_stack.append(Dense(5, activation='softmax'))

    model = Sequential(layer_stack)

    # Learning rate is part of the search space as well
    learning_rate = hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='log')

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


**Bayesian Tuner**

In [15]:
# Bayesian hyperparameter search over the space declared in model_builder.
# A fixed seed makes the sequence of sampled hyperparameter sets repeatable,
# in line with the fixed random_state used for the train/test split. Model
# weight initialisation is not controlled by this seed.
tuner = kt.BayesianOptimization(
    model_builder,
    objective='val_accuracy',
    max_trials=20,              # number of different hyperparam sets to try
    executions_per_trial=2,     # average results for stability
    seed=42,                    # reproducible hyperparameter sampling
    directory='bayesian_tuning',
    project_name='student_Academic_Performance_Final'
)


**Run the tuning process**

In [16]:
# Stop a run once val_loss has not improved for 5 epochs and roll back to the
# best weights seen; this callback is reused when training the final model.
stop_early = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Training options forwarded to every trial
search_fit_options = {
    'epochs': 50,
    'validation_split': 0.2,
    'callbacks': [stop_early],
    'batch_size': 8,
}

tuner.search(X_train, y_train, **search_fit_options)


Trial 20 Complete [00h 00m 30s]
val_accuracy: 0.8540372550487518

Best val_accuracy So Far: 0.8819875717163086
Total elapsed time: 00h 12m 08s


In [17]:
# Take the top-ranked hyperparameter set from the finished search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Report the chosen value for each hyperparameter declared in model_builder.
print(f"""
Best hyperparameters found:
- Dropout 1: {best_hps.get('dropout_1')}
- Dropout 2: {best_hps.get('dropout_2')}
- Dropout 3: {best_hps.get('dropout_3')}
- Learning Rate: {best_hps.get('learning_rate')}
""")



Best hyperparameters found:
- Dropout 1: 0.1
- Dropout 2: 0.1
- Dropout 3: 0.1
- Learning Rate: 0.003959343999708524



In [18]:
# Build a fresh model from the best hyperparameters found by the tuner
best_model = tuner.hypermodel.build(best_hps)

# Write the model to disk whenever validation accuracy reaches a new best
checkpoint = ModelCheckpoint(
    filepath='best_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
)

# Train with the same settings used during the search, plus checkpointing
final_callbacks = [stop_early, checkpoint]
history = best_model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=8,
    epochs=50,
    callbacks=final_callbacks,
)

Epoch 1/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.4829 - loss: 1.1974 - val_accuracy: 0.7516 - val_loss: 0.5511
Epoch 2/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7069 - loss: 0.6432 - val_accuracy: 0.7267 - val_loss: 0.5125
Epoch 3/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.7557 - loss: 0.5337 - val_accuracy: 0.8261 - val_loss: 0.4536
Epoch 4/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 25ms/step - accuracy: 0.7362 - loss: 0.5312 - val_accuracy: 0.8199 - val_loss: 0.4578
Epoch 5/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.8092 - loss: 0.4991 - val_accuracy: 0.8137 - val_loss: 0.4461
Epoch 6/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.7983 - loss: 0.4478 - val_accuracy: 0.7578 - val_loss: 0.4588
Epoch 7/50
[1m80/80[0m [32m━━━━━━

**Evaluate Model Performance**

In [19]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix


# Class probabilities for the held-out set; the predicted class is the arg-max
y_pred = best_model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Shared settings for the per-class metrics: weight by class support and
# score 0 (instead of warning) when a class has no predictions.
weighted_options = {'average': 'weighted', 'zero_division': 0}

accuracy = accuracy_score(y_test, y_pred_classes)
precision = precision_score(y_test, y_pred_classes, **weighted_options)
recall = recall_score(y_test, y_pred_classes, **weighted_options)
f1 = f1_score(y_test, y_pred_classes, **weighted_options)

print(f"Final Test Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Final Test Accuracy: 0.8259
Precision: 0.8346
Recall: 0.8259
F1 Score: 0.8236
