# Deep Learning Model

# --------------------------------------------------------
## 1) Import packages

In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras_tuner.tuners import RandomSearch


# pip install tensorflow
# pip install keras
# pip install keras-tuner

ImportError: Traceback (most recent call last):
  File "c:\Program Files\Python311\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

# --------------------------------------------------------
## 2) Load Dataset

In [None]:
# Load the student performance CSV; the path is relative to the notebook's
# working directory.
dataset_path = "../../Data/Student_performance_scaled.csv"
df = pd.read_csv(dataset_path)
# Preview the first rows to confirm the file loaded as expected.
df.head()

# --------------------------------------------------------
## 3) Feature Engineering

#### i) Encoding Categorical Variables: 

In [None]:
def encode_categorical_features(df):
    """Return ``df`` untouched; placeholder for the categorical-encoding step.

    No encoding is performed here. The original author's note says the
    features arrive already scaled, so the input is handed back as-is
    (the very same object, not a copy).

    Args:
        df: The dataset to (not) encode.

    Returns:
        The same object that was passed in.
    """
    passthrough = df
    return passthrough

#### ii) Ratio & Aggregate Features: 

Adds new features in the form of ratios.

`StudyAbsenceRatio` combines `StudyTimeWeekly` and `Absences`. ↑study:↓absent = ↑ratio

In [None]:
# Ratio-based feature(s)
def add_ratio_features(df):
    """Return a copy of ``df`` with a ``StudyAbsenceRatio`` column added.

    The new column is ``StudyTimeWeekly / (Absences + 1)``. The input frame
    is not modified.

    Args:
        df: DataFrame containing ``StudyTimeWeekly`` and ``Absences`` columns.

    Returns:
        A new DataFrame with the extra ``StudyAbsenceRatio`` column.
    """
    enriched = df.copy()
    # Shift the denominator by one so an absence value of 0 cannot divide by zero.
    # NOTE(review): if `Absences` has been scaled and can equal -1, this guard
    # no longer prevents a zero denominator -- confirm against the dataset.
    shifted_absences = enriched['Absences'] + 1
    enriched['StudyAbsenceRatio'] = enriched['StudyTimeWeekly'] / shifted_absences
    return enriched

#### iii) Interaction Features:

adds new features in terms of interaction

`SportsMusic` multiplies `Sports` and `Music` to give insight into a student's combined participation in extracurricular activities.

`TotalSupport` adds `ParentalSupport` and `Tutoring` to show the total support given to a student.

In [None]:
# optional
def add_interaction_features(df):
    """Return a copy of ``df`` with two interaction columns added.

    * ``SportsMusic``  -- element-wise product of ``Sports`` and ``Music``.
    * ``TotalSupport`` -- element-wise sum of ``ParentalSupport`` and ``Tutoring``.

    The input frame is not modified.

    Args:
        df: DataFrame containing the ``Sports``, ``Music``,
            ``ParentalSupport`` and ``Tutoring`` columns.

    Returns:
        A new DataFrame with the two extra columns.
    """
    # Derive both features from the input first, then attach them to a copy.
    sports_times_music = df['Sports'] * df['Music']
    support_sum = df['ParentalSupport'] + df['Tutoring']

    enriched = df.copy()
    enriched['SportsMusic'] = sports_times_music
    enriched['TotalSupport'] = support_sum
    return enriched

#### iv) Apply all feature engineering:

In [None]:
def apply_feature_engineering(df):
    """Run every feature-engineering step on ``df`` and return the result.

    The steps are applied in a fixed order: categorical encoding, ratio
    features, then interaction features. Each step receives the output of
    the previous one.

    Args:
        df: The raw dataset.

    Returns:
        The DataFrame produced by the final step.
    """
    pipeline = (
        encode_categorical_features,
        add_ratio_features,
        add_interaction_features,
    )
    for step in pipeline:
        df = step(df)
    return df

# --------------------------------------------------------
## 4) Prepare Data

`x` = features (independent variables the model learns from).

`y` = target (GradeClass, the label we want the model to predict).

In [None]:
# Run the feature-engineering pipeline on the loaded dataset
df = apply_feature_engineering(df)

# Target first, then every remaining column becomes a model input.
# NOTE(review): all non-target columns are used as features -- confirm the
# CSV holds no identifier or grade-derived columns that would leak the label.
y = df['GradeClass']
X = df.drop(columns=['GradeClass'])

# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# --------------------------------------------------------
## 5) Build model and set up tuner

using the sequential keras model

Model: https://keras.io/api/models/sequential/

Explained: https://www.geeksforgeeks.org/keras-sequential-class/

In [None]:
def build_model(hp):
    """Build and compile the classifier for a single hyperparameter trial.

    The network is two ReLU hidden layers, each followed by dropout, and a
    5-way softmax output. Layer widths (``units1``/``units2``) and dropout
    rates (``dropout1``/``dropout2``) are drawn from ``hp``. The input width
    is read from the module-level ``X_train``.

    Args:
        hp: Hyperparameter container supplied by the tuner; must provide
            ``Int`` and ``Float`` samplers.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    n_features = X_train.shape[1]

    # Layers are created in network order so the hyperparameters are
    # registered in the order units1, dropout1, units2, dropout2.
    first_hidden = Dense(
        units=hp.Int('units1', min_value=32, max_value=256, step=32),
        activation='relu',
        input_shape=(n_features,),
    )
    first_dropout = Dropout(
        rate=hp.Float('dropout1', min_value=0.0, max_value=0.5, step=0.1)
    )
    second_hidden = Dense(
        units=hp.Int('units2', min_value=32, max_value=256, step=32),
        activation='relu',
    )
    second_dropout = Dropout(
        rate=hp.Float('dropout2', min_value=0.0, max_value=0.5, step=0.1)
    )
    output_layer = Dense(5, activation='softmax')  # 5 classes for GradeClass

    model = Sequential()
    for layer in (first_hidden, first_dropout, second_hidden, second_dropout, output_layer):
        model.add(layer)

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


keras tuner:

https://keras.io/keras_tuner/api/tuners/random/

automatically searches for the best hyperparameters for the deep learning model instead of using a grid or manual methods.

It works by:
1) Randomly picks different combinations of settings.
2) Trains a model with each.
3) Picks the best based on a metric specified (`objective` = `'val_accuracy'`).

Settings it tries in the script:

`units1`, `units2`: Neurons in 1st and 2nd layers (32 to 256).

`dropout1`, `dropout2`: Dropout rates (0 to 0.5).

In [None]:
# Keras Tuner: random search over the hyperparameters declared in build_model
# NOTE(review): trial results are written under `directory`/`project_name`;
# confirm whether trials left over from an earlier run should be reused.
tuner = RandomSearch(
    hypermodel=build_model,
    objective='val_accuracy',  # rank trials by validation accuracy
    max_trials=10,             # number of hyperparameter combinations to try
    seed=42,                   # fixed seed so the sampled combinations repeat
    project_name='grade_classification',
    directory='../../Tuners/student_tuner',
)

# Each trial trains for 20 epochs, holding back 20% of the training rows
# to compute val_accuracy.
search_fit_options = {'epochs': 20, 'validation_split': 0.2}
tuner.search(X_train, y_train, **search_fit_options)

# --------------------------------------------------------
## 6) Choose the best model and run predictions

Best model = the one that scored highest on validation accuracy during tuner search.

Fit the best model again on full training data (20 epochs).

Use it to predict the classes for X_test.

In [None]:
# Rebuild the best configuration and retrain it from scratch.
# The model returned by tuner.get_best_models() already carries weights that
# were trained during the search on only 80% of X_train; fitting it again with
# validation_split=0.2 would keep holding back the same final 20% of rows, so
# the model would never see the full training set.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_hp)

# Fit on full training data (no validation split, so every training row is used)
best_model.fit(X_train, y_train, epochs=20)

# Predictions: take the index of the highest-scoring class for each test row
y_pred = np.argmax(best_model.predict(X_test), axis=1)

# --------------------------------------------------------
## 7) Run Evaluation Metrics

#### Accuracy

simple measure of correctness

`correct predictions` / `total predictions`

In [None]:
# Fraction of test rows whose predicted class equals the true class.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

#### Precision (weighted)

how many predictions were actually correct

weighted adjusts for class imbalance

In [None]:
# Per-class precision, averaged with each class weighted by its support.
weighted_precision = precision_score(y_test, y_pred, average='weighted')
print("Precision:", weighted_precision)

#### Recall (weighted)

how many labels were correctly predicted?

weighted adjusts for class imbalance

In [None]:
# Per-class recall, averaged with each class weighted by its support.
weighted_recall = recall_score(y_test, y_pred, average='weighted')
print("Recall:", weighted_recall)

#### F1 Score (weighted)

harmonic mean of precision and recall

weighted adjusts for class imbalance

In [None]:
# Per-class F1, averaged with each class weighted by its support.
weighted_f1 = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", weighted_f1)

#### Confusion Matrix

shows real vs predicted class counts

In [None]:
# Rows are true classes, columns are predicted classes.
class_confusion = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", class_confusion)

#### Classification Report

breakdown of precision, recall, F1-Score per class

In [None]:
# Text table of precision, recall, F1 and support for every class.
per_class_report = classification_report(y_test, y_pred)
print("\nClassification Report:\n", per_class_report)

Metric      : Meaning

Precision   : Out of all predictions for this class, how many were correct?

Recall      : Out of all actual instances of this class, how many did we correctly identify?

F1-Score    : Harmonic mean of Precision and Recall — balances false positives and false negatives.

Support     : Number of actual test samples in each class. Shows class distribution.