### **SUMMARY STATISTICS OF DATA**

In [2]:
import pandas as pd
from ucimlrepo import fetch_ucirepo 

# Fetch the dataset registered under id 697 in the UCI repository
predict_students_dropout_and_academic_success = fetch_ucirepo(id=697)

# Pull out the feature table and the target table, then place them
# side by side so the whole dataset can be explored as one DataFrame
X = predict_students_dropout_and_academic_success.data.features
y = predict_students_dropout_and_academic_success.data.targets
df = pd.concat([X, y], axis=1)

# Tidy the header: trim surrounding whitespace and turn embedded tabs into spaces
df.columns = (
    df.columns
    .str.strip()
    .str.replace("\t", " ")
)

# describe() summarises the numeric columns; transposing yields one row
# per column, which is easier to read for a wide dataset
stats_summary = df.describe().T
print("Descriptive Statistics (Numerical Only)")
display(stats_summary)

Descriptive Statistics (Numerical Only)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Marital Status,4424.0,1.178571,0.605747,1.0,1.0,1.0,1.0,6.0
Application mode,4424.0,18.669078,17.484682,1.0,1.0,17.0,39.0,57.0
Application order,4424.0,1.727848,1.313793,0.0,1.0,1.0,2.0,9.0
Course,4424.0,8856.642631,2063.566416,33.0,9085.0,9238.0,9556.0,9991.0
Daytime/evening attendance,4424.0,0.890823,0.311897,0.0,1.0,1.0,1.0,1.0
Previous qualification,4424.0,4.577758,10.216592,1.0,1.0,1.0,1.0,43.0
Previous qualification (grade),4424.0,132.613314,13.188332,95.0,125.0,133.1,140.0,190.0
Nacionality,4424.0,1.873192,6.914514,1.0,1.0,1.0,1.0,109.0
Mother's qualification,4424.0,19.561935,15.603186,1.0,2.0,19.0,37.0,44.0
Father's qualification,4424.0,22.275316,15.343108,1.0,3.0,19.0,37.0,44.0


### **CHECK FOR MISSING VALUES**

In [None]:
# Number of null entries in each column
missing_values = df.isna().sum()

print("\nMissing Values per Column\n")
print(missing_values)

### **UNIQUE VALUES CHECK**

In [None]:
print("\nUnique Values per Column")

# List the distinct values of every text column and of every column
# that has fewer than 15 distinct values
for col in df.columns:
    is_text = df[col].dtype == 'object'
    if not (is_text or df[col].nunique() < 15):
        continue
    print(f"{col}: {df[col].unique()}")

### **UNIQUE VALUES COUNT**

In [None]:
# Distinct-value count for every column (Series indexed by column name)
unique_counts = df.nunique()

# Move the column names out of the index into a regular column and
# give both columns presentation-friendly headers
unique_df = unique_counts.reset_index()
unique_df.columns = ['Column', 'Unique Values']

display(unique_df)

### **IMPORTING LIBRARIES AND LOAD DATASET FROM UCI REPO**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from ucimlrepo import fetch_ucirepo

# Fetch dataset 697 from the UCI repository and keep its feature/target tables
dataset = fetch_ucirepo(id=697)
X_raw, y_raw = dataset.data.features, dataset.data.targets

# Single frame: features first, target column(s) last
df = pd.concat([X_raw, y_raw], axis=1)

# Tidy the header: trim surrounding whitespace and turn embedded tabs into spaces
df.columns = (
    df.columns
    .str.strip()
    .str.replace("\t", " ")
)

# The first column holding any of the three outcome labels is taken to be
# the target and is renamed to 'Target'; later columns are not inspected
for col in df.columns:
    if not df[col].isin(['Graduate', 'Dropout', 'Enrolled']).any():
        continue
    df.rename(columns={col: 'Target'}, inplace=True)
    break

# Remove incomplete rows first, then exact duplicate rows
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

# Encode the outcome as an integer class id: Graduate=0, Dropout=1, Enrolled=2.
# NOTE(review): any label outside these three would map to NaN here, after
# dropna has already run.
df['Target'] = df['Target'].map({
    "Graduate": 0,
    "Dropout": 1,
    "Enrolled": 2,
})

print(f"Dataset shape after cleaning: {df.shape}")

### **DROP LESS RELEVANT COLUMNS**

In [None]:
# Columns to discard when they are present in the frame
drop_cols = [
    'Curricular units 1st sem (credited)',
    'Curricular units 2nd sem (credited)',
    'Curricular units 1st sem (without evaluations)',
    'Curricular units 2nd sem (without evaluations)',
    'International',
]

# errors='ignore' skips any listed name that is not a column instead of raising
df.drop(columns=drop_cols, errors='ignore', inplace=True)

print(f"Columns remaining after removal: {len(df.columns)}")

### **WINSORIZATION** 

In [None]:
# Continuous columns whose extreme values are capped
outlier_cols = [
    'Previous qualification (grade)',
    'Admission grade',
    'Curricular units 1st sem (grade)',
    'Curricular units 2nd sem (grade)',
    'Unemployment rate',
    'Inflation rate',
    'GDP',
]

# Winsorize: values below the 5th percentile are raised to it and values
# above the 95th percentile are lowered to it.
# NOTE(review): the percentiles are computed on the full dataset, i.e. before
# the train/test split performed further down.
for col in outlier_cols:
    if col not in df.columns:
        continue  # column absent from this frame; nothing to clip

    percentiles = df[col].quantile([0.05, 0.95])
    q5, q95 = percentiles.iloc[0], percentiles.iloc[1]

    df[col] = df[col].clip(lower=q5, upper=q95)

    print(f" {col}: clipped to [{q5:.2f}, {q95:.2f}]")

### **FEATURE SCALING**

In [None]:
# Every column other than the label is a model input
feature_cols = [name for name in df.columns if name != 'Target']

# Min-max scaling per column: (value - min) / (max - min), giving a [0, 1] range.
# NOTE(review): min and max are taken over the full dataset, i.e. before the
# train/test split performed further down.
feature_min = df[feature_cols].min()
feature_range = df[feature_cols].max() - feature_min
df[feature_cols] = (df[feature_cols] - feature_min) / feature_range

# A constant column has zero range and divides to NaN; replace every NaN
# in the frame with 0 so training does not fail
df.fillna(0, inplace=True)

print(f" Features used: {len(feature_cols)} columns")


### **TRAIN-TEST SPLIT (80/20)**

In [None]:
# NumPy views of the inputs and the labels
X = df[feature_cols].to_numpy()
y = df['Target'].to_numpy()

# Fix NumPy's global seed so the shuffle below is reproducible
np.random.seed(42)

# Random ordering of all row positions
indices = np.random.permutation(len(X))

# The first 80% of the shuffled positions form the training set,
# the remaining 20% the test set
split = int(0.8 * len(X))
X_train, y_train = X[indices[:split]], y[indices[:split]]
X_test, y_test = X[indices[split:]], y[indices[split:]]

print(f"Training set size: {len(X_train)}")
print(f"Testing set size: {len(X_test)}")


### **OVERSAMPLING WITH SMOTE**

In [None]:

# SMOTE oversampler with a fixed seed for repeatable resampling
sm = SMOTE(random_state=42)

# Resample the training split only; the test split is left untouched
resampled = sm.fit_resample(X_train, y_train)
X_train_os, y_train_os = resampled

# Per-class sample counts after resampling (position = class id)
class_counts = np.bincount(y_train_os)
print(f" After SMOTE: {class_counts}")

### **ONE-HOT ENCODING**

One-hot encoding is a technique for representing categorical data in a numerical format suitable for machine learning models.

In [None]:
import numpy as np

def one_hot(y, num_classes=3):
    """Convert integer class labels into one-hot encoded rows.

    Args:
        y: Array-like of class ids (NumPy array, list, pandas Series, ...).
            Values are cast to ``int`` before use.
        num_classes: Number of classes, i.e. the width of each encoded row.

    Returns:
        Float array with one row per label; row ``i`` is all zeros except
        for a 1.0 at column ``y[i]``.

    Raises:
        IndexError: If any label lies outside ``[0, num_classes)``.
    """
    # asarray lets plain Python sequences through; they have no .astype
    labels = np.asarray(y).astype(int)

    # Without this check a negative label would silently wrap around through
    # NumPy's negative indexing (e.g. -1 would be encoded as the last class).
    if labels.size and (labels.min() < 0 or labels.max() >= num_classes):
        raise IndexError(
            f"labels must be in [0, {num_classes}); "
            f"got values between {labels.min()} and {labels.max()}"
        )

    # Row k of the identity matrix is the one-hot vector for class k
    return np.eye(num_classes)[labels]

# One-hot encode the (resampled) training labels and the test labels
y_train_oh, y_test_oh = one_hot(y_train_os), one_hot(y_test)

# Show the encoding of the first training sample as an illustration
print(
    "One-Hot Encoding Example:",
    f"Graduate: {y_train_oh[0]} (if index 0 is Graduate)",
    sep="\n",
)


### **DEFINE BPNN MODEL**
- Initialization: Sets up weights and biases for 3
hidden layers and the output layer.
- Activation Functions: Uses ReLU for hidden layers
and Softmax for multi-class classification.
- Forward Propagation: Passes input through the
layers to generate predictions.
- Loss Function: Uses categorical cross-entropy to
measure prediction error.
- Backward Propagation: Calculates gradients and
updates weights using gradient descent.
- Prediction: Uses argmax to choose the class with
the highest probability.

In [None]:
class BPNN:
    """Back-propagation neural network with three ReLU hidden layers and a softmax output.

    ``forward`` caches every layer's pre-activation (``Z1``..``Z4``) and
    activation (``A1``..``A4``) on the instance; ``backward`` reads that cache
    to compute gradients and applies one plain gradient-descent step.
    ``predict`` runs its own pass and never touches the cache, so it is safe
    to call between ``forward`` and ``backward``.
    """

    def __init__(self, input_size, h1, h2, h3, output_size, lr=0.01):
        """Create the weights and biases.

        Args:
            input_size: Number of input features.
            h1, h2, h3: Widths of the first, second and third hidden layers.
            output_size: Number of output classes.
            lr: Learning rate used by ``backward``.
        """
        self.lr = lr
        # He initialisation: standard-normal draws scaled by sqrt(2 / fan_in).
        # Draws come from NumPy's global RNG, in the order W1, W2, W3, W4.
        self.W1 = np.random.randn(input_size, h1) * np.sqrt(2/input_size)
        self.b1 = np.zeros((1, h1))
        self.W2 = np.random.randn(h1, h2) * np.sqrt(2/h1)
        self.b2 = np.zeros((1, h2))
        self.W3 = np.random.randn(h2, h3) * np.sqrt(2/h2)
        self.b3 = np.zeros((1, h3))
        self.W4 = np.random.randn(h3, output_size) * np.sqrt(2/h3)
        self.b4 = np.zeros((1, output_size))

    # Activation functions
    def relu(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def relu_deriv(self, x):
        """Element-wise ReLU derivative: 1.0 where x > 0, else 0.0."""
        return (x > 0).astype(float)

    def softmax(self, x):
        """Row-wise softmax; the row maximum is subtracted first for numerical stability."""
        exp = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp / np.sum(exp, axis=1, keepdims=True)

    # Forward propagation
    def _propagate(self, X):
        """Run X through all four layers without storing anything on the instance.

        Returns:
            Tuple ``(Z1, A1, Z2, A2, Z3, A3, Z4, A4)`` where ``A4`` holds the
            class probabilities (one row per sample).
        """
        Z1 = X @ self.W1 + self.b1
        A1 = self.relu(Z1)
        Z2 = A1 @ self.W2 + self.b2
        A2 = self.relu(Z2)
        Z3 = A2 @ self.W3 + self.b3
        A3 = self.relu(Z3)
        Z4 = A3 @ self.W4 + self.b4
        A4 = self.softmax(Z4)
        return Z1, A1, Z2, A2, Z3, A3, Z4, A4

    def forward(self, X):
        """Compute class probabilities for X and cache the intermediates for ``backward``."""
        (self.Z1, self.A1, self.Z2, self.A2,
         self.Z3, self.A3, self.Z4, self.A4) = self._propagate(X)
        return self.A4

    # Loss function
    def compute_loss(self, y_true, y_pred):
        """Mean categorical cross-entropy between one-hot ``y_true`` and probabilities ``y_pred``.

        A small constant (1e-8) is added inside the log to avoid log(0).
        """
        return -np.mean(np.sum(y_true * np.log(y_pred + 1e-8), axis=1))

    # Backward pass and weight updates
    def backward(self, X, y_true, y_pred):
        """Back-propagate the loss gradient and update all weights and biases in place.

        Must follow a ``forward(X)`` call on the same batch, because it reads
        the cached ``Z*``/``A*`` values.

        Args:
            X: Input batch that was passed to ``forward``.
            y_true: One-hot labels for the batch.
            y_pred: Probabilities returned by ``forward`` for the batch.
        """
        m = y_true.shape[0]

        # Softmax + cross-entropy: gradient w.r.t. the output logits is (p - y)
        dZ4 = y_pred - y_true
        dW4 = self.A3.T @ dZ4 / m
        db4 = np.sum(dZ4, axis=0, keepdims=True) / m

        dA3 = dZ4 @ self.W4.T
        dZ3 = dA3 * self.relu_deriv(self.Z3)
        dW3 = self.A2.T @ dZ3 / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m

        dA2 = dZ3 @ self.W3.T
        dZ2 = dA2 * self.relu_deriv(self.Z2)
        dW2 = self.A1.T @ dZ2 / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m

        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * self.relu_deriv(self.Z1)
        dW1 = X.T @ dZ1 / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m

        # Gradient-descent step; all gradients above were computed from the
        # pre-update weights
        self.W4 -= self.lr * dW4; self.b4 -= self.lr * db4
        self.W3 -= self.lr * dW3; self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2; self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1; self.b1 -= self.lr * db1

    # Predict function
    def predict(self, X):
        """Return the most probable class id for each row of X.

        Uses a cache-free pass so the activations stored by the last
        ``forward`` call stay valid for a subsequent ``backward``.
        """
        return np.argmax(self._propagate(X)[-1], axis=1)

### **TRAIN THE MODEL**

In [None]:
# Network sized from the resampled training matrix: hidden layers of
# 256, 128 and 64 units, 3 output classes
model = BPNN(X_train_os.shape[1], 256, 128, 64, 3, lr=0.01)
epochs = 1500

# Per-epoch history used by the plots and tables further down
losses, train_accs, test_accs = [], [], []

# Early-stopping state: best test accuracy so far, allowed number of
# non-improving epochs, and the current count of non-improving epochs.
# NOTE(review): stopping is driven by test accuracy, so the test set
# influences how long the model trains.
best_acc, patience, wait = 0, 200, 0

for epoch in range(epochs):

    # Full-batch forward pass, loss, then one gradient-descent update
    y_pred_train = model.forward(X_train_os)
    loss = model.compute_loss(y_train_oh, y_pred_train)
    model.backward(X_train_os, y_train_oh, y_pred_train)

    # Train accuracy uses the outputs computed before the update; test
    # accuracy is measured with the freshly updated weights
    train_hits = np.argmax(y_pred_train, axis=1) == y_train_os
    acc_train = np.mean(train_hits) * 100
    acc_test = np.mean(model.predict(X_test) == y_test) * 100

    losses.append(loss)
    train_accs.append(acc_train)
    test_accs.append(acc_test)

    # Reset the counter on a new best test accuracy, otherwise count the epoch
    if acc_test > best_acc:
        best_acc, wait = acc_test, 0
    else:
        wait += 1

    # Stop when the loss is essentially zero ...
    if loss < 0.001:
        print(f"Early stopping at epoch {epoch+1} - Loss below 0.001")
        break
    # ... or when test accuracy has not improved for more than `patience` epochs
    if wait > patience:
        print(f"Early stopping at epoch {epoch+1} - Best Test Acc: {best_acc:.2f}%")
        break

    # Progress line every 100 epochs and on the final epoch
    if epoch % 100 == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch+1}/{epochs} Loss: {loss:.4f} Train Acc: {acc_train:.2f}% Test Acc: {acc_test:.2f}%")

### **FINAL EVALUATION**
The trained model is used to make predictions on the test set.

In [None]:
# Class ids predicted by the trained model for the held-out rows
y_pred_test = model.predict(X_test)

# Share of test rows predicted correctly, expressed as a percentage
correct = y_pred_test == y_test
test_accuracy = np.mean(correct) * 100
print(f"\nFinal Test Accuracy: {test_accuracy:.2f}%")

### **PLOT LOSS & ACCURACY**

In [None]:
# One wide figure with two panels: loss on the left, accuracy on the right
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: training loss per epoch
ax_loss.plot(losses, color='red')
ax_loss.set_title("Loss")
ax_loss.set_xlabel("Epochs")
ax_loss.set_ylabel("Loss Value")

# Right panel: train and test accuracy per epoch
ax_acc.plot(train_accs, label="Train", color='blue')
ax_acc.plot(test_accs, label="Test", color='green')
ax_acc.set_title("Accuracy")
ax_acc.set_xlabel("Epochs")
ax_acc.set_ylabel("Accuracy (%)")
ax_acc.legend()

# Prevent label overlap, then render
fig.tight_layout()
plt.show()

### **CONFUSION MATRIX**
It shows how many students were correctly or
incorrectly classified into the three categories.

In [None]:
# Class ids predicted for the test rows
y_pred_final = model.predict(X_test)

# 3x3 count matrix: row = actual class, column = predicted class
cm = np.zeros((3, 3), dtype=int)
for actual, predicted in zip(y_test, y_pred_final):
    cm[int(actual), int(predicted)] += 1

# Raw counts
print(cm)

# Heatmap of the same counts
plt.figure(figsize=(8, 6))
plt.imshow(cm, cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.colorbar()

# Class names on both axes, in class-id order (0, 1, 2)
class_labels = ['Graduate', 'Dropout', 'Enrolled']
tick_marks = np.arange(3)
plt.xticks(tick_marks, class_labels, rotation=45)
plt.yticks(tick_marks, class_labels)

# Write each count into its cell; use white text on cells darker than
# half of the maximum count so the number stays readable
half_max = cm.max() / 2
for i, row in enumerate(cm):
    for j, count in enumerate(row):
        text_color = "white" if count > half_max else "black"
        plt.text(j, i, count, ha="center", va="center", color=text_color)

plt.xlabel("Predicted")
plt.ylabel("Actual")

# Prevent label overlap, then render
plt.tight_layout()
plt.show()

### **CLASSIFICATION REPORT**

In [None]:
# Import evaluation function for classification models
from sklearn.metrics import classification_report

# Per-class and aggregate metrics as a nested dict; names are listed in
# class-id order (0, 1, 2)
report = classification_report(
    y_test,
    y_pred_final,
    target_names=["Graduate", "Dropout", "Enrolled"],
    output_dict=True,
)

# One row per class / aggregate, one column per metric
report_df = pd.DataFrame(report).T
display(report_df)

### **SUMMARY METRICS TABLE**

In [None]:
# Keep only the aggregate rows of the report and give them
# presentation-friendly labels
summary_df = report_df.loc[['accuracy', 'macro avg', 'weighted avg']].rename(
    index={
        'accuracy': "Accuracy (Overall)",
        'macro avg': "Macro Average",
        'weighted avg': "Weighted Average",
    }
)

display(summary_df)

### **SAMPLE PREDICTION**

In [None]:
# Numeric true labels next to numeric predictions for the test rows
results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred_final})

# Class id -> readable class name
label_mapping = {0: 'Graduate', 1: 'Dropout', 2: 'Enrolled'}

# Add a text-label column for each numeric column
# ('Actual Label' first, then 'Predicted Label')
for source in ('Actual', 'Predicted'):
    results_df[f'{source} Label'] = results_df[source].map(label_mapping)

display(results_df.head(10))

### **BEST EPOCH BY TEST ACCURACY**

In [None]:
# Zero-based position of the first epoch that reached the highest test accuracy
best_epoch = np.argmax(test_accs)

# Single-row table describing that epoch; +1 turns the position into an
# epoch number
best_acc_row = pd.DataFrame([{
    'Epoch': best_epoch + 1,
    'Train Accuracy (%)': train_accs[best_epoch],
    'Test Accuracy (%)': test_accs[best_epoch],
    'Loss': losses[best_epoch],
}])

display(best_acc_row)

### **LAST 10 EPOCH PERFORMANCE**

In [None]:
# Epoch numbers 1..N, where N is the number of epochs actually run
epoch_numbers = np.arange(1, len(train_accs) + 1)

# Full training history, one row per epoch
history = {
    'Epoch': epoch_numbers,
    'Train Accuracy (%)': train_accs,
    'Test Accuracy (%)': test_accs,
    'Loss': losses,
}
epoch_df = pd.DataFrame(history)

# Show only the final ten epochs
display(epoch_df.tail(10))

This is Experiment 1. We need to run at least 5 more experiments with different dataset splits, epoch counts, learning rates, and hidden-layer configurations.

- Experiment 1: 80:20 Split, 1500 Epochs, 0.01   Learning Rate

- Experiment 2: 60:40 Split, 1500 Epochs, 0.01   Learning Rate

- Experiment 3: 80:20 Split, 1000 Epochs, 0.009  Learning Rate

- Experiment 4: 70:30 Split, 1000 Epochs, 0.009  Learning Rate

- Experiment 5: 80:20 Split, 1000 Epochs, 0.008  Learning Rate