In [1]:
#9. Build KNN Classification model for a given dataset. Build Artificial Neural Network model with back propagation on a given dataset.

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
data = pd.read_csv("student_data.csv")

In [4]:
print("First 5 rows:")
print(data.head())

First 5 rows:
  school sex  age address famsize Pstatus  Medu  Fedu     Mjob      Fjob  ...  \
0     GP   F   18       U     GT3       A     4     4  at_home   teacher  ...   
1     GP   F   17       U     GT3       T     1     1  at_home     other  ...   
2     GP   F   15       U     LE3       T     1     1  at_home     other  ...   
3     GP   F   15       U     GT3       T     4     2   health  services  ...   
4     GP   F   16       U     GT3       T     3     3    other     other  ...   

  famrel freetime  goout  Dalc  Walc health absences  G1  G2  G3  
0      4        3      4     1     1      3        6   5   6   6  
1      5        3      3     1     1      3        4   5   5   6  
2      4        3      2     2     3      3       10   7   8  10  
3      3        2      2     1     1      5        2  15  14  15  
4      4        3      2     1     2      5        4   6  10  10  

[5 rows x 33 columns]


In [5]:
for column in data.columns:
    if data[column].dtype == 'object':
        le = LabelEncoder()
        data[column] = le.fit_transform(data[column])

In [6]:
# Create Target Variable (Pass/Fail)
# Pass if G3 >= 10 else Fail
data['Result'] = data['G3'].apply(lambda x: 1 if x >= 10 else 0)

In [7]:
X = data.drop(['G3', 'Result'], axis=1)
y = data['Result']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
print("\n===== KNN MODEL =====")

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred_knn = knn.predict(X_test)

print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_knn))
print("Classification Report:\n",
      classification_report(y_test, y_pred_knn, zero_division=0))


===== KNN MODEL =====
KNN Accuracy: 0.7341772151898734
Confusion Matrix:
 [[12 15]
 [ 6 46]]
Classification Report:
               precision    recall  f1-score   support

           0       0.67      0.44      0.53        27
           1       0.75      0.88      0.81        52

    accuracy                           0.73        79
   macro avg       0.71      0.66      0.67        79
weighted avg       0.72      0.73      0.72        79



In [11]:
print("\n===== ANN MODEL =====")

ann = MLPClassifier(
    hidden_layer_sizes=(50, 30),
    activation='relu',
    solver='adam',
    max_iter=2000,
    early_stopping=True,
    random_state=42
)

ann.fit(X_train, y_train)

y_pred_ann = ann.predict(X_test)

print("ANN Accuracy:", accuracy_score(y_test, y_pred_ann))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_ann))
print("Classification Report:\n",
      classification_report(y_test, y_pred_ann, zero_division=0))


===== ANN MODEL =====
ANN Accuracy: 0.759493670886076
Confusion Matrix:
 [[12 15]
 [ 4 48]]
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.44      0.56        27
           1       0.76      0.92      0.83        52

    accuracy                           0.76        79
   macro avg       0.76      0.68      0.70        79
weighted avg       0.76      0.76      0.74        79



In [12]:
print("\n===== MODEL COMPARISON =====")
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("ANN Accuracy:", accuracy_score(y_test, y_pred_ann))


===== MODEL COMPARISON =====
KNN Accuracy: 0.7341772151898734
ANN Accuracy: 0.759493670886076
