In [345]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split

In [346]:
data = pd.read_csv("student-mat.csv" , sep=";" )
cols_to_drop = [
    "address","famsize","Pstatus","Medu","Fedu","sex",
    "traveltime","age", "school",
    "schoolsup","famsup","paid","activities","nursery","higher",
    "internet","romantic","famrel","freetime","goout","Dalc","Walc"
]

from sklearn.preprocessing import LabelEncoder


label_encoder = LabelEncoder()

data.drop(
    columns=cols_to_drop,
    inplace=True
    )

# # data.info()
# data["reason"] = data["reason"].map({
#     "course":int(0),
#     "home":int(1),
#     "other":int(2)
# })


data["reason"] = label_encoder.fit_transform(data["reason"])
data["health"] = label_encoder.fit_transform(data["health"])


# data.info()
# data["guardian"] = data["guardian"].map({
#     "mother":int(0),
#     "father":int(1),
# })


data["guardian"] = label_encoder.fit_transform(data["guardian"])


# data.replace(np.nan , 0 , inplace=True)
label = data["G3"]

data.drop(columns="G3" , inplace=True)



data["Mjob"] = label_encoder.fit_transform(data["Mjob"])
data["Fjob"] = label_encoder.fit_transform(data["Fjob"])


q1 = data["absences"].quantile(0.25)
q3 = data["absences"].quantile(0.75)

IQR = q3-q1

upper = q3 + 1.5*IQR
lower = q1 - 1.5*IQR


data["absences"]  = data["absences"].clip(lower=lower , upper=upper)


data



Unnamed: 0,Mjob,Fjob,reason,guardian,studytime,failures,health,absences,G1,G2
0,0,4,0,1,2,0,2,6,5,6
1,0,2,0,0,2,0,2,4,5,5
2,0,2,2,1,2,3,2,10,7,8
3,1,3,1,1,3,0,4,2,15,14
4,2,2,1,0,2,0,4,4,6,10
...,...,...,...,...,...,...,...,...,...,...
390,3,3,0,2,2,2,3,11,9,9
391,3,3,0,1,1,0,1,3,14,16
392,2,2,0,2,1,3,2,3,10,8
393,3,2,0,1,1,0,4,0,11,12


In [347]:
xtrain,xtest,ytrain,ytest = train_test_split(data , label , test_size=0.2 , random_state=42)

from sklearn.preprocessing import StandardScaler

RS = StandardScaler()
xtrain["absences"] = RS.fit_transform(xtrain[["absences"]])
xtest["absences"] = RS.transform(xtest[["absences"]])



In [348]:
from numpy import ndarray

class NeuralNetwork:
    def __init__(self , X ,in_features=10 ,  hidden_layer_one=32 , hidden_layer_two=16 , out_features=1 , lr=1e-4):
        # Xaiver 
        # Use He initialization for ReLU
        self.w1 = np.random.randn(in_features, hidden_layer_one) * np.sqrt(2/in_features)
        self.w2 = np.random.randn(hidden_layer_one, hidden_layer_two) * np.sqrt(2/hidden_layer_one)
        self.w3 = np.random.randn(hidden_layer_two, out_features) * np.sqrt(2/hidden_layer_two)
        
        self.b1 = np.zeros((1 , hidden_layer_one))
        self.b2 = np.zeros((1 , hidden_layer_two))
        self.b3 = np.zeros((1 , out_features))
        self.lr = lr
    
    def relu(self, x:ndarray):
        return np.maximum(0,x)
    
    def relu_derivative(self, x:ndarray):
        return (x > 0).astype(float)
    
    def forward(self , x:ndarray):
        self.z1 = x@self.w1 + self.b1
        self.A1 = self.relu(self.z1)
        self.z2 = self.A1@self.w2 + self.b2
        self.A2 = self.relu(self.z2)
        self.z3 = self.A2@self.w3 + self.b3
        self.y_hat = self.z3
        return self.y_hat
        
    def loss(self , y_hat:ndarray , y:ndarray): 
        MSE = np.mean((y_hat - y) ** 2)
        ss_res = np.sum((y - y_hat) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        RSQE = 1 - (ss_res / ss_tot)

        return MSE , RSQE
        
    def backward(self, X , y_hat:ndarray , y:ndarray):
        m = y.shape[0]
        self.dz3 = (2/m)*(y_hat - y)
        dw3 = self.A2.T @ self.dz3
        db3 = np.sum(self.dz3 , axis=0 )
        
        self.dA2 = self.dz3 @ self.w3.T
        self.dz2 = self.dA2 * self.relu_derivative(self.z2)
        dw2 = self.A1.T @ self.dz2
        db2 = np.sum(self.dz2 , axis=0 )
        
        
        self.dA1 = self.dz2 @ self.w2.T
        self.dz1 = self.dA1 * self.relu_derivative(self.z1)
        dw1 = X.T @ self.dz1
        db1 = np.sum(self.dz1 , axis=0 )
        
        return dw1 , db1 , dw2 , db2 , dw3,db3

    def optim(self, dw1, db1, dw2, db2, dw3, db3):
        self.w1 = self.w1 - self.lr*dw1
        self.w2 = self.w2 - self.lr*dw2
        self.w3 = self.w3 - self.lr*dw3
        
        self.b1 = self.b1 - self.lr*db1
        self.b2 = self.b2 - self.lr*db2
        self.b3 = self.b3 - self.lr*db3
        
    
    def train(self, xtrain:ndarray , ytrain:ndarray , epoch):
        
        
        for i in range(epoch):
            y_pred = self.forward(xtrain)
            
            dw1 , db1 , dw2 , db2 , dw3,db3 = self.backward( xtrain, y_pred , ytrain)
            
            self.optim( dw1, db1, dw2, db2, dw3, db3)
            
            if i % 1000 == 0 :
                print("Loss : " , self.loss(y_pred , ytrain))
                
                
    def predict(self, X):
        predictions = self.forward(X)
        return predictions     
    
def main():
    X_train = xtrain.values
    # X_test  = xtest.values


    y_train = ytrain.values.reshape(-1, 1)
    y_test  = ytest.values.reshape(-1, 1)

    in_features = X_train.shape[1]

    nn = NeuralNetwork(
        X_train,
        in_features=in_features,
        lr=1e-3
    )

    nn.train(X_train, y_train, epoch=20000)

    preds = nn.predict(xtest)
    print("Loss PREDS - YTEST : " ,nn.loss(preds , ytest) ) 
    print( "Model prediction : " , preds)
    print("Actual values : " , ytest)

        
        
if __name__ == "__main__":
    main()

Loss :  (237.48398302130337, -10.306329425803744)
Loss :  (3.211274833657454, 0.8471150319941058)
Loss :  (2.748151040107659, 0.8691638039078402)
Loss :  (2.2829617297791334, 0.891310912613967)
Loss :  (1.962848762906251, 0.9065511094933142)
Loss :  (1.5977440134617529, 0.9239333115249128)
Loss :  (1.4719111774897338, 0.9299240628925751)
Loss :  (1.4435957135799897, 0.9312721283862354)
Loss :  (1.3099918779041924, 0.9376328477892193)
Loss :  (1.194809835749069, 0.9431165275556389)
Loss :  (1.0311943108570953, 0.950906067717758)
Loss :  (0.9960244761129011, 0.952580461638603)
Loss :  (0.9831542773721715, 0.9531931964634404)
Loss :  (0.8590768035493963, 0.959100377131014)
Loss :  (0.9412181188578479, 0.9551897269956622)
Loss :  (0.715546550363913, 0.9659336814424822)
Loss :  (0.8642628904068204, 0.9588534737159083)
Loss :  (0.8046202373669903, 0.9616929893519449)
Loss :  (0.695215827016587, 0.9669016029532526)
Loss :  (0.7932469911791926, 0.9622344560496315)
Loss PREDS - YTEST :  (37.576

  return reduction(axis=axis, out=out, **passkwargs)
