In [14]:
import torch
import pandas as pd
import numpy as np
from math import log, exp

In [6]:
dataRaw = pd.read_csv('Admission_Predict.csv')
del dataRaw['Serial No.'] # Serves as a unique identifier, not needed for modeling

In [7]:
dataRaw.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


In [8]:
# We can either use min max normalization 
normalizedDataRaw = (dataRaw-dataRaw.min())/(dataRaw.max()-dataRaw.min())

In [9]:
normalizedDataRaw.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,0.94,0.928571,0.75,0.875,0.875,0.913462,1.0,0.920635
1,0.68,0.535714,0.75,0.75,0.875,0.663462,1.0,0.666667
2,0.52,0.428571,0.5,0.5,0.625,0.384615,1.0,0.603175
3,0.64,0.642857,0.5,0.625,0.375,0.599359,1.0,0.730159
4,0.48,0.392857,0.25,0.25,0.5,0.451923,0.0,0.492063


In [21]:
msk =  np.random.rand(len(normalizedDataRaw)) < 0.8

In [27]:
trainData = normalizedDataRaw[msk]
testData = normalizedDataRaw[~msk]

In [28]:
len(trainData)

326

In [29]:
len(testData)

74

In [83]:
class LogisticRegression:
    def __call__(self, y, yhat):
        self.y = y
        self.yHat = yhat
        tmp = (-1)*y*log(max(yhat, 0.000000000000001)) - (1-y)*log(max(1-yhat,0.000000000000001))
        return tmp
    
    def backwards(self, grad):
        '''
        [(-y)/(yHat) - (1-y)/(1-yHat)]
        '''
        left = ((-1)*(self.y))/(self.yHat)
        right = (1-self.y)/(1-self.yHat)
        
        grad *= (left-right)
        
        return grad
        
        

In [11]:
class Sigmoid:
    def __call__(self, x):
        self.x = x
        return 1/(1+exp((-1)*x)) 
    
    def backwards(self, grad):
        '''
        (e^-x)/((1+e^-x)^2)
        '''
        top = exp((-1)*self.x)
        bottom = (1+exp((-1)*self.x))**2
        
        grad *= top/bottom
        
        return grad
        

In [84]:
class Linear:
    # m: Number of signals layer is receiving
    # n: Number of nodes in layer
    def __init__(self, m, n, alpha, wantBias=True):
        self.w = torch.randn(m,n)
        self.b = torch.zeros(1,n)
        self.wantBias = wantBias
        
        if wantBias:  
            self.b = torch.randn(1,n)


        self.lr = alpha
        
    def __call__(self, inp):
        self.inp = inp
        
        return inp@self.w + self.b
   
    def backwards(self, grad):
        # Calculate new weight values
        # 1) Add in a dimension so we can take transpose
        # 2) Take transpose
        # 3) Use broadcasting in order to have a matrix of proper size
        
        '''
        dL/dX = (dL/dY)*wT
        dL/dW = xT*(dL/dY)
        '''
        
        if type(grad) != type([1,2]):
            modelInput = self.inp.t()

            # Just so I remember what this is:
            #     w :=    w   - \alpha*(gradient)
            #     w :=    w   - \alpha*(gradient of this layer)*(gradient of all of the layers)

            self.nW = self.w.add_( (-1)*(self.lr)*modelInput*grad )
            self.nB = self.b.add_( (-1)*(self.lr)*(grad) )
            
            return [grad,self.w.t()]
            
        elif type(grad) == type([1,2]):
            # We don't have a scalar
            
            self.nW = self.w.add_((-1)*(self.lr)*grad[0]*grad[1])
            self.nB = self.b.add_((-1)*(self.lr)*grad[0]*grad[1])
            
            # [grad,self.w.t()] 
            grad[0] = grad[0]*grad[1]
            grad[1] = self.w.t()
            
            return grad
        
        
    def update(self):
        self.w = self.nW
        self.b = self.nB

In [318]:
class Model:
    def __init__(self, alpha):
#         self.layers = [Linear(7,10,alpha), Linear(10,10,alpha), Linear(10,10,alpha), Linear(10,10,alpha), Linear(10,10,alpha), Linear(10,1,alpha)]
#         self.layers = [Linear(7,10,alpha), Linear(10,10,alpha), Linear(10,1,alpha)]
#         self.layers = [Linear(7,4,alpha), Linear(4,4,alpha), Linear(4,1,alpha)]
#         self.layers = [Linear(7,15,alpha), Linear(15,1,alpha, wantBias=False)]
        self.layers = [Linear(7,50,alpha), Linear(50,1,alpha, wantBias=False)]
#         self.layers = [Linear(7,5,alpha), Linear(5,1,alpha, wantBias=False)]
        self.activation = Sigmoid()
        self.loss = LogisticRegression()
        
    def forwards(self, inp):
        
        y = inp[-1]
        x = inp[:-1]
        
        for layer in self.layers:
            x = layer(x) 
        
        yHat = self.activation(x)
        
#         print("[yHat]: {}".format(yHat))
        
        
        loss = self.loss(y, yHat)
        
#         print("Loss is: {}".format(loss))
        
        return loss 
    
    def backwards(self):
        self.grad = 1
        
        
        expandedLayers = [self.loss, self.activation]
        
        for layer in expandedLayers:
            self.grad = layer.backwards(self.grad)
            
        # Iterate through expanded layers
        # Then iterate through the reverse of linear layers
        for layer in reversed(self.layers):
            self.grad = layer.backwards(self.grad)
            
        # Built update into backwards
        for layer in self.layers:
            layer.update()
        
        return self.grad

In [350]:
m = Model(0.0000000001)
epochs = 5

In [351]:
loss = 0

for epoch in range(epochs):

    for i in range(len(trainData)):
        train = trainData.iloc[i,]
        train = torch.tensor(train.values, dtype=torch.float)
        loss += m.forwards(train)
        m.backwards()
        
        
    print("[loss for epoch {}] {}".format(epoch+1, loss/len(trainData)))
    
# print("[ave loss] {}".format(loss/len(trainData)))

[loss for epoch 1] 1.5436700582504272
[loss for epoch 2] 3.08733868598938
[loss for epoch 3] 4.631008625030518
[loss for epoch 4] 6.174678325653076
[loss for epoch 5] 7.718349456787109


In [15]:
tmp.shape

torch.Size([8])

In [237]:
# out = m.forwards(z)

x: tensor([0.9400, 0.9286, 0.7500, 0.8750, 0.8750, 0.9135, 1.0000])
x: tensor([[2.0027e-05, 6.2652e+00, 9.8404e-01, 1.5848e+00, 1.5815e+00]])
Loss is: 0.27779802680015564


In [238]:
r = m.backwards()

A
tensor([[2.0027e-05, 6.2652e+00, 9.8404e-01, 1.5848e+00, 1.5815e+00]])
B
