### Want-To
Use what you wrote in preProcessing and create a working neural net.
Cost function is the thing mentioned in Andrew Ng's course (maybe re-watch fast.ai to get a refresser)

In [2]:
import torch
import pandas as pd
from math import log, exp

In [3]:
dataRaw = pd.read_csv('Admission_Predict.csv')
del dataRaw['Serial No.'] # Serves as a unique identifier, not needed for modeling

In [4]:
dataRaw.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,337,118,4,4.5,4.5,9.65,1,0.92
1,324,107,4,4.0,4.5,8.87,1,0.76
2,316,104,3,3.0,3.5,8.0,1,0.72
3,322,110,3,3.5,2.5,8.67,1,0.8
4,314,103,2,2.0,3.0,8.21,0,0.65


### 2) Normalize it
We use the min/max normalization equation:

$$x_{new} = \frac{x_{current}-X_{min}}{X_{max}-X_{min}},$$

where $x_{current}$ is an element in column $X$ and $x_{new}$ is the updated value of $x_{current}$.

In [5]:
# We can either use min max normalization 
normalizedDataRaw = (dataRaw-dataRaw.min())/(dataRaw.max()-dataRaw.min())
# or mean std normalization.
# normalizedDataRaw = (dataRaw-dataRaw.mean())/(dataRaw.std())

In [6]:
normalizedDataRaw.head()

Unnamed: 0,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,0.94,0.928571,0.75,0.875,0.875,0.913462,1.0,0.920635
1,0.68,0.535714,0.75,0.75,0.875,0.663462,1.0,0.666667
2,0.52,0.428571,0.5,0.5,0.625,0.384615,1.0,0.603175
3,0.64,0.642857,0.5,0.625,0.375,0.599359,1.0,0.730159
4,0.48,0.392857,0.25,0.25,0.5,0.451923,0.0,0.492063


In [7]:
normalizedDataRaw.shape

(400, 8)

1) Set-Up forwards propagation
* What are the dimensions of everything?

High Level:

Input Layer

Hidden Layer 1, 10 linear nodes

Hidden Layer 2, 10 linear nodes

Hidden Layer 3, 10 linear nodes

Hidden Layer 4, 10 linear nodes

Hidden Layer 5, 10 linear nodes

Sum, apply sigmoid? Whatever that cost function was.

Dimensions:

Input Layer:  [1x7]

Hidden Layer 1: w: [7x10] b: [1x10]

Hidden Layer 2: w: [10x10] b: [1x10]

Hidden Layer 3: w: [10x10] b: [1x10]

Hidden Layer 4: w: [10x10] b: [1x10]

Hidden Layer 5: w: [10x10] b: [1x10]

Let's get one forwards propagation. Don't worry about the cost function yet.

How should we do the weights? Just pull them from the standard normal distribution.

In [8]:
x = torch.randn(1)
print(x)
type(x)

tensor([0.0395])


torch.Tensor

In [9]:
x = torch.randn(512)
a = torch.randn(512,512)

a.shape

torch.Size([512, 512])

In [11]:
class LogisticRegression:
    def __call__(self, y, yhat):
        self.y = y
        self.yHat = yhat
        tmp = (-1)*y*log(yhat) - (1-y)*log(1-yhat)
        return tmp
    
    def backwards(self, grad):
        '''
        [(-y)/(yHat) - (1-y)/(1-yHat)]
        '''
        left = ((-1)*(self.y))/(self.yHat)
        right = (1-self.y)/(1-self.yHat)
        
        grad *= (left-right)
        
        return grad
        
        

In [12]:
class Sigmoid:
    def __call__(self, x):
        self.x = x
        return 1/(1+exp((-1)*x)) 
    
    def backwards(self, grad):
        '''
        (e^-x)/((1+e^-x)^2)
        '''
        top = exp((-1)*self.x)
        bottom = (1+exp((-1)*self.x))**2
        
        grad *= top/bottom
        
        return grad
        

In [14]:
# Test Sigmoid
sig = Sigmoid()

sig(-55)


1.299581425007503e-24

In [10]:
loss = LogisticRegression()
# loss(5,5.1)

In [157]:
class Linear:
    # m: Number of signals layer is receiving
    # n: Number of nodes in layer
    def __init__(self, m, n, alpha, wantBias=True):
        self.w = torch.randn(m,n)
        self.b = torch.zeros(1,n)
        self.wantBias = wantBias
        
        if wantBias:  
            self.b = torch.randn(1,n)


        self.lr = alpha
        
    def __call__(self, inp):
        self.inp = inp
        
        return inp@self.w + self.b
   
    def backwards(self, grad):
        # Calculate new weight values
        # 1) Add in a dimension so we can take transpose
        # 2) Take transpose
        # 3) Use broadcasting in order to have a matrix of proper size
        
        '''
        dL/dX = (dL/dY)*wT
        dL/dW = xT*(dL/dY)
        '''
        
        if type(grad) != type([1,2]):
            # We have a scalar
            print("A")
            
            # Don't think we need this, 
#             p = torch.unsqueeze(tmp[:-1],0) # tmp[:-1] represents our input
            # We are assuming that our input only has one dimension.
            print(self.inp)

#             modelInput = torch.unsqueeze(self.inp,0)
#             modelInput = torch.t(modelInput)
            modelInput = self.inp.t()

            # Just so I remember what this is:
            #     w :=    w   - \alpha*(gradient)
            #     w :=    w   - \alpha*(gradient of this layer)*(gradient of all of the layers)

            self.nW = self.w.add_( (-1)*(self.lr)*modelInput*grad )

            self.nB = self.b.add_( (-1)*(self.lr)*(grad) )
            
            return [grad,self.w.t()]
            
        elif type(grad) == type([1,2]):
            # We don't have a scalar
            print("B")
            
            self.nW = self.w.add_((-1)*(self.lr)*grad[0]*grad[1])
            self.nB = self.b.add_((-1)*(self.lr)*grad[0])
            
            return {"grad": grad, "weights": self.w, "biases": self.b}
        
        
        
    def update(self):
        self.w = self.nW
        self.b = self.nB

In [48]:
a = m.layers[1]

In [50]:
resid = 10

In [49]:
a.w.shape

torch.Size([5, 1])

In [57]:
t = torch.Tensor( [[5.6120, 4.8251, 1.8473, 1.7954, 2.9556]] )

In [59]:
t.t()

tensor([[5.6120],
        [4.8251],
        [1.8473],
        [1.7954],
        [2.9556]])

In [164]:
class Model:
    def __init__(self, alpha):
#         self.layers = [Linear(7,10,alpha), Linear(10,10,alpha), Linear(10,10,alpha), Linear(10,10,alpha), Linear(10,10,alpha), Linear(10,1,alpha)]
#         self.layers = [Linear(7,10,alpha), Linear(10,10,alpha), Linear(10,1,alpha)]
#         self.layers = [Linear(7,4,alpha), Linear(4,4,alpha), Linear(4,1,alpha)]
        self.layers = [Linear(7,5,alpha), Linear(5,1,alpha, wantBias=False)]
        self.activation = Sigmoid()
        self.loss = LogisticRegression()
        
    def forwards(self, inp):
        
        y = inp[-1]
        x = inp[:-1]
        
        for layer in self.layers:
            print("x: {}".format(x))
            x = layer(x) 
            
#         print("x: {}".format(x))
        
        
        yHat = self.activation(x)
        
        loss = self.loss(y, yHat)
        
        print("Loss is: {}".format(loss))
        
        return x
    
    def backwards(self):
        self.grad = 1
        
        
        expandedLayers = [self.loss, self.activation]
        
        for layer in expandedLayers:
            self.grad = layer.backwards(self.grad)
            
        
        
        # Iterate through expanded layers
        # Then iterate through the reverse of linear layers
        for layer in reversed(self.layers):
            self.grad = layer.backwards(self.grad)
            
        # Built update into backwards
        for layer in self.layers:
            layer.update()
        
        
        return self.grad
    
    
        
        
        
        
        

In [14]:
test = normalizedDataRaw.iloc[0,]
# print(test)
# print("\n")
# print(test[:-1])

In [50]:
type(test)

pandas.core.series.Series

In [79]:
test.shape

(8,)

In [51]:
test.values

array([0.94      , 0.92857143, 0.75      , 0.875     , 0.875     ,
       0.91346154, 1.        , 0.92063492])

In [15]:
tmp = torch.tensor(test.values, dtype=torch.float)
tmp.shape

torch.Size([8])

In [202]:
m = Model(0.01)

In [237]:
out = m.forwards(tmp)
# out = m.forwards(z)

x: tensor([0.9400, 0.9286, 0.7500, 0.8750, 0.8750, 0.9135, 1.0000])
x: tensor([[2.0027e-05, 6.2652e+00, 9.8404e-01, 1.5848e+00, 1.5815e+00]])
Loss is: 0.27779802680015564


In [238]:
r = m.backwards()

A
tensor([[2.0027e-05, 6.2652e+00, 9.8404e-01, 1.5848e+00, 1.5815e+00]])
B


In [86]:
r

{'grad': tensor([[ 0.0246, -0.0348,  0.0541,  0.2520, -0.2374]]),
 'weights': tensor([[-1.6635, -2.0175, -0.5783,  0.4368, -1.3694],
         [ 1.0926, -0.2654,  0.7496,  1.3693,  0.7699],
         [-2.0138,  0.0632, -0.2829,  0.2972, -2.6121],
         [ 0.5067,  0.3938, -2.2581,  0.6780,  0.4267],
         [-0.8548,  0.5003, -0.3730,  0.8662,  0.0191],
         [ 1.2546,  0.0237, -1.0010, -1.9121,  2.3600],
         [-1.2921, -0.5899, -0.1519, -0.2689, -0.1262]]),
 'biases': tensor([[ 0.9318, -0.0039, -0.6942, -0.1002,  0.3415]])}

'''
want to match up the columns
so want to broadcast vertically and subtract
'''

In [128]:
grad = r['grad']

In [129]:
w = r['weights']

In [130]:
b = r['biases']

In [131]:
w

tensor([[-1.3252, -1.0687,  1.0535, -1.8430, -0.4243],
        [-0.0246,  1.6342,  0.5786, -0.0766, -0.0326],
        [-1.2802,  0.4557, -0.1643,  0.5335, -1.6546],
        [-2.4095,  0.6296,  1.5242, -1.0265,  0.4545],
        [-0.4890,  1.4095,  0.2881,  0.2067,  0.5265],
        [-0.7257,  0.7895, -0.1789, -1.7820,  0.6183],
        [ 0.4492, -0.3642,  1.2904, -1.4778, -0.1929]])

In [132]:
grad

tensor([[ 0.0143,  0.0045, -0.0455,  0.0644,  0.0288]])

In [145]:
w.add_((-1)*grad)

tensor([[-1.5105, -1.1277,  1.6445, -2.6800, -0.7991],
        [-0.2099,  1.5752,  1.1696, -0.9137, -0.4074],
        [-1.4655,  0.3967,  0.4267, -0.3035, -2.0294],
        [-2.5948,  0.5705,  2.1152, -1.8635,  0.0796],
        [-0.6743,  1.3505,  0.8791, -0.6304,  0.1516],
        [-0.9111,  0.7305,  0.4121, -2.6191,  0.2435],
        [ 0.2639, -0.4232,  1.8815, -2.3148, -0.5677]])

In [103]:
b

tensor([[ 0.9318, -0.0039, -0.6942, -0.1002,  0.3415]])

In [104]:
b.add_((-1)*grad)

tensor([[ 0.9072,  0.0309, -0.7483, -0.3522,  0.5788]])