In [6]:
import numpy as np
import random
import torch
import torch.nn as nn
import sys

In [3]:
def xSum(X,W):
    h = torch.from_numpy(X)
    z = torch.matmul(W,h) #matrics multiplication
    return z

In [4]:
inputdim = 10 #features/columns
n = 1000 #datapoints/rows
X = np.random.rand(n,inputdim) 
y = np.random.randint(0,2,n) #between 0-1

X.shape,y.shape, np.unique(y)

((1000, 10), (1000,), array([0, 1]))

In [5]:
W = torch.tensor(np.random.uniform(0,1,inputdim),requires_grad=True)
W

tensor([0.0027, 0.0763, 0.7127, 0.4023, 0.9599, 0.2864, 0.2187, 0.2537, 0.8438,
        0.7549], dtype=torch.float64, requires_grad=True)

In [6]:
z = xSum(X[0,:],W)
z

tensor(2.1511, dtype=torch.float64, grad_fn=<DotBackward0>)

### Lets create a Neural Network with 1 input , 2 layers (1st with 2 neurons and 2nd with 3 neurons), output

In [7]:
def forwardStep(X,W_list):
    h = torch.from_numpy(X) #outputs of computational neurons
    for W in W_list:
        h = torch.matmul(W,h) #passing the outputs for each neuron into next layer W
    return h #Output h (input for next Layer)

In [8]:
inputdim = 10 #features/columns
n = 1000 #datapoints/rows
X = np.random.rand(n,inputdim) 
y = np.random.randint(0,2,n) #between 0-1

#Layers
W1 = torch.tensor(np.random.uniform(0,1,(2,inputdim)),requires_grad=True) #Layer 1 receiving input from X_train #2 in (2,inputdim) represents 2 neurons
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True) #Layer 2 receving input from Layer 1 #3 neurons receiving from 2 neurons so inputdim is 2
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True) #output receving input from Layer 2 #single binary classification output receving from 3 neurons
W_list = [W1
         ,W2
         ,W3]

#Feeding data through layers and generate output z
z = forwardStep(X[0,:],W_list)

z

tensor(3.0075, dtype=torch.float64, grad_fn=<DotBackward0>)

In [36]:
activation_fun = nn.Sigmoid()
# activation_fun = nn.ReLU()
x = torch.randn(1) #input
y = torch.randint(0,2,(1,),dtype=torch.float) #True Label
y_hat = activation_fun(x) #Predicted Label
loss_fun = nn.BCELoss() #Binary Cross Entropy Loss
                        #loss is a measure of performance
                        #lower loss means better model
loss_value = loss_fun(y_hat,y)
print(loss_value.item())

0.7849500775337219


In [37]:
y,y_hat

(tensor([1.]), tensor([0.4561]))

In [38]:
m = nn.Sigmoid()
loss_fun = nn.BCELoss()
lr = 0.0001
x = torch.randn(1) #SINGLE INPUT
y = torch.randint(0,2,(1,),dtype=torch.float) #NO NEED INPUT DIMENSIONS
w = torch.randn(1,requires_grad=True) #random weight

In [39]:
nIter = 100 #Gradient desent for 100 steps
for i in range(nIter):
    y_hat = m(w*x) #pass the input and weight through sigmoid m
    loss = loss_fun(y_hat,y) #Calculate loss
    loss.backward() #get Gradient for loss function with respect to W
    dw = w.grad.data #differential w
    with torch.no_grad():
        w -= lr*dw #Gradient Descent
    w.grad.data.zero_() #reset gradient data
    print(loss.item())

0.6855022311210632
0.6855022311210632
0.6855022311210632
0.6855021715164185
0.6855020523071289
0.6855020523071289
0.6855019330978394
0.6855018138885498
0.6855018138885498
0.6855018138885498
0.6855016946792603
0.6855015754699707
0.6855015754699707
0.6855015754699707
0.6855014562606812
0.6855013370513916
0.6855013370513916
0.685501217842102
0.6855010986328125
0.6855010986328125
0.6855010986328125
0.685500979423523
0.6855008602142334
0.6855008602142334
0.6855008602142334
0.6855007410049438
0.6855006217956543
0.6855006217956543
0.6855005025863647
0.6855003833770752
0.6855003833770752
0.6855003833770752
0.6855002641677856
0.6855001449584961
0.6855001449584961
0.6855001449584961
0.6855000257492065
0.685499906539917
0.685499906539917
0.6854997873306274
0.6854996681213379
0.6854996681213379
0.6854996681213379
0.6854995489120483
0.6854994297027588
0.6854994297027588
0.6854994297027588
0.6854993104934692
0.6854992508888245
0.6854992508888245
0.6854991316795349
0.6854990124702454
0.68549901247024

In [5]:
#  Pseudo Code For a Raw Neural Network
#  _______  
# |_______>  For nEpochs:
# |.nEpoch    # We have a data set with Xn features and True Labels Y      
# |
# |  _____
# | |_____> For nItern:
# | | .nItern # Pick a random row from the data set (X1,X2 ... Xn) and its Corresponding Label Y 
# | |
# | |
# | |   # Input-Data (X1,X2 ... Xn) to First-Layer
# | |        # --> First Layer (Add Weights AND Bias) = W0,X1W1,..XNWN  (in single neuron) (Only first Iteration use updated weights from second)
# | |        # --> Weighted Sum (Z) =  W0 + X1W1 + X2W2 ... + XnWn  (in single neuron)
# | |        # --> Pass through First Layer Activiation Function ( A_F() ) (in single neuron)
# | |        # --> First Layer Output A_F(Z) (from single neuron)
# | |        # --> Output -->> From all neurons in First Layer (A_F(Z)1,A_F(Z)2 ... A_F(Z)n)
# | |    
# | |    
# | |    # First-Layer Output Data (A_F(Z)1,A_F(Z)2 ... A_F(Z)n) to Second-Layer
# | |        # --> Second Layer (Add Weights AND Bias) = W0,W1(A_F(Z)1),W2(A_F(Z)2) ... Wn(A_F(Z)n) (Only first Iteration use updated weights from second) 
# | |        # --> Weighted Sum (Z) =  W0 + W1(A_F(Z)1) + W2(A_F(Z)2) ... Wn(A_F(Z)n)   
# | |        # --> Second Layer Activiation Function ( A_F() )
# | |        # --> Second Layer Output A_F(Z)
# | |        # --> Output -->> From all neurons in Second Layer (A_F(Z)1,A_F(Z)2 ... A_F(Z)n)
# | |    
# | |    # ... (n-Dense Layers, Flatten Layers, Dropout Layers, Pooling Layers, BatchNormalisation Layers) ...
# | |    
# | |    
# | |    # n-Layer Output Data (A_F(Z)1,A_F(Z)2 ... A_F(Z)n) to Output-Layer (Probability Label Layer)
# | |        # --> Output Layer (Add Weights AND Bias) = W0,W1(A_F(Z)1),W2(A_F(Z)2) ... Wn(A_F(Z)n) (Only first Iteration use updated weights from second) 
# | |        # --> Weighted Sum (Z) =  W0 + W1(A_F(Z)1) + W2(A_F(Z)2) ... Wn(A_F(Z)n)   
# | |        # --> Output Layer Activiation Function ( A_F() )
# | |        # --> Output Layer Output A_F(Z) ~ y_hat , where y_hat is a probability distribution of possible output for data
# | |    
# | |    
# | |    # Output-Layer (y_hat) to Loss Function to Compute Loss (Update Weights with new Weights from Gradient Descent)   _____
# | |        # --> Loss() = Binary CrossEntropy Loss or MSLoss or CrossEntopy Loss                                         _____>  The placement of these steps 
# | |        # --> L(W) = Loss(y_hat, Y), where L(W) is basically a function made of Weights as Variables                  _____>  can influence the type of    
# | |______  # --> Update Weight(W) for selected row to reduce Loss with Gradient Descent W -= (dL(W)/dW) * Learning rate  _____>  gradient descent: stochastic, 
# | |______< # --> End Iternation and start new Iteration with Updated Parameters W or End Epoch                           _____>  mini-batch or batch 
# | .Pick Rnd Row of Data
# |
# |_____________
# |_____________> End Epoch Loop if Loss() tends to 0 as y_hat is close to actual Label Y or Epoch hits Max
#  .Completed NN

In [7]:
#Lets create out own sigmoid activation function
def activate(x): #sigmoid activation
    return 1/(1+torch.exp(-x)) #1/(1+e^-x)

# Update Weights
def updateParams(W_List,dW_List,lr):
    with torch.no_grad(): # so we do not lose our initialised W's
        for i in range(len(W_List)):
            W_List[i] -= dW_List[i] * lr #update layer by layer
    return W_List

#Forward propagation between Layers
def forwardStep(X,W_list):
    h = torch.from_numpy(X) #outputs of computational neurons from previous layer
    for W in W_list:
        z = torch.matmul(W,h) #passing the outputs for each neuron into next layer W
        h = activate(z) #Pass through Activation Function
    return h #Output h


In [8]:
def trainNN_sgd(X,y,W_list,loss_fun,lr=0.0001,nepochs=10):
    for epoch in range(nepochs):
        avgLoss = []
        for i in range(len(y)): #go through each data row
            Xin = X[i,:]
            yTrue = y[i]
            y_hat = forwardStep(Xin,W_list)
            loss = loss_fun(y_hat, torch.tensor(yTrue,dtype=torch.double)) #calculate loss
            loss.backward() #Immediately compute gradient for current row of data
            avgLoss.append(loss.item()) #record loss for each row
            sys.stdout.flush()
            dW_list = []
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data) #Create a List of W derivatives to update Old W 
            W_list = updateParams(W_list,dW_list,lr) #Immediately update W for current row of data 
            for j in range(len(W_list)):
                W_list[j].grad.data.zero_() 
        print("Loss after epoch=%d: %f" %(epoch,np.mean(np.array(avgLoss))))
    return W_list

In [9]:
def trainNN_batch(X,y,W_list,loss_fun,lr=0.0001,nepochs=100):
    n = len(y) # Complute datapoints outside
    for epoch in range(nepochs):
        loss = 0 # no need avg loss
        for i in range(n):
            Xin = X[i,:]
            yTrue = y[i]
            y_hat = forwardStep(Xin,W_list)
            loss += loss_fun(y_hat,torch.tensor(yTrue,dtype=torch.double)) #Accumulate the loss
        loss = loss/n #Avg Loss
        loss.backward() #After loss is fully computed
        sys.stdout.flush()
        dW_list = []
        for j in range(len(W_list)):
            dW_list.append(W_list[j].grad.data)
        W_list = updateParams(W_list,dW_list,lr)
        for j in range(len(W_list)):
            W_list[j].grad.data.zero_()
        print("Loss after epoch=%d: %f" %(epoch,loss))
    return W_list

In [11]:
def trainNN_minibatch(X,y,W_list,loss_fun,lr=0.0001,nepochs=100,batchSize=16):
    n = len(y)
    numBatches = n//batchSize
    
    for epoch in range(nepochs):
        for batch in range(numBatches):
            X_batch = X[batch*batchSize:(batch+1)*batchSize,:]
            y_batch = y[batch*batchSize:(batch+1)*batchSize]
            loss = 0
            for i in range(batchSize): #Vectorising this makes the code very fast
                Xin = X_batch[i,:]
                yTrue = y_batch[i]
                y_hat = forwardStep(Xin,W_list)
                loss += loss_fun(y_hat,torch.tensor(yTrue,dtype=torch.double))
            loss = loss/batchSize
            loss.backward()
            sys.stdout.flush()
            dW_list = []
            for j in range(len(W_list)):
                dW_list.append(W_list[j].grad.data)
            W_list = updateParams(W_list,dW_list,lr)
            for j in range(len(W_list)):
                W_list[j].grad.data.zero_()
        print("Loss after epoch=%d: %f" %(epoch,loss/numBatches))
    return W_list

In [12]:
# Loss Function
loss_fun = nn.BCELoss()

# Training Process
inputdim = 10 #features/columns
n = 1000 #datapoints/rows
X = np.random.rand(n,inputdim) 
y = np.random.randint(0,2,n) #between 0-1

#Layers
W1 = torch.tensor(np.random.uniform(0,1,(2,inputdim)),requires_grad=True) #Layer 1 receiving input from X_train #2 in (2,inputdim) represents 2 neurons
W2 = torch.tensor(np.random.uniform(0,1,(3,2)),requires_grad=True) #Layer 2 receving input from Layer 1 #3 neurons receiving from 2 neurons so inputdim is 2
W3 = torch.tensor(np.random.uniform(0,1,3),requires_grad=True) #output receving input from Layer 2 #single binary classification output receving from 3 neurons
W_list = [W1
         ,W2
         ,W3]

#Feeding data through layers and generate output    
#   trainNN_sgd(X,y,W_list,loss_fun,lr=0.0001,nepochs=10) #Stochastic Gradient Descent
#   trainNN_batch(X,y,W_list,loss_fun,lr=0.0001,nepochs=10)
trainNN_minibatch(X,y,W_list,loss_fun,lr=0.0001,nepochs=10)

Loss after epoch=0: 0.013781
Loss after epoch=1: 0.013771
Loss after epoch=2: 0.013760
Loss after epoch=3: 0.013749
Loss after epoch=4: 0.013738
Loss after epoch=5: 0.013728
Loss after epoch=6: 0.013717
Loss after epoch=7: 0.013707
Loss after epoch=8: 0.013696
Loss after epoch=9: 0.013686


[tensor([[0.0702, 0.9476, 0.0570, 0.8654, 0.1617, 0.6987, 0.7447, 0.7541, 0.1051,
          0.1382],
         [0.0936, 0.2436, 0.5060, 0.6209, 0.0966, 0.2178, 0.5787, 0.0666, 0.6127,
          0.1513]], dtype=torch.float64, requires_grad=True),
 tensor([[0.7735, 0.3378],
         [0.8087, 0.7042],
         [0.7176, 0.7092]], dtype=torch.float64, requires_grad=True),
 tensor([0.2918, 0.4962, 0.3904], dtype=torch.float64, requires_grad=True)]

In [47]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data.dataloader import DataLoader

In [48]:
inputdim = 10 #features/columns
n = 1000 #datapoints/rows
X = np.random.rand(n,inputdim) 
y = np.random.randint(0,2,n) #between 0-1

tensor_x = torch.Tensor(X) #Convert data to tensors
tensor_y = torch.Tensor(y)

Xy = TensorDataset(tensor_x,tensor_y) #Convert to Tensor Dataset
Xy_loader = DataLoader(Xy, batch_size=16 ,shuffle=True, drop_last=True) #Minibatches

In [49]:
model = nn.Sequential(
    nn.Linear(inputdim,200,bias= True), #200 neurons layer 1
    nn.ReLU(),
    nn.BatchNorm1d(num_features=200), #input tensors are only 1 dimensional
    nn.Dropout(0.5),
    nn.Linear(200,100), #100 neurons layer 2
    nn.Tanh(),
    nn.BatchNorm1d(100),
    nn.Linear(100,1), #Output
    nn.Sigmoid() 
)

In [50]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [51]:
loss_fn = nn.BCELoss()

In [52]:
nepochs = 100
for epoch in range(nepochs):
    for X, y in Xy_loader:
        batch_size = X.shape[0]
        X = X.view(batch_size, -1)
        y = y.view(batch_size, -1) # Ensure that y has the same shape as y_hat
        y_hat = model(X)
        loss = loss_fn(y_hat, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch {}, Loss: {:.4f}".format(epoch+1, float(loss)))

Epoch 1, Loss: 0.7120
Epoch 2, Loss: 0.6475
Epoch 3, Loss: 0.7701
Epoch 4, Loss: 0.6521
Epoch 5, Loss: 0.6975
Epoch 6, Loss: 0.6352
Epoch 7, Loss: 0.7184
Epoch 8, Loss: 0.6658
Epoch 9, Loss: 0.6794
Epoch 10, Loss: 0.6842
Epoch 11, Loss: 0.7240
Epoch 12, Loss: 0.6431
Epoch 13, Loss: 0.6594
Epoch 14, Loss: 0.6839
Epoch 15, Loss: 0.5784
Epoch 16, Loss: 0.6848
Epoch 17, Loss: 0.6680
Epoch 18, Loss: 0.6412
Epoch 19, Loss: 0.6549
Epoch 20, Loss: 0.6244
Epoch 21, Loss: 0.7171
Epoch 22, Loss: 0.6141
Epoch 23, Loss: 0.6160
Epoch 24, Loss: 0.7380
Epoch 25, Loss: 0.6994
Epoch 26, Loss: 0.6879
Epoch 27, Loss: 0.6866
Epoch 28, Loss: 0.7166
Epoch 29, Loss: 0.6023
Epoch 30, Loss: 0.6484
Epoch 31, Loss: 0.6968
Epoch 32, Loss: 0.6907
Epoch 33, Loss: 0.7046
Epoch 34, Loss: 0.6380
Epoch 35, Loss: 0.7254
Epoch 36, Loss: 0.7008
Epoch 37, Loss: 0.5858
Epoch 38, Loss: 0.5722
Epoch 39, Loss: 0.6216
Epoch 40, Loss: 0.6311
Epoch 41, Loss: 0.6334
Epoch 42, Loss: 0.5960
Epoch 43, Loss: 0.6024
Epoch 44, Loss: 0.61

In [41]:
# lets predict a value
with torch.no_grad():
    xt = torch.tensor(np.random.rand(1,inputdim))
    y2 = model(xt.float())
    print(y2.detach().numpy()[0][0])

0.97317445
