#Task 1

In [24]:
import numpy as np
import torch
import torch.nn.functional as F

###Neural nets with pytorch

In [25]:
# Tensors for inputs x and weights w1, w2.
x_torch = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) #concat x1, x2 into one matrix
w1_torch = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]], requires_grad=True)
w2_torch = torch.tensor([[0.2, 0.3], [0.4, 0.5], [0.6, 0.7], [0.8, 0.9]], requires_grad=True)

# Forward pass
z1_torch = torch.matmul(x_torch, w1_torch)
a1_torch = F.relu(z1_torch)
z2_torch = torch.matmul(a1_torch, w2_torch)
output_torch = F.softmax(z2_torch, dim=1)

print("PyTorch Neural Network Output:")
print("output for x1:", output_torch[0])
print("output for x2:", output_torch[1])


PyTorch Neural Network Output:
output for x1: tensor([0.1324, 0.8676], grad_fn=<SelectBackward0>)
output for x2: tensor([0.0145, 0.9855], grad_fn=<SelectBackward0>)


-----------------

# Neural net with numpy

In [26]:
import numpy as np

# Activation functions
def relu(x):
    return np.maximum(0, x)

def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=0)

# Given weights and inputs
x = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
w1 = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]])
w2 = np.array([[0.2, 0.3], [0.4, 0.5], [0.6, 0.7], [0.8, 0.9]])

# Forward pass
z1 = x.dot(w1) # Input to hidden
a1 = relu(z1) # Hidden activation
z2 = a1.dot(w2) # Hidden to output
output = softmax(z2.T).T # Output activation

print("Numpy Neural Network Output:")
print("output for x1:", output[0])
print("output for x2:", output[1])

Numpy Neural Network Output:
output for x1: [0.13238887 0.86761113]
output for x2: [0.01448572 0.98551428]


--------------------------------------

#Task 2
 - gradient

## Pytorch

In [27]:
# making forward process into function for pytorch
def forward(x):
    z1_torch = torch.matmul(x_torch, w1_torch) # input x into first weight layer w1
    a1_torch = F.relu(z1_torch) # activation with relu
    z2_torch = torch.matmul(a1_torch, w2_torch) # input a1 into second weight layer w2
    output_torch = F.softmax(z2_torch, dim=1) # activation with softmax
    return output_torch

# definition of cross entropy loss
def cross_entropy_loss(x, y):
    delta = 1e-7
    return -torch.sum(y*torch.log(x+delta))

In [28]:
# given data
x_torch = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
w1_torch = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]], requires_grad=True)
w2_torch = torch.tensor([[0.2, 0.3], [0.4, 0.5], [0.6, 0.7], [0.8, 0.9]], requires_grad=True)
y_torch = torch.tensor([[0,1],[1,0]]) #target label y.


# just one epoch for calculating gradient.
n_iter = 1

for i in range(n_iter):
    Y_pred = forward(x_torch)
    loss = cross_entropy_loss(Y_pred, y_torch)
    loss.backward() # backpropagation in order to get gradient

print(w1_torch.grad)
# print(w2_torch.grad)

tensor([[0.3810, 0.3810, 0.3810, 0.3810],
        [0.4663, 0.4663, 0.4663, 0.4663],
        [0.5516, 0.5516, 0.5516, 0.5516]])


---------------------------------------------

## Gradient with numpy

In [29]:
#Cross Entropy Loss for numpy (using numpy only)
def CrossEntropy_np(y_pred, y):
    return -1 * np.sum(y * np.log(y_pred))

In [30]:
# Define a class for convenience
class Neural_Net_np:
    def __init__(self):
        #weights
        self.w1 = np.array([[0.1, 0.2, 0.3, 0.4],
               [0.5, 0.6, 0.7, 0.8],
               [0.9, 1.0, 1.1, 1.2]])
        self.w2 = np.array([[0.2, 0.3],
               [0.4, 0.5],
               [0.6, 0.7],
               [0.8, 0.9]])

    def ReLU(self, z):
        return np.maximum(0, z)

    # Added relu derivative for backward pass
    def ReLU_derivative(self, x):
        return (x > 0).astype(float)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, x):
        self.z1 = x.dot(self.w1)
        self.a1 = self.ReLU(self.z1)
        self.z2 = self.a1.dot(self.w2)
        self.a2 = self.softmax(self.z2)
        return self.a2

    def cross_entropy_loss(self, y_pred, y):
        return -1 * np.sum(y * np.log(y_pred))

    def backward(self, x, y):
        # Gradient of the loss
        # Derive using chain rule
        dL_dz2 = self.a2 - y
        dL_da1 = dL_dz2.dot(self.w2.T)
        dL_dz1 = dL_da1 * self.ReLU_derivative(self.z1)
        dL_dw1 = x.T.dot(dL_dz1)
        return dL_dw1

In [31]:
# NUMPY
model = Neural_Net_np() #initialize model
x = np.array([[1.0,2.0,3.0],[4.0,5.0,6.0]])
output = model.forward(x) # pass x through neural network
y = np.array([[0,1], [1,0]])
loss = model.cross_entropy_loss(output, y) # get the loss using cross entropy loss
grad_w1 = model.backward(x, y)


print("Gradient of Loss with respect to w1:\n" , grad_w1)


Gradient of Loss with respect to w1:
 [[0.38096682 0.38096682 0.38096682 0.38096682]
 [0.46627936 0.46627936 0.46627936 0.46627936]
 [0.5515919  0.5515919  0.5515919  0.5515919 ]]


-----------

# Task 3

## ↓dropout with pytorch

In [32]:
import torch
import torch.nn.functional as F

# Set learning rate as 0.01
learning_rate = 0.01

#inputs and weights
x_torch = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
w1_torch = torch.tensor([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]], requires_grad=True)
w2_torch = torch.tensor([[0.2, 0.3], [0.4, 0.5], [0.6, 0.7], [0.8, 0.9]], requires_grad=True)
y_torch = torch.tensor([[0,1],[1,0]])

# update through 100 epochs
for epoch in range(100):
    # forward pass
    z1_torch = torch.matmul(x_torch, w1_torch)
    a1_torch = F.dropout(F.relu(z1_torch), p=0.4, training=True) #relu in inside the dropout
    z2_torch = torch.matmul(a1_torch, w2_torch)
    output = F.softmax(z2_torch)

    # Gain loss
    loss = cross_entropy_loss(output, y_torch)

    # Backpropagate
    loss.backward()

    # update weights
    with torch.no_grad():   # set torch.no_grad since we don't train at update step
        w1_torch -= learning_rate * w1_torch.grad
        w2_torch -= learning_rate * w2_torch.grad

        # gradient initialization
        w1_torch.grad.zero_()
        w2_torch.grad.zero_()

print(w1_torch)
print(w2_torch)

tensor([[0.0133, 0.0818, 0.1945, 0.2887],
        [0.3564, 0.4189, 0.5409, 0.5930],
        [0.6995, 0.7559, 0.8872, 0.8973]], requires_grad=True)
tensor([[0.2899, 0.2101],
        [0.4139, 0.4861],
        [0.5957, 0.7043],
        [0.8643, 0.8357]], requires_grad=True)


  output = F.softmax(z2_torch)


## ↓dropout with Numpy

In [33]:
# class update
class Neural_Net_np:
    def __init__(self):
        self.w1 = np.array([[0.1, 0.2, 0.3, 0.4],
               [0.5, 0.6, 0.7, 0.8],
               [0.9, 1.0, 1.1, 1.2]])
        self.w2 = np.array([[0.2, 0.3],
               [0.4, 0.5],
               [0.6, 0.7],
               [0.8, 0.9]])

    def ReLU(self, z):
        return np.maximum(0, z)

    def ReLU_derivative(self, x):
        return (x > 0).astype(float)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, x):
        self.z1 = x.dot(self.w1)
        self.a1 = self.ReLU(self.z1)
        self.z2 = self.a1.dot(self.w2)
        self.a2 = self.softmax(self.z2)
        return self.a2

    def cross_entropy_loss(self, y_pred, y):
        return -1 * np.sum(y * np.log(y_pred))


    def backward(self, x, y):
        # Gradient of the loss with respect to softmax input
        self.dL_dz2 = self.output - y
        self.dL_dw2 = self.a1.T.dot(self.dL_dz2)
        self.dL_da1 = self.dL_dz2.dot(self.w2.T)
        self.dL_dz1 = self.dL_da1 * self.ReLU_derivative(self.z1)
        self.dL_dw1 = x.T.dot(self.dL_dz1)
        return self.dL_dw1, self.dL_dw2
        # return both dL_dw1 and dL_dw2 for weight update

    # Added Dropout function
    def dropout(self, a1, rate=0.4):
        # Generate a mask to drop out neurons
        mask = np.random.binomial(1, 1-rate, size = a1.shape)
        return a1 * mask

    # Added dropout layer after the first activation (ReLU)
    def forward_with_dropout(self, x):
        self.z1 = x.dot(self.w1)
        self.a1 = self.ReLU(self.z1)
        self.a1_dropout = self.dropout(self.a1)
        self.z2 = self.a1_dropout.dot(self.w2)
        self.output = self.softmax(self.z2)
        return self.output

    # Added update weight function
    def update_weight(self, grad_w1, grad_w2, lr=0.01):
        # get the gradient with respect to w1 and w2, then multiply with learning rate(0.01)
        self.w1 -= lr * grad_w1
        self.w2 -= lr * grad_w2

In [34]:
model = Neural_Net_np()

#inputs and y values
x = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
y = np.array([[0, 1], [1, 0]])

for epoch in range(100):
    # forward pass
    output = model.forward_with_dropout(x)
    # gain loss
    loss = model.cross_entropy_loss(output, y)
    # backpropagation
    grad_w1, grad_w2 = model.backward(x, y)
    # update weight
    model.update_weight(grad_w1, grad_w2, lr = 0.01)

print(model.w1)
print(model.w2)



[[0.06099949 0.18883286 0.31666624 0.44449962]
 [0.47403567 0.5789945  0.68395333 0.78891215]
 [0.88707185 0.96915613 1.05124041 1.13332469]]
[[0.17448702 0.32551298]
 [0.42927113 0.47072887]
 [0.68405524 0.61594476]
 [0.93883934 0.76116066]]
