## Import Libraries

In [None]:
import random
import math

## Sigmoid Function

In [None]:
def sigmoid(gamma):
    """Apply the logistic sigmoid to every entry of a column vector.

    Args:
        gamma: Column vector given as a list of single-element lists,
            e.g. ``[[0.5], [-1.2]]``.

    Returns:
        A new column vector (list of single-element lists) holding
        ``1 / (1 + exp(-g))`` for each entry ``g`` of ``gamma``. An empty
        input yields an empty list.
    """
    u = []
    for row in gamma:
        g = row[0]
        if g < 0.0:
            # exp(g) cannot overflow for negative g. Writing the result as
            # exp(g) / (1 + exp(g)) keeps very small outputs; the
            # algebraically equal 1 - 1/(1 + exp(g)) cancels to exactly 0.0
            # once exp(g) drops below machine epsilon.
            e = math.exp(g)
            j = e / (1 + e)
        else:
            # exp(-g) <= 1 here, so this form cannot overflow either.
            j = 1 / (1 + math.exp(-g))
        u.append([j])
    return u

## Sigmoid Derivative

In [None]:
def sigmoidDerivative(gamma):
    """Evaluate the sigmoid derivative s * (1 - s) for each entry of gamma.

    Args:
        gamma: Column vector given as a list of single-element lists.

    Returns:
        A flat list of floats (not a column vector), one per entry of
        ``gamma``, where ``s`` is the corresponding output of ``sigmoid``.
    """
    return [entry[0] * (1 - entry[0]) for entry in sigmoid(gamma)]

## Matrix - Vector Multiplication

In [None]:
def matrixVectorMultiplication(matrix, vector):
    """Multiply a matrix by a column vector.

    Args:
        matrix: List of rows, each row a list of numbers. The matrix does
            not have to be square.
        vector: Column vector given as a list of single-element lists; it
            is expected to have one entry per matrix column.

    Returns:
        Column vector (list of single-element lists) with one entry per
        matrix row. An empty matrix yields an empty list.
    """
    result = []
    for row in matrix:
        # Pair each row entry with the matching vector entry so the number
        # of terms follows the column count instead of the row count.
        total = 0
        for cell, entry in zip(row, vector):
            total += cell * entry[0]
        result.append([total])
    return result

## Vector - Matrix Multiplication

In [None]:
def vectorMatrixMultiplication(vector, matrix):
    """Multiply a transposed column vector by a matrix.

    Computes ``vector^T * matrix`` and returns the result transposed back
    into column form.

    Args:
        vector: Column vector given as a list of single-element lists; it
            is expected to have one entry per matrix row.
        matrix: List of rows, each row a list of numbers. The matrix does
            not have to be square.

    Returns:
        Column vector (list of single-element lists) with one entry per
        matrix column. An empty matrix yields an empty list.
    """
    weights = [entry[0] for entry in vector]
    result = []
    # zip(*matrix) walks the matrix column by column, so the output length
    # follows the column count rather than the vector length.
    for column in zip(*matrix):
        total = 0
        for weight, cell in zip(weights, column):
            total += weight * cell
        result.append([total])
    return result

## Multiplication of 2 Vectors (Outer Product)

In [None]:
def vectorMultiplication(vectorA, vectorB):
    """Build the outer product of two column vectors.

    Args:
        vectorA: Column vector given as a list of single-element lists.
        vectorB: Column vector given as a list of single-element lists.

    Returns:
        Matrix (list of rows) with ``len(vectorA)`` rows and
        ``len(vectorB)`` columns, where entry ``[i][j]`` equals
        ``vectorA[i][0] * vectorB[j][0]``.
    """
    return [[a[0] * b[0] for b in vectorB] for a in vectorA]

## Addition of 2 Vectors

In [None]:
def vectorAddition(vectorA, vectorB):
    """Add two column vectors entry by entry.

    Args:
        vectorA: Column vector given as a list of single-element lists;
            its length sets the length of the result.
        vectorB: Column vector with at least as many entries as vectorA.

    Returns:
        Column vector (list of single-element lists) of pairwise sums.
    """
    # Index into vectorB by position so a too-short vectorB still raises
    # IndexError instead of being silently truncated.
    return [[a[0] + vectorB[idx][0]] for idx, a in enumerate(vectorA)]

In [None]:
def backwardPass(K, x, A, B, C):
    """Compute the loss gradients with respect to A, B and C by hand.

    Runs forwardPass once (which also prints the loss) and applies the
    chain rule to its intermediate values.

    Args:
        K: Passed through to forwardPass; not used directly here.
        x: Input column vector (list of single-element lists).
        A: Weight matrix feeding the sigmoid branch.
        B: Weight matrix feeding the linear branch.
        C: Weight matrix applied to the sum of both branches.

    Returns:
        Tuple ``(dL_dA, dL_dB, dL_dC)`` of matrices (lists of rows).
    """
    pre_activation, _, _, merged, output, _ = forwardPass(K, x, A, B, C)

    # L is the sum of squared entries of w, so dL/dw = 2 * w.
    grad_w = [[2 * entry[0]] for entry in output]

    # w = C * z, so dL/dz = C^T * dL/dw.
    grad_z = vectorMatrixMultiplication(grad_w, C)

    # z = u + v passes grad_z unchanged to both branches; the sigmoid
    # branch is additionally scaled by the sigmoid slope at y.
    slopes = sigmoidDerivative(pre_activation)
    grad_y = [[grad_z[idx][0] * slope] for idx, slope in enumerate(slopes)]

    # Each weight gradient is the outer product of the gradient arriving at
    # the product's output with the vector that the matrix multiplied.
    return (
        vectorMultiplication(grad_y, x),
        vectorMultiplication(grad_z, x),
        vectorMultiplication(grad_w, merged),
    )

In [None]:
def forwardPass(K, x, A, B, C):
    """Evaluate the network and its loss for one input vector.

    Computes y = A*x, u = sigmoid(y), v = B*x, z = u + v, w = C*z and the
    loss L as the sum of the squared entries of w. The loss is printed on
    every call.

    Args:
        K: Not used by this function.
        x: Input column vector (list of single-element lists).
        A: Weight matrix feeding the sigmoid branch.
        B: Weight matrix feeding the linear branch.
        C: Weight matrix applied to the sum of both branches.

    Returns:
        Tuple ``(y, u, v, z, w, L)`` of the intermediate column vectors
        and the scalar loss.
    """
    pre_activation = matrixVectorMultiplication(A, x)
    activated = sigmoid(pre_activation)
    linear_branch = matrixVectorMultiplication(B, x)
    merged = vectorAddition(activated, linear_branch)
    output = matrixVectorMultiplication(C, merged)

    # Each row of the output is a single-element list, hence the [component]
    # unpacking pattern.
    loss = sum(component**2 for [component] in output)
    print("L = ", loss)

    return pre_activation, activated, linear_branch, merged, output, loss

In [None]:
def _applyGradientStep(weights, gradient, lr):
    """Return a new matrix equal to ``weights - lr * gradient``, entry-wise."""
    return [
        [w - lr * g for w, g in zip(weight_row, gradient_row)]
        for weight_row, gradient_row in zip(weights, gradient)
    ]


def gradientDescent(K, x, A, B, C, lr, epoch):
    """Run plain gradient descent on A, B and C for a single input vector.

    Every epoch calls backwardPass (which prints the current loss) and
    moves each matrix one step against its gradient. The caller's matrices
    are never modified; new lists are built on every step.

    After the last epoch the manually computed gradients are printed and
    gradientWithPytorch is invoked on the same parameters those gradients
    were computed for, so both print-outs describe the same point.

    Args:
        K: Passed through to backwardPass and gradientWithPytorch.
        x: Input column vector (list of single-element lists).
        A: Initial weight matrix A (list of rows).
        B: Initial weight matrix B (list of rows).
        C: Initial weight matrix C (list of rows).
        lr: Learning rate multiplying each gradient.
        epoch: Number of descent steps. With 0 the matrices are returned
            unchanged and no gradient comparison is printed.

    Returns:
        Tuple ``(A, B, C)`` of the updated matrices.
    """
    gradients = None
    evaluated_at = None
    for _ in range(epoch):
        gradients = backwardPass(K, x, A, B, C)
        # Remember the parameters these gradients belong to before they are
        # overwritten by the update below.
        evaluated_at = (A, B, C)
        dL_dA, dL_dB, dL_dC = gradients

        A = _applyGradientStep(A, dL_dA, lr)
        B = _applyGradientStep(B, dL_dB, lr)
        C = _applyGradientStep(C, dL_dC, lr)

    if gradients is not None:
        dL_dA, dL_dB, dL_dC = gradients
        print("\nGradient calculated manually: \ndL_dA = ", dL_dA, "\n\ndL_dB = ", dL_dB, "\n\ndL_dC = ", dL_dC)

        # Autograd must see the pre-update parameters; passing the updated
        # ones would compare gradients taken at two different points.
        gradientWithPytorch(K, x, *evaluated_at)
    return A, B, C

In [None]:
import torch
def gradientWithPytorch(K, x, A, B, C):
    """Recompute the loss and its gradients with PyTorch autograd and print them.

    Builds the same computation as the manual forward pass
    (w = C * (sigmoid(A*x) + B*x), L = ||w||^2) from tensors, backpropagates
    and prints the loss followed by the gradients of A, B and C.

    Args:
        K: Not used by this function.
        x: Input column vector (list of single-element lists of floats).
        A: Weight matrix A (list of rows of floats).
        B: Weight matrix B (list of rows of floats).
        C: Weight matrix C (list of rows of floats).

    Returns:
        None; the results are only printed.
    """
    x_tensor, A_tensor, B_tensor, C_tensor = (
        torch.tensor(values, requires_grad=True) for values in (x, A, B, C)
    )

    sigmoid_branch = torch.sigmoid(A_tensor @ x_tensor)
    linear_branch = B_tensor @ x_tensor
    output = C_tensor @ (sigmoid_branch + linear_branch)
    L = torch.norm(output) ** 2

    print("\nAutograd L = ", L)

    # Populates the .grad attribute of every tensor created with
    # requires_grad=True.
    L.backward()
    print("\nGradient calculated using autograd: \n", A_tensor.grad, "\n\n", B_tensor.grad, "\n\n", C_tensor.grad)

In [None]:
# Experiment set-up: N random input vectors, K x K weight matrices.
N = 50
K = 3

A = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]

B = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]

C = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]

learning_rate = 0.01
epoch = 200

print("K = ", K, "\nA = ", A, "\nB = ", B, "\nC = ", C)

# Element-wise running sums of the trained matrices. They are sized from K
# (not a literal 3) so that changing K above keeps the accumulation valid.
A_sum = [[0 for _ in range(K)] for _ in range(K)]
B_sum = [[0 for _ in range(K)] for _ in range(K)]
C_sum = [[0 for _ in range(K)] for _ in range(K)]

for i in range(N):
    print("\n\n------------------- i = ", i, " -------------------------")

    x = [[random.uniform(-1, 1)] for _ in range(K)]
    print("x = ", x)

    # Every sample is trained starting from the same initial A, B, C;
    # gradientDescent returns new matrices and leaves these untouched.
    calcA, calcB, calcC = gradientDescent(K, x, A, B, C, learning_rate, epoch)
    A_sum = [[A_sum[r][c] + calcA[r][c] for c in range(K)] for r in range(K)]
    B_sum = [[B_sum[r][c] + calcB[r][c] for c in range(K)] for r in range(K)]
    C_sum = [[C_sum[r][c] + calcC[r][c] for c in range(K)] for r in range(K)]

    print("\nA, B, C Matrix calculated manually for x[", i, "]: \nA = ", calcA, "\n\nB = ", calcB, "\n\nC = ", calcC)

# Average the per-sample results into one final set of matrices.
A_avg = [[A_sum[r][c] / N for c in range(K)] for r in range(K)]
B_avg = [[B_sum[r][c] / N for c in range(K)] for r in range(K)]
C_avg = [[C_sum[r][c] / N for c in range(K)] for r in range(K)]

# NOTE: the final loss is evaluated on the last sampled x only.
_, _, _, _, _, L = forwardPass(K, x, A_avg, B_avg, C_avg)

print("\n\n\n\nFinal A, B, C Matrix calculated manually: \nA = ", A_avg, "\n\nB = ", B_avg, "\n\nC = ", C_avg)
print("the loss with final A, B and C matrix is", L)



K =  3 
A =  [[0.22070372579175834, -0.9881096957156834, -0.38849844972732783], [-0.6227793713462062, 0.8947986358012332, -0.3491844509823312], [-0.5882685803443013, 0.51050728875183, 0.3307601479948503]] 
B =  [[0.2730615643667915, -0.12561774261032088, 0.6670685269321845], [-0.8042946627836787, -0.44311582949567785, -0.8820295664700486], [0.6168683662985048, 0.91728787431295, -0.7168324736067053]] 
C =  [[0.5046804026639691, 0.15213285698400547, 0.3778588570050343], [0.08149144424237864, 0.41844179941895243, -0.9976645102167967], [0.34488786002313354, 0.6788007756109913, -0.6431202983634927]]


------------------- i =  0  -------------------------
x =  [[0.2642019800427373], [-0.438596634472455], [0.8553252264173248]]
L =  0.36594065108874974
L =  0.3224382581254848
L =  0.28482882736530335
L =  0.2522022616173457
L =  0.2238078864657213
L =  0.1990229541206096
L =  0.17732812106912052
L =  0.15828818458295435
L =  0.14153682597827932
L =  0.1267644359884913
L =  0.11370833213586987


L =  1.1757652720023485e-05
L =  9.93320354793419e-06
L =  8.392928695142066e-06
L =  7.092393113850434e-06
L =  5.994138346479824e-06
L =  5.066582271588394e-06
L =  4.283093242768596e-06
L =  3.621210813988605e-06
L =  3.061989711062733e-06
L =  2.589447451093993e-06
L =  2.1900991486645036e-06
L =  1.8525656782376358e-06
L =  1.5672435692302303e-06
L =  1.326026862468601e-06
L =  1.1220727117599737e-06
L =  9.496038202648691e-07
L =  8.037418984085487e-07
L =  6.803672519493817e-07
L =  5.760003836610781e-07
L =  4.877021435414519e-07
L =  4.129895102567783e-07
L =  3.4976454732775644e-07
L =  2.962544652037321e-07
L =  2.5096104656158867e-07
L =  2.1261796671078962e-07
L =  1.8015477206954012e-07
L =  1.52666474247078e-07
L =  1.2938788110792053e-07
L =  1.0967192417136794e-07
L =  9.297135793403267e-08
L =  7.882330462208198e-08
L =  6.683620037807733e-08
L =  5.6678768426083e-08
L =  4.8070703347448854e-08
L =  4.077480000258886e-08
L =  3.459030227398755e-08
L =  2.9347281918724

L =  6.506814642769939e-17
L =  5.193948654467127e-17
L =  4.146006879775855e-17
L =  3.3095226225007106e-17
L =  2.6418224356015762e-17
L =  2.1088448734427957e-17
L =  1.6834032848371535e-17
L =  1.3437988658044007e-17
L =  1.0727111172764536e-17
L =  8.563148389517672e-18
L =  6.8357551192660875e-18
L =  5.4568439187994195e-18
L =  4.356107170857857e-18
L =  3.477422743022708e-18
L =  2.7759918746217686e-18
L =  2.2160556482002394e-18
L =  1.7690686898160886e-18
L =  1.4122468265382132e-18
L =  1.1273996263244615e-18
L =  9.000076902085407e-19
L =  7.18482479889561e-19
L =  5.735716728696805e-19
L =  4.578890629501685e-19
L =  3.655390995526696e-19
L =  2.9181596209038184e-19
L =  2.329619145078083e-19
L =  1.859780793932853e-19
L =  1.484702756404784e-19
L =  1.1852721966784596e-19
L =  9.462346241264789e-20
L =  7.554025773744676e-20
L =  6.030595570785115e-20
L =  4.814398832087949e-20
L =  3.8434845425477016e-20
L =  3.068381743912791e-20
L =  2.4495992082299172e-20
L =  1.95559

OverflowError: (34, 'Result too large')

In [None]:
# Fixed 3 x 3 matrices A, B and C, written one row per line.
A = [
    [-0.3768789350834251, 0.9159825767040246, -0.3375579990700469],
    [0.1205040252963541, -0.800641421338451, 0.44112323066208725],
    [0.9778883163473212, 0.7361137146293499, 0.008471689103785141],
]

B = [
    [0.6434637846824094, -0.4348469263410542, 0.13307270640347996],
    [-0.6692911785133722, 0.2184282791249969, -0.44586822712472174],
    [0.6250754524492683, 0.2754090142100635, 0.3409097561451826],
]

C = [
    [0.35515836423385055, 0.03527248868118342, -0.4361826516354353],
    [-0.5672308843546188, 0.2686411896374405, 0.45307529900407395],
    [-0.33051897171627176, 0.21321854310422225, 0.1673571071871538],
]

In [None]:
# Scratch check: Euclidean norm of (1, 2, 3) divided by the element count.
(1**2 + 2**2 + 3**2)**(1/2)/3

1.247219128924647

In [None]:
# Scratch check: arithmetic mean of 1, 2 and 3.
(1+2+3)/3

2.0