In [1]:
import math


def matrix_multiplication(matrix1, matrix2):
    """Return the matrix product of ``matrix1`` and ``matrix2``.

    Both arguments are indexed as ``matrix[row][col]``. The number of
    columns of ``matrix1`` (read from its first row) must equal the number
    of rows of ``matrix2``.

    Returns:
        A new list of lists with ``len(matrix1)`` rows and
        ``len(matrix2[0])`` columns.

    Raises:
        ValueError: if the inner dimensions do not agree.
    """
    # Guard clause: inner dimensions have to line up.
    if len(matrix1[0]) != len(matrix2):
        raise ValueError("Matrix dimensions not compatible for multiplication")

    inner_size = len(matrix2)
    column_count = len(matrix2[0])

    product = []
    for left_row in matrix1:
        out_row = []
        for j in range(column_count):
            # Plain running total starting from integer 0, added in index
            # order, so float rounding matches a naive accumulation.
            total = 0
            for k in range(inner_size):
                total += left_row[k] * matrix2[k][j]
            out_row.append(total)
        product.append(out_row)

    return product


def sigmoid_matrix(matrix):
    """Apply the logistic sigmoid to every entry of a matrix.

    Args:
        matrix: iterable of rows, each an iterable of real numbers.

    Returns:
        A new list of lists holding ``1 / (1 + e**-v)`` for each entry ``v``.
    """
    def _stable_sigmoid(value):
        # math.exp(-value) raises OverflowError once -value gets large
        # (around 710), so negative inputs use the algebraically equal form
        # e**v / (1 + e**v), whose exponent is never positive.
        if value >= 0:
            return 1 / (1 + math.exp(-value))
        exp_value = math.exp(value)
        return exp_value / (1 + exp_value)

    return [[_stable_sigmoid(element) for element in row] for row in matrix]


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of a real number.

    The value is computed piecewise so that no call to ``math.exp`` ever
    receives a large positive argument: the naive formula raises
    OverflowError for very negative ``x`` (around -710 and below).
    """
    if x >= 0:
        # exp(-x) <= 1 here, so this cannot overflow.
        return 1 / (1 + math.exp(-x))
    # Equivalent form for x < 0; exp(x) < 1 and underflows harmlessly to 0.0.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)


def sigmoid_derivative(x):
    """Return the sigmoid's derivative at ``x``, i.e. ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    activation = sigmoid(x)
    return activation * (1 - activation)


def multiply_scalar(matrix, scalar):
    """Return a new matrix in which every entry of ``matrix`` is multiplied by ``scalar``.

    The input is left untouched; each output entry is ``entry * scalar``.
    """
    scaled = []
    for row in matrix:
        scaled_row = []
        for entry in row:
            scaled_row.append(entry * scalar)
        scaled.append(scaled_row)
    return scaled


def sigmoid_derivative_matrix(matrix):
    """Apply ``sigmoid_derivative`` to every entry and return the results as a new list of lists."""
    derivatives = []
    for row in matrix:
        derivatives.append([sigmoid_derivative(entry) for entry in row])
    return derivatives


def elementwise_addition(matrix1, matrix2):
    """Return the entry-by-entry sum of two matrices of identical shape.

    Args:
        matrix1: list of rows (each a list of numbers).
        matrix2: list of rows with the same row count and row lengths.

    Returns:
        A new list of lists where each entry is
        ``matrix1[i][j] + matrix2[i][j]``. Two empty matrices give ``[]``.

    Raises:
        ValueError: if the row counts differ or any pair of corresponding
            rows has different lengths.
    """
    # Compare every pair of rows, not just the first one, so ragged input is
    # rejected instead of being truncated or failing with an IndexError.
    same_shape = len(matrix1) == len(matrix2) and all(
        len(row1) == len(row2) for row1, row2 in zip(matrix1, matrix2)
    )
    if not same_shape:
        raise ValueError(
            "Matrices must have the same dimensions for element-wise addition.")

    return [
        [left + right for left, right in zip(row1, row2)]
        for row1, row2 in zip(matrix1, matrix2)
    ]


def forward_propagation(x, A, B, C):
    """Run the forward pass and return every intermediate value.

    ``x`` is used as a scalar multiplier for the matrices ``A`` and ``B``.
    The steps are:

        y = x * A
        u = sigmoid(y)      (element-wise)
        v = x * B
        z = u + v           (element-wise)
        w = C @ z
        L = w @ w

    Returns:
        The tuple ``(y, u, v, z, w, L)``.
    """
    scaled_a = multiply_scalar(A, x)
    activated = sigmoid_matrix(scaled_a)
    scaled_b = multiply_scalar(B, x)
    combined = elementwise_addition(activated, scaled_b)
    projected = matrix_multiplication(C, combined)
    loss = matrix_multiplication(projected, projected)
    return scaled_a, activated, scaled_b, combined, projected, loss


# Example inputs: a scalar x and three 2x2 matrices given as lists of rows.
x = 2
A = [[1, 2], [2, 2]]
B = [[2, 5], [1, 3]]
C = [[1, 6], [9, 0]]
# Run the forward pass once; the unpacked values (w in particular) are passed
# to the gradient functions called in later cells.
y, u, v, z, w, L = forward_propagation(x, A, B, C)


In [6]:
def partial_derivative_L_B(w, C, x):
    """Return ``(2 * w) @ (x * C)``, the expression this notebook uses for dL/dB.

    Args:
        w: matrix (list of rows) scaled by 2 before multiplying.
        C: matrix (list of rows) scaled by ``x`` before multiplying.
        x: scalar multiplier.
    """
    doubled_w = multiply_scalar(w, 2)
    scaled_c = multiply_scalar(C, x)
    return matrix_multiplication(doubled_w, scaled_c)

In [9]:
def partial_derivative_L_C(W, z):
    """Return ``(2 * W) @ z``, the expression this notebook uses for dL/dC.

    Args:
        W: matrix (list of rows) scaled by 2 before multiplying.
        z: matrix (list of rows) used as the right-hand factor.
    """
    doubled_w = multiply_scalar(W, 2)
    return matrix_multiplication(doubled_w, z)

In [17]:
def partial_derivative_L_A(W, C, x, A, B):
    """Return ``x * ((2 * W) @ C @ s')``, the expression this notebook uses for dL/dA.

    ``s'`` is the element-wise sigmoid derivative evaluated at
    ``x * A + x * B``.

    NOTE(review): forward_propagation applies the sigmoid to ``x * A`` only,
    whereas the derivative here is evaluated at ``x * A + x * B`` — confirm
    this is the intended point of evaluation.
    """
    # Sigmoid derivative at the element-wise sum of the two scaled matrices.
    scaled_a = multiply_scalar(A, x)
    scaled_b = multiply_scalar(B, x)
    sigma_prime = sigmoid_derivative_matrix(
        elementwise_addition(scaled_a, scaled_b))

    # Chain the matrix factors left to right, then scale the result by x.
    doubled_w = multiply_scalar(W, 2)
    upstream = matrix_multiplication(doubled_w, C)
    chained = matrix_multiplication(upstream, sigma_prime)
    return multiply_scalar(chained, x)

In [18]:
# Show dL/dA for the example inputs, using w from the forward pass above.
print(partial_derivative_L_A(w,C,x,A,B))

[[6.267666763663353, 0.026469568899416006], [11.809975350717593, 0.05096345301942277]]


In [20]:
# Print the result of out_dL_dA for the same inputs, for comparison with the
# value printed by partial_derivative_L_A.
# NOTE(review): out_dL_dA is not defined in any cell visible here — presumably
# it comes from a cell that was removed or is not shown; confirm this cell
# still runs after Restart Kernel -> Run All.
print(out_dL_dA(w,x,A,B,C))

[[6.267666763663353, 0.026469568899416006], [11.809975350717593, 0.05096345301942277]]


In [4]:
# def partial_derivative_L_A(W, C, x, A, B):
#     z = sigmoid_matrix(multiply_scalar(A, x)) + multiply_scalar(B, x)
#     sigma_prime = [[sigmoid_derivative(element) for element in row] for row in z]
#     partial_A = multiply_scalar(multiply_scalar(W, 2), C)
#     partial_A = multiply_scalar(partial_A, sigma_prime)
#     partial_A = matrix_multiplication(partial_A, [[x]])
#     return partial_A

# # Example usage
# partial_A = partial_derivative_L_A(w, C, x, A, B)
# print("Partial derivative of L with respect to A:")
# print(partial_A)


In [5]:
# import math
# import random
# import matplotlib.pyplot as plt
# # Sigmoid function
# def sigmoid(x):
#     return 1 / (1 + math.exp(-x))

# # Forward propagation
# def forward_propagation(x, A, B, C):
#     y = [sum(A[i][j] * x[j] for j in range(len(x))) for i in range(len(A))]
#     u = [sigmoid(yi) for yi in y]
#     v = [sum(B[i][j] * x[j] for j in range(len(x))) for i in range(len(B))]
#     z = [ui + vi for ui, vi in zip(u, v)]
#     w = [sum(C[i][j] * z[j] for j in range(len(z))) for i in range(len(C))]
#     L = sum(wi ** 2 for wi in w)
#     return L, y, u, v, z, w

# # Backward propagation to compute gradients
# def backward_propagation(x, A, B, C, y, u, v, z, w):
#     # Initialize gradients
#     dL_dw = [2 * wi for wi in w]
#     dL_dz = [sum(C[i][j] * dL_dw[i] for i in range(len(C))) for j in range(len(z))]
#     dL_dv = dL_dz
#     dL_du = [dL_dv[i] * u[i] * (1 - u[i]) for i in range(len(u))]
#     dL_dy = [A[i][j] * dL_du[i] for i in range(len(A)) for j in range(len(A[i]))]

#     # Compute gradients for A
#     dL_dA = [[dL_dy[i * len(A) + j] * sigmoid(y[j]) * (1 - sigmoid(y[j])) * x[k] for k in range(len(x))] for i in range(len(A)) for j in range(len(A[i]))]

#     # Compute gradients for B
#     dL_dB = [[dL_dv[i] * sigmoid(y[i]) * (1 - sigmoid(y[i])) * x[j] for j in range(len(x))] for i in range(len(B))]

#     # Compute gradients for C
#     dL_dC = [[dL_dw[i] * z[j] for j in range(len(z))] for i in range(len(C))]

#     return dL_dA, dL_dB, dL_dC

# # Gradient descent to update A, B, and C
# def gradient_descent(A, B, C, learning_rate, dL_dA, dL_dB, dL_dC):
#     Anew = [[A[i][j] - learning_rate * dL_dA[i][j] for j in range(len(A[i]))] for i in range(len(A))]
#     Bnew = [[B[i][j] - learning_rate * dL_dB[i][j] for j in range(len(B[i]))] for i in range(len(B))]
#     Cnew = [[C[i][j] - learning_rate * dL_dC[i][j] for j in range(len(C[i]))] for i in range(len(C))]
#     return Anew, Bnew, Cnew

# # Generate random input vector x
# K = 5  # Example: K = 5
# x = [random.uniform(-1, 1) for _ in range(K)]

# # Randomly initialize A, B, and C
# A = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
# B = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]
# C = [[random.uniform(-1, 1) for _ in range(K)] for _ in range(K)]

# # Hyperparameters for gradient descent
# learning_rate = 0.1
# epochs = 500

# # Forward propagation
# L, y, u, v, z, w = forward_propagation(x, A, B, C)

# # Backward propagation
# dL_dA, dL_dB, dL_dC = backward_propagation(x, A, B, C, y, u, v, z, w)

# # Gradient descent
# for _ in range(epochs):
#     A, B, C = gradient_descent(A, B, C, learning_rate, dL_dA, dL_dB, dL_dC)

# # Print the updated A, B, and C
# print("Updated A:")
# for row in A:
#     print(row)

# print("Updated B:")
# for row in B:
#     print(row)

# print("Updated C:")
# for row in C:
#     print(row)
# # Gradient descent to update A, B, and C and calculate loss for each iteration
# def gradient_descent_with_loss(A, B, C, learning_rate, dL_dA, dL_dB, dL_dC):
#     losses = []  # List to store loss for each iteration
#     for _ in range(epochs):
#         # Update A, B, and C
#         A, B, C = gradient_descent(A, B, C, learning_rate, dL_dA, dL_dB, dL_dC)
        
#         # Forward propagation to calculate loss
#         L, _, _, _, _, _ = forward_propagation(x, A, B, C)
#         losses.append(L)
    
#     return A, B, C, losses

# # Run gradient descent and store the loss for each iteration
# A_final, B_final, C_final, losses = gradient_descent_with_loss(A, B, C, learning_rate, dL_dA, dL_dB, dL_dC)

# # Plot loss vs. iteration
# plt.plot(range(epochs), losses, label='Loss')
# plt.xlabel('Iteration')
# plt.ylabel('Loss')
# plt.legend()
# plt.show()