In [3]:
'''This module contains a detailed implementation of the backpropagation algorithm.'''
import torch
import torch.nn as nn
import numpy as np
from dataclasses import *
from typing import Any, Callable, Dict, List, Tuple, Union, Optional


In [4]:
@dataclass
class ReLU:
    """Rectified-linear activation bundled with its element-wise derivative."""

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``max(x, 0)`` element-wise."""
        return torch.nn.functional.relu(x)

    def derivative(self, x: torch.Tensor) -> torch.Tensor:
        """Return 1.0 where ``x > 0`` and 0.0 elsewhere (including at ``x == 0``).

        The result uses torch's default floating dtype, independent of ``x.dtype``.
        """
        positive_mask = torch.gt(x, 0)
        return positive_mask.to(torch.get_default_dtype())
@dataclass
class Tanh:
    """Hyperbolic-tangent activation bundled with its element-wise derivative."""

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``tanh(x)`` element-wise."""
        return torch.tanh(x)

    def derivative(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``d tanh(x) / dx = 1 - tanh(x)**2`` evaluated at the pre-activation ``x``."""
        activated = torch.tanh(x)
        return 1 - activated ** 2
@dataclass
class Null:
    """Identity (no-op) activation bundled with its element-wise derivative."""

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        """Return the input tensor itself, unmodified."""
        return x

    def derivative(self, x: torch.Tensor) -> torch.Tensor:
        """Return a tensor of ones with the same shape and dtype as ``x``."""
        return torch.full_like(x, 1)
def mse_grad(x, y):
    """Return the scalar ``-2 * mean(y - x)``, i.e. ``2 * mean(x - y)``.

    For a single-element prediction ``x`` this equals the derivative of
    ``(x - y) ** 2`` with respect to ``x``.

    NOTE(review): ``mse`` in this notebook halves the squared error, so this is
    twice the gradient of that function for a single element - confirm which
    convention is intended.
    """
    mean_residual = torch.mean(y - x)
    # The value is already 0-dimensional, so no further reduction is needed.
    return -2 * mean_residual
def mse(x, y):
    """Return the scalar mean of ``(x - y) ** 2 / 2`` over all elements.

    NOTE(review): the 1/2 factor means this is half of what ``nn.MSELoss``
    reports, while ``mse_grad`` in this notebook carries no 1/2 factor.
    """
    half_squared_error = ((x - y) ** 2) / 2
    # A single reduction suffices: the mean is already 0-dimensional.
    return torch.mean(half_squared_error)

In [5]:
class NeuralNetwork(nn.Module):
    """Reference 8 -> 10 -> 1 perceptron built from ``nn.Linear`` layers.

    A ReLU follows the first layer; the second layer's output is returned as is.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order so seeded initialisation stays the same.
        self.input_layer = nn.Linear(in_features=8, out_features=10)
        self.hidden_layer = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Map ``x`` (last dimension 8) to a prediction with last dimension 1."""
        hidden = nn.functional.relu(self.input_layer(x))
        return self.hidden_layer(hidden)


In [6]:
class CriticNetwork:
    '''Value function (critic) that estimates the expected value of a state-action pair.

    The critic is a fully connected network whose parameters are plain tensors
    held in ``self.weights`` and ``self.bias``. Gradients are computed by the
    hand-written backpropagation in ``compute_gradients`` rather than by autograd.
    '''

    def __init__(self, layer_sizes: list,
                 layer_activations: list,
                 layer_activations_derivative: list):
        '''Create randomly initialised parameters for the given architecture.

        Args:
            layer_sizes: Width of every layer, input first and output last.
            layer_activations: One callable per weight layer, applied to that
                layer's pre-activation.
            layer_activations_derivative: One callable per weight layer returning
                the element-wise derivative of the matching activation, evaluated
                at the pre-activation.
        '''
        self.layer_sizes = layer_sizes
        n_layers = len(layer_sizes) - 1
        # Entries are multiples of 1e-4 drawn uniformly from [-1, 1).
        # All weights are drawn before any bias so the RNG stream is unchanged.
        self.weights = [torch.randint(-10000, 10000, [layer_sizes[i], layer_sizes[i + 1]]) / 10000
                        for i in range(n_layers)]
        self.bias = [torch.randint(-10000, 10000, [1, layer_sizes[i + 1]]) / 10000
                     for i in range(n_layers)]
        self.layer_activations = layer_activations
        # Previously never stored, which made compute_gradients raise AttributeError.
        self.layer_activations_derivative = layer_activations_derivative

    def forward(self, StateInput: torch.Tensor,
                ActionInput: torch.Tensor,
                full: bool = False) -> Union[torch.Tensor, Tuple[List[torch.Tensor], List[torch.Tensor]]]:
        '''Predict the expected return of a state-action pair.

        Args:
            StateInput: State features.
            ActionInput: Action features, concatenated to ``StateInput`` along
                the last dimension.
            full: When exactly ``False`` only the network output is returned.

        Returns:
            The activation of the last layer, or - when ``full`` is not ``False`` -
            the tuple ``(LayerConnections, ActivatedNeuronLayer)`` holding every
            layer's pre-activation and every activation (the concatenated input
            is the first activation).
        '''
        InputData = torch.cat([StateInput, ActionInput], dim=-1)
        LayerConnections = []
        ActivatedNeuronLayer = [InputData]
        for i in range(len(self.weights)):
            pre_activation = torch.matmul(ActivatedNeuronLayer[i], self.weights[i]) + self.bias[i]
            LayerConnections.append(pre_activation)
            ActivatedNeuronLayer.append(self.layer_activations[i](pre_activation))
        if full is False:
            return ActivatedNeuronLayer[-1]
        return LayerConnections, ActivatedNeuronLayer

    def compute_gradients(self, StateInput: torch.Tensor,
                          ActionInput: torch.Tensor,
                          OptimalReturn: torch.Tensor,
                          loss_derivative: Callable):
        '''Compute batch-averaged gradients of the loss for every weight and bias.

        The inputs are iterated along their first dimension; each sample is
        back-propagated on its own and its contribution is divided by
        ``len(StateInput)``.

        Args:
            StateInput: Batch of state features.
            ActionInput: Batch of action features.
            OptimalReturn: Batch of target values.
            loss_derivative: Called as ``loss_derivative(prediction, target)`` for
                each sample; must return the derivative of the loss with respect
                to the prediction (a scalar or something broadcastable to it).

        Returns:
            ``(weight_grad, bias_grad)`` - two lists whose tensors have the same
            shapes as the entries of ``self.weights`` and ``self.bias``.

        Raises:
            ValueError: If the number of activation derivatives does not match
                the number of weight layers.
        '''
        n_layers = len(self.weights)
        if len(self.layer_activations_derivative) != n_layers:
            raise ValueError(
                f"expected {n_layers} activation derivatives, "
                f"got {len(self.layer_activations_derivative)}"
            )
        batch_size = len(StateInput)
        bias_grad = [torch.zeros_like(b) for b in self.bias]
        weight_grad = [torch.zeros_like(w) for w in self.weights]
        # The backward pass is written by hand, so autograd must not record it
        # (the parameters may have been copied from tensors that require grad).
        with torch.no_grad():
            for x1, x2, y in zip(StateInput, ActionInput, OptimalReturn):
                LayerConnections, ActivatedNeuronLayer = self.forward(x1, x2, full=True)
                dEdA = torch.atleast_2d(torch.as_tensor(loss_derivative(ActivatedNeuronLayer[-1], y)))
                for l in reversed(range(n_layers)):
                    # Promote to 2-D so the algebra below also holds when the
                    # sample, or the bias, is one-dimensional.
                    z = torch.atleast_2d(LayerConnections[l])
                    layer_input = torch.atleast_2d(ActivatedNeuronLayer[l])
                    dAdz = self.layer_activations_derivative[l](z)
                    dEdz = dEdA * dAdz
                    # dz/db is the identity; fold the row dimension back to the bias shape.
                    dEdb = dEdz.sum(dim=0).reshape(self.bias[l].shape)
                    # dz/dW is the layer input: outer product of input and delta.
                    dEdw = torch.matmul(layer_input.t(), dEdz)
                    bias_grad[l] = bias_grad[l] + dEdb / batch_size
                    weight_grad[l] = weight_grad[l] + dEdw / batch_size
                    # Propagate the error to the previous layer's activation.
                    dEdA = torch.matmul(dEdz, self.weights[l].t())
        return weight_grad, bias_grad

    def update_model(self, weight_grad, bias_grad, learning_rate):
        '''Apply one in-place gradient-descent step to every weight and bias.

        Args:
            weight_grad: Gradients matching ``self.weights``.
            bias_grad: Gradients matching ``self.bias``.
            learning_rate: Step size multiplying each gradient.
        '''
        # In-place updates on tensors that require grad are only legal with
        # autograd recording switched off.
        with torch.no_grad():
            for i in range(len(self.weights)):
                self.weights[i] -= learning_rate * weight_grad[i]
                self.bias[i] -= learning_rate * bias_grad[i]


In [7]:
def compute_pytorch_gradients(input_data, target_data, model, loss_function):
    """Compute, print and return autograd gradients of a loss for every model parameter.

    Args:
        input_data: Batch passed to ``model``.
        target_data: Targets passed to ``loss_function`` together with the model output.
        model: An ``nn.Module``; its existing gradients are cleared first.
        loss_function: Callable ``loss_function(output, target)`` returning a scalar loss.

    Returns:
        A list with one entry per parameter, in ``model.parameters()`` order:
        a copy of the parameter's gradient, or ``None`` when the parameter
        received no gradient.
    """
    model.zero_grad()
    output = model(input_data)
    loss = loss_function(output, target_data)
    loss.backward()
    # Clone so later zero_grad()/backward() calls cannot alter the returned values.
    gradients = [
        param.grad.clone() if param.grad is not None else None
        for param in model.parameters()
    ]
    for i, grad in enumerate(gradients):
        print(f"Gradient for parameter {i + 1}:\n{grad}")
    # Returned so callers can compare against another implementation.
    return gradients

In [8]:
# Fix the RNG so the reference network (and the inputs drawn in later cells)
# are reproducible under Restart & Run All.
torch.manual_seed(0)

# Reference model whose gradients come from autograd.
pyCritic = NeuralNetwork()

# Hand-written critic with the same 8 -> 10 -> 1 architecture: ReLU on the
# first layer, identity on the output layer. The activation objects come from
# the classes defined earlier in this notebook, so no names from a stale
# kernel session are needed.
model = CriticNetwork(
    [8, 10, 1],
    [ReLU(), Null()],
    [ReLU().derivative, Null().derivative],
)

# Give both models identical parameters. nn.Linear stores weights as
# (out_features, in_features) while CriticNetwork uses (in, out), hence the
# transpose. Biases are reshaped to the (1, n) layout CriticNetwork creates.
# detach().clone() keeps the copies independent of pyCritic and of autograd.
model.weights[0] = pyCritic.input_layer.weight.detach().t().clone()
model.weights[1] = pyCritic.hidden_layer.weight.detach().t().clone()
model.bias[0] = pyCritic.input_layer.bias.detach().reshape(1, -1).clone()
model.bias[1] = pyCritic.hidden_layer.bias.detach().reshape(1, -1).clone()

In [9]:
# One random sample: `inputs` is later passed as the state part (6 features)
# and `inputs1` as the action part (2 features).
inputs, inputs1 = torch.randn(1, 6), torch.randn(1, 2)
# The autograd reference model takes the concatenated 8-feature row.
input_data = torch.cat((inputs, inputs1), dim=-1)
# Random regression target in [0, 1).
target_data = torch.rand(1, 1)
loss_function = nn.MSELoss()

In [10]:
# Autograd reference: prints the gradient of every pyCritic parameter.
gradients = compute_pytorch_gradients(
    input_data=input_data,
    target_data=target_data,
    model=pyCritic,
    loss_function=loss_function,
)
# Hand-written backpropagation on the same sample: a = weight gradients,
# b = bias gradients.
a, b = model.compute_gradients(
    StateInput=inputs,
    ActionInput=inputs1,
    OptimalReturn=target_data,
    loss_derivative=mse_grad,
)
print(a, b, sep="\n")

Gradient for parameter 1:
tensor([[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [-0.2823, -0.0956, -0.0077, -0.2742, -0.0097,  0.1919, -0.1223, -0.1113],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.1426,  0.0483,  0.0039,  0.1385,  0.0049, -0.0970,  0.0618,  0.0562],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.2031,  0.0688,  0.0056,  0.1973,  0.0070, -0.1381,  0.0880,  0.0801]])
Gradient for parameter 2:
tensor([ 0.0000,  0.2280,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000, -0.1152,
         0.0000, -0.1641])
Gradient for paramet