In [5]:
import numpy as np

In [6]:

class Layer:
    def __init__(self, n_inputs, n_neurons):
        # self.weights = np.random.randn(n_inputs, n_neurons) * np.sqrt(1 / n_inputs)
        self.weights = np.around(np.random.randn(n_inputs, n_neurons),2)
        self.biases = np.around(np.random.randn(1, n_neurons),2)
        # print(self.weights,self.biases)

    def calculate_output(self, input):
        # print("Input",input,input.shape,len(input))
        self.input=np.array(input)
        # print("I",self.input,self.input.shape)
        # print("W",self.weights,self.weights.shape)
        weighted_sum = np.dot(self.input,self.weights) + self.biases
        self.output = self.activation_function(weighted_sum)
        return self.output


    def calculate_delta(self, error, output):
        return error * self.derivative_activation_function(output)

    def update_weights(self, input, delta, learning_rate):
        if delta.shape[0] != input.shape[0]:
            input=input.reshape(1,-1)
            # delta = delta.reshape(input.shape[0],-1)
            # raise ValueError("The size of delta must be equal to the number of rows of input")
            # print("I",input,input.shape)
            # print("D",delta,delta.shape)
        self.weights += learning_rate * np.dot(delta.T,input).T
        self.biases += learning_rate * np.sum(delta)
        

    def activation_function(self, x):
        # Example: Sigmoid function
        return 1 / (1 + np.exp(-x))
        # return tanh(x)
        # return x

    def derivative_activation_function(self, x):
        # Example: Derivative of sigmoid function
        return np.exp(-x) / (1 + np.exp(-x))**2
        # return tanh_derivative(x)
        # return x

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

def relu(x):
    return np.maximum(x, 0)

def relu_derivative(x):
    return np.array(x >= 0).astype('int')




In [7]:
class NeuralNetwork:
    def __init__(self, layers):
        np.random.seed(1)
        self.layers = layers[1:]

    def predict(self, input):
        output = input
        for layer in self.layers:
            output = layer.calculate_output(output)
        return output


    # def train(self, input, expected_output, learning_rate):
    #     output = self.predict(input)
    #     error = expected_output - output
    #     # print("val",output)
    #     # print("E",expected_output," - ",output," = ",error)
        
    #     for i in range(len(self.layers)-1, -1, -1):
    #         layer = self.layers[i]
    #         input=layer.input
    #         output = layer.output
    #         if i == len(self.layers) - 1:
    #             layer.delta =layer.calculate_delta(error, output)
    #             layer.weights += learning_rate * np.dot(layer.delta.T,input).T
    #             layer.biases += learning_rate * np.sum(layer.delta)
    #         else:
    #             next_layer = self.layers[i+1]
    #             layer.delta = layer.calculate_delta(np.dot(next_layer.delta, next_layer.weights.T), layer.output)
    #             layer.update_weights(input, layer.delta, learning_rate)


    def train(self, input, expected_output, learning_rate):
        output = self.predict(input)
        error = expected_output - output
        print("val",output)
        print("E",expected_output," - ",output," = ",error) 

        for i in range(len(self.layers)-1, -1, -1):
            print(i)
            layer = self.layers[i]
            # prev_layer = self.layers[i-1] if i>0 else 0
            input=layer.input
            output = layer.output
            print("I",input,input.shape)
            print("O",output,output.shape)
            print("W",layer.weights,(layer.weights).shape)
            if i == len(self.layers) - 1:
                print("E",error,error.shape)
                # layer.delta =np.array(np.full(layer.weights.shape,layer.calculate_delta(error, output)))
                layer.delta = layer.calculate_delta(error, output)
                print("Delta",i,layer.delta,layer.delta.shape)
                layer.update_weights(input, layer.delta, learning_rate)
                # print(np.dot(layer.delta,input))
                # layer.weights += learning_rate * np.dot(layer.delta.T,input).T
                # layer.biases += learning_rate * np.sum(layer.delta)
                print("Updated Weights",i,layer.weights)
            else:
                next_layer = self.layers[i+1]
                # print("NLD",next_layer.delta)
                # print("NLW",next_layer.weights)
                # layer.delta = np.array(np.full(layer.weights.shape,layer.calculate_delta(np.dot(next_layer.delta.T, next_layer.weights), layer.output)))
                layer.delta = layer.calculate_delta(np.dot(next_layer.delta, next_layer.weights.T), layer.output)
                print("Delta",i,layer.delta,layer.delta.shape)
                layer.update_weights(input, layer.delta, learning_rate)
                print("Updated Weights",i,layer.weights)

            # print(layer.delta)


        

In [8]:
# Example: Training a neural network to learn the XOR function

# Input dataset
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# X = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])
# Output dataset
y = np.array([[0], [1], [1], [0]])
# y = np.array([[-1], [1], [1], [-1]])

# Define the layers of the neural network
input_layer = Layer(2, 2)
hidden_layer_1 = Layer(2, 2)
# hidden_layer_2 = Layer(2, 3)
# output_layer = Layer(3, 1)
output_layer = Layer(2, 1)

# Create the neural network
# nn = NeuralNetwork([input_layer, hidden_layer_1, hidden_layer_2, output_layer])
nn = NeuralNetwork([input_layer, hidden_layer_1, output_layer])

# Train the neural network
# for i in range(10000):
for i in range(1):
    # for j in range(len(X)):
    for j in range(1):
        nn.train(X[j], y[j], 0.001)

print(nn.predict(np.array([0, 0]))) # Expected output: [0]
print(nn.predict(np.array([0, 1]))) # Expected output: [1]
print(nn.predict(np.array([1, 0]))) # Expected output: [1]
print(nn.predict(np.array([1, 1]))) # Expected output: [0]


val [[0.69579476]]
E [0]  -  [[0.69579476]]  =  [[-0.69579476]]
1
I [[0.81153267 0.11304583]] (1, 2)
O [[0.69579476]] (1, 1)
W [[-0.32]
 [-0.38]] (2, 1)
E [[-0.69579476]] (1, 1)
Delta 1 [[-0.15448442]] (1, 1)
Updated Weights 1 [[-0.32012537]
 [-0.38001746]]
0
I [0 0] (2,)
O [[0.81153267 0.11304583]] (1, 2)
W [[ 1.74 -0.76]
 [ 0.32 -0.25]] (2, 2)
Delta 0 [[0.01053222 0.0146299 ]] (1, 2)
Updated Weights 0 [[ 1.74 -0.76]
 [ 0.32 -0.25]]
[[0.69573964]]
[[0.69457557]]
[[0.69016273]]
[[0.6904159]]
