---
AND gate implemented as a single-layer perceptron
---

In [1]:
import numpy as np

class AND_NN:
    """Single-layer perceptron with a step activation, used for the AND gate.

    The bias is folded into the weight vector: ``weights[0]`` multiplies a
    constant-1 column that is prepended to every input, and ``weights[1:]``
    multiply the two gate inputs.
    """

    def __init__(self, learning_rate=0.1, num_itr=100):
        """Create an untrained perceptron.

        Args:
            learning_rate: Step size applied to each weight update.
            num_itr: Maximum number of full-batch updates run by ``train``.
        """
        self.weights = np.random.randn(3)  # [bias, w1, w2]
        self.learning_rate = learning_rate
        self.num_itr = num_itr

    @staticmethod
    def _add_bias(X):
        """Return X as a 2-D array with a leading column of ones (the bias input)."""
        X = np.atleast_2d(X)
        return np.c_[np.ones(X.shape[0]), X]

    def activation_function(self, z):
        """Step activation: 1 where ``z >= 0``, otherwise 0."""
        return np.where(z >= 0, 1, 0)

    def predict(self, X):
        """Return the 0/1 output for each row of X.

        Args:
            X: One sample ``[a, b]`` or a sequence/array of such samples.

        Returns:
            1-D integer array with one prediction per sample.
        """
        Z = np.dot(self._add_bias(X), self.weights)
        return self.activation_function(Z)

    def train(self, X, y):
        """Fit the weights with the full-batch perceptron rule.

        Args:
            X: Samples, one row per input pair; lists and arrays are accepted.
            y: Target for each sample. Any shape holding one value per sample
                works; it is flattened so that a column vector cannot
                broadcast against the 1-D predictions.
        """
        X_with_bias = self._add_bias(X)
        y = np.asarray(y).ravel()
        for _ in range(self.num_itr):
            predictions = self.activation_function(np.dot(X_with_bias, self.weights))
            errors = y - predictions
            # A zero error vector produces a zero update, so every remaining
            # iteration would leave the weights untouched: stop early.
            if not errors.any():
                break
            self.weights += self.learning_rate * np.dot(errors, X_with_bias)

# AND-gate truth table: all four input pairs and the expected output of each
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
y = np.array([0, 0, 0, 1])

# Fit a freshly initialised perceptron to the truth table
and_gate = AND_NN()
and_gate.train(X, y)

# Report what the trained perceptron outputs for every input pair
print("Testing Perceptron for AND Gate:")
predictions = and_gate.predict(X)
for inputs, prediction in zip(X, predictions):
    print(f"Input: {inputs}, Prediction: {prediction}")

Testing Perceptron for AND Gate:
Input: [0 0], Prediction: 0
Input: [0 1], Prediction: 0
Input: [1 0], Prediction: 0
Input: [1 1], Prediction: 1


---
This is an XOR gate implementation using a single-layer perceptron (no hidden layer)
---

In [2]:
class XOR_perceptron:
    """Single-layer perceptron with a step activation, tried on the XOR gate.

    The weight vector holds ``input_size + 1`` entries, the extra one being the
    bias weight. This class never adds a bias column itself, so the inputs
    given to ``train``/``predict``/``test`` must already contain one.
    """

    def __init__(self, input_size, learning_rate=0.1, num_itr=100):
        """Draw the initial weights and store the training settings."""
        # One weight per input plus one for the bias, sampled uniformly from [0, 1).
        self.weights = np.random.rand(1 + input_size)
        self.num_itr = num_itr
        self.learning_rate = learning_rate

    def activation_function(self, x):
        """Step activation: 1 where ``x >= 0``, otherwise 0."""
        return np.where(x >= 0, 1, 0)

    def predict(self, X):
        """Return the 0/1 output for every row of the bias-augmented input X."""
        weighted_sum = np.dot(X, self.weights)
        return self.activation_function(weighted_sum)

    def train(self, X, y):
        """Run ``num_itr`` full-batch perceptron updates on (X, y)."""
        for _epoch in range(self.num_itr):
            residual = y - self.predict(X)
            weight_step = self.learning_rate * np.dot(residual, X)
            self.weights += weight_step

    def test(self, X):
        """Evaluate the network on X; identical to ``predict``."""
        return self.predict(X)

# XOR truth table
inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
outputs = np.array([0, 1, 1, 0])  # XOR outputs
# The perceptron expects the bias as a trailing constant-1 input column
inputs_with_bias = np.c_[inputs, np.ones(len(inputs))]

# Fit the single-layer network to the truth table
xor_nn = XOR_perceptron(input_size=2)
xor_nn.train(inputs_with_bias, outputs)

# Evaluate it on the very same four inputs
predicted_outputs = xor_nn.test(inputs_with_bias)

# Show the targets next to what the network produced
print("Expected outputs:", outputs)
print("Predicted outputs:", predicted_outputs)

# Report whether the truth table was reproduced exactly
print(
    "The single-layer XOR learned the XOR function!"
    if np.array_equal(predicted_outputs, outputs)
    else "The single-layer XOR fails to learn the XOR function."
)

Expected outputs: [0 1 1 0]
Predicted outputs: [0 0 0 0]
The single-layer XOR fails to learn the XOR function.


---
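Observation - **`No matter how many iterations the XOR_perceptron is trained for, it cannot produce the required output without a hidden layer: XOR is not linearly separable, and a single-layer perceptron can only learn a linear decision boundary.`**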
---

---
This is an XOR gate implementation using a single hidden layer
---

In [4]:
import time

class XOR_NN:
    """2-3-1 feed-forward network with sigmoid activations and no bias terms.

    Trained by full-batch gradient descent on the cost
    ``J = 0.5 * sum((y - yHat) ** 2)``.
    """

    def __init__(self):
        """Set the layer sizes and draw standard-normal initial weights."""
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3  # chosen empirically: 3 hidden units worked quite well

        # Weights (parameters): W1 maps input -> hidden, W2 maps hidden -> output
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def _as_targets(self, y):
        """Return y as an (n_samples, outputLayerSize) array.

        Without this, 1-D targets of shape (n,) would broadcast against the
        (n, 1) network output into an (n, n) matrix and corrupt both the cost
        and the gradients without raising any error.
        """
        return np.asarray(y).reshape(-1, self.outputLayerSize)

    def forward(self, X):
        """Propagate X through the network and return the output activations.

        The intermediate values ``z2``, ``a2`` and ``z3`` are stored on the
        instance because ``costFunctionPrime`` reads them.
        """
        self.z2 = np.dot(X, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        """Logistic function ``1 / (1 + exp(-z))``, applied element-wise."""
        # float64 exp overflows a little above 709; clipping keeps the result
        # finite and warning-free and leaves every value in [-500, 500] unchanged.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        """Derivative of the sigmoid activation function.

        Uses ``s * (1 - s)`` with ``s = sigmoid(z)``. The algebraically equal
        form ``exp(-z) / (1 + exp(-z)) ** 2`` evaluates to inf / inf = NaN for
        large negative ``z``.
        """
        s = self.sigmoid(z)
        return s * (1 - s)

    def costFunction(self, X, y):
        """Return the cost ``0.5 * sum((y - yHat) ** 2)`` for inputs X and targets y."""
        y = self._as_targets(y)
        self.yHat = self.forward(X)
        J = 0.5 * np.sum((y - self.yHat) ** 2)
        return J

    def costFunctionPrime(self, X, y):
        """Return the gradients of the cost with respect to W1 and W2.

        Returns:
            Tuple ``(dJdW1, dJdW2)`` with the same shapes as ``W1`` and ``W2``.
        """
        X = np.asarray(X)
        y = self._as_targets(y)
        self.yHat = self.forward(X)
        # Back-propagate the output error through the output layer ...
        delta3 = np.multiply(-(y - self.yHat), self.sigmoidPrime(self.z3))
        dJdW2 = np.dot(self.a2.T, delta3)  # Gradient for W2
        # ... and then through the hidden layer.
        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)  # Gradient for W1

        return dJdW1, dJdW2

    def getParams(self):
        """Return W1 and W2 rolled into a single 1-D vector (W1 first)."""
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params

    def setParams(self, params):
        """Set W1 and W2 from a single parameter vector laid out as in ``getParams``."""
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize, self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize * self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))

    def computeGradients(self, X, y):
        """Return the gradients as one vector, laid out like ``getParams``."""
        dJdW1, dJdW2 = self.costFunctionPrime(X, y)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

    def gradientDescent(self, X, y, num_itr, learningRate, log_every=None):
        """Minimise the cost with ``num_itr`` full-batch gradient-descent steps.

        Args:
            X: Training inputs, one row per sample.
            y: Training targets, one value per sample (1-D or column vector).
            num_itr: Number of update steps.
            learningRate: Step size applied to the gradients.
            log_every: If given, print the cost every ``log_every`` iterations
                to check how it changes; ``None`` (the default) prints nothing.
        """
        y = self._as_targets(y)
        for itr in range(num_itr):
            dJdW1, dJdW2 = self.costFunctionPrime(X, y)
            # Same update as setParams(getParams() - learningRate * gradients),
            # without flattening and re-splitting the weights on every step.
            self.W1 = self.W1 - learningRate * dJdW1
            self.W2 = self.W2 - learningRate * dJdW2
            if log_every and itr % log_every == 0:
                cost = self.costFunction(X, y)
                print(f"Iteration {itr}, Cost: {cost}")

    def predict(self, X):
        """Return the network output for X rounded to 0.0 / 1.0."""
        return np.round(self.forward(X))

# Training data: the XOR truth table, with targets as a column vector
X = np.array([[0, 0],[0, 1],[1, 0],[1, 1]])
Y = np.array([[0],[1],[1],[0]])
# Set parameters and training configuration
xor_nn = XOR_NN()
num_itr = 100000
learningRate = 0.1
# Train the model. perf_counter() is a monotonic, high-resolution clock meant
# for measuring elapsed intervals; time.time() is wall-clock time and can jump
# if the system clock is adjusted while training runs.
start_time = time.perf_counter()
xor_nn.gradientDescent(X, Y, num_itr, learningRate)
end_time = time.perf_counter()

# Output the results
predictions = xor_nn.predict(X)
print("\nFinal Predictions :",predictions.flatten())
print(f"Training Time: {end_time - start_time} seconds")


Final Predictions : [0. 1. 1. 0.]
Training Time: 3.4659087657928467 seconds



---
Implementation of Full Adder using XOR and AND gates
---




In [9]:
class FullAdderNN:
    """One-bit full adder built from a trained XOR network and AND perceptron.

    Sum   = XOR(XOR(A, B), Cin)
    Carry = XOR(AND(A, B), AND(XOR(A, B), Cin))

    The carry uses XOR where a textbook adder uses OR; the two AND terms are
    never 1 at the same time, so both give the same result.
    """

    # How many freshly initialised models are tried per gate before giving up.
    MAX_TRAINING_ATTEMPTS = 5

    def __init__(self):
        """Train the XOR and AND gate models and check them against their truth tables."""
        gate_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

        def train_xor():
            # XOR gate model, used for the sum and to combine the two carry terms
            net = XOR_NN()
            net.gradientDescent(gate_inputs, np.array([[0], [1], [1], [0]]),
                                num_itr=100000, learningRate=0.1)
            return net

        def train_and():
            # AND gate model, used for the intermediate carries
            gate = AND_NN()
            gate.train(gate_inputs, np.array([0, 0, 0, 1]))
            return gate

        self.xor_nn = self._train_verified(train_xor, gate_inputs, [0, 1, 1, 0], "XOR")
        self.and_nn = self._train_verified(train_and, gate_inputs, [0, 0, 0, 1], "AND")

    @classmethod
    def _train_verified(cls, build, inputs, expected, gate_name):
        """Train a gate until it reproduces its truth table.

        Both gates start from random weights and training is not guaranteed to
        converge, so each result is checked and training is retried with a
        fresh initialisation when the check fails.

        Args:
            build: Zero-argument callable returning a newly trained gate model.
            inputs: Truth-table inputs passed to the model's ``predict``.
            expected: Expected output for each row of ``inputs``.
            gate_name: Gate name used in the warning message.

        Returns:
            The first model that matches ``expected``. If none does within
            ``MAX_TRAINING_ATTEMPTS`` tries, the last model is returned and a
            ``RuntimeWarning`` is issued.
        """
        import warnings

        for _ in range(cls.MAX_TRAINING_ATTEMPTS):
            gate = build()
            if np.array_equal(np.ravel(gate.predict(inputs)), expected):
                return gate
        warnings.warn(
            f"{gate_name} gate did not reproduce its truth table after "
            f"{cls.MAX_TRAINING_ATTEMPTS} training attempts; "
            "full-adder outputs may be wrong.",
            RuntimeWarning,
            stacklevel=3,
        )
        return gate

    def predict(self, A, B, Cin):
        """Add the bits A, B and Cin element-wise.

        Args:
            A, B, Cin: Equal-length sequences (or arrays) of 0/1 bits.

        Returns:
            Tuple ``(sum_output, final_carry)`` of integer arrays.
        """
        A = np.array(A)
        B = np.array(B)
        Cin = np.array(Cin)
        xor_ab = self.xor_nn.predict(np.column_stack((A, B)))

        # Calculate final Sum = XOR(XOR(A, B), Cin)
        sum_output = self.xor_nn.predict(np.column_stack((xor_ab, Cin)))

        # Calculate final_carry = XOR(AND(A,B),AND(XOR(A,B),Cin))
        and_ab = self.and_nn.predict(np.column_stack((A, B)))
        and_cin_ab = self.and_nn.predict(np.column_stack((xor_ab, Cin)))
        final_carry = self.xor_nn.predict(np.column_stack((and_ab, and_cin_ab)))

        return sum_output.astype(int), final_carry.astype(int)

import pandas as pd

full_adder_nn = FullAdderNN()

# All eight (A, B, Cin) combinations, one per array position
A = np.array([0, 0, 0, 0, 1, 1, 1, 1])
B = np.array([0, 0, 1, 1, 0, 0, 1, 1])
Cin = np.array([0, 1, 0, 1, 0, 1, 0, 1])

# Run the adder on every combination at once
sum_output, final_carry = full_adder_nn.predict(A, B, Cin)

# Lay the inputs and results out as a truth table
truth_table_columns = {
    'A': A,
    'B': B,
    'Cin': Cin,
    'Sum': sum_output.flatten(),
    'Carry': final_carry.flatten(),
}
df = pd.DataFrame(truth_table_columns)
df.index = ['' for _ in range(len(df))]  # Blank row labels
print("Testing Full Adder:")
print(df.to_string(index=False))

Testing Full Adder:
 A  B  Cin  Sum  Carry
 0  0    0    0      0
 0  0    1    1      0
 0  1    0    1      0
 0  1    1    0      1
 1  0    0    1      0
 1  0    1    0      1
 1  1    0    0      1
 1  1    1    1      1


---
Combining the adders into a ripple carry adder
---

In [22]:
class RippleCarryAdderNN:
    """N-bit adder that chains a one-bit full adder, feeding each carry forward.

    Bit sequences are processed from the last index to the first, so index 0
    receives the final carried-in bit.
    """

    def __init__(self, full_adder_nn):
        """Store the full adder; it must expose ``predict(A, B, Cin)``."""
        self.full_adder_nn = full_adder_nn

    def predict(self, A, B, Cin=0):
        """Add the bit sequences A and B, starting with carry-in Cin.

        Args:
            A, B: Equal-length sequences of 0/1 bits.
            Cin: Carry fed into the last position.

        Returns:
            Tuple ``(sum_output, carry)``: a float array with one sum bit per
            position, and the carry left after position 0 (``Cin`` itself when
            the inputs are empty).

        Raises:
            AssertionError: If A and B differ in length.
        """
        A, B = np.array(A), np.array(B)
        n = len(A)
        assert len(B) == n, "A and B must have the same length"

        sum_output = np.zeros(n)
        carry = Cin
        for i in reversed(range(n)):
            # The full adder works on sequences, so wrap each bit in a list.
            bit_sum, bit_carry = self.full_adder_nn.predict([A[i]], [B[i]], [carry])
            sum_output[i] = bit_sum.ravel()[0]
            carry = bit_carry.ravel()[0]
        return sum_output, carry

# Build a ripple-carry adder on top of a newly trained full adder
full_adder_nn = FullAdderNN()
ripple_carry_adder_nn = RippleCarryAdderNN(full_adder_nn)

# The two operands must have the same number of bits (8 in this case)
A = [0, 0, 0, 0, 1, 1, 1, 1]
B = [1, 1, 1, 1, 0, 0, 1, 1]
Cin = 0

# Add the operands, then put the carry-out in front of the sum bits
sum_output, final_carry = ripple_carry_adder_nn.predict(A, B, Cin)
sum_bits = sum_output.flatten().astype(int)
sum_with_carry = np.concatenate(([int(final_carry)], sum_bits))

# The first table row labels the carry-out position
result_columns = {
    'A': ['Carry', *A],
    'B': ['->', *B],
    'Sum': sum_with_carry,
}
df = pd.DataFrame(result_columns)
df.index = ['' for _ in range(len(df))]

print("Ripple Carry Adder Results:")
print(df.to_string(index=False))
print(f"Final Carry-Out: {int(final_carry)}")

# Read carry-out followed by the sum bits as one binary number
bit_string = str(final_carry) + ''.join(str(bit) for bit in sum_output.astype(int))
decimal_sum = int(bit_string, 2)
print(f"Sum in Decimal: {decimal_sum}")


Ripple Carry Adder Results:
    A  B  Sum
Carry ->    1
    0  1    0
    0  1    0
    0  1    0
    0  1    0
    1  0    0
    1  0    0
    1  1    1
    1  1    0
Final Carry-Out: 1
Sum in Decimal: 258
