In [1]:
import numpy as np
from tensorflow.keras.datasets import mnist


In [2]:

def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of `x`;
    every value lies in [0, 1].
    """
    x = np.asarray(x)
    # exp of a non-positive number lies in (0, 1], so it can never overflow,
    # unlike exp(-x) which blows up for large negative x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same value.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where turns scalar input into a 0-d array; unwrap it to a NumPy scalar.
    return result[()] if result.ndim == 0 else result

def softamx(x, axis=1):
    """Softmax along `axis`: exponentiate and normalise so each slice sums to 1.

    The name is a historical misspelling of "softmax"; it is kept so existing
    callers keep working, and a correctly spelled alias is defined below.

    Parameters
    ----------
    x : ndarray
        Scores (logits), e.g. one row per sample and one column per class.
    axis : int, default 1
        Axis holding the class scores.

    Returns
    -------
    ndarray of the same shape as `x` whose entries along `axis` sum to 1.
    """
    # Subtracting the per-slice maximum leaves the result unchanged but keeps
    # every exponent <= 0, so np.exp cannot overflow.
    exp_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    # keepdims=True keeps the denominator broadcastable against exp_x; without
    # it the sums line up with the wrong axis (or fail to broadcast at all).
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)


# Correctly spelled alias for the function above.
softmax = softamx

In [3]:
# Fetch the MNIST train/test splits through the Keras dataset helper.
# NOTE(review): per the Keras docs the images are uint8 arrays of shape
# (N, 28, 28) and the labels are integer digits 0-9 -- confirm for this version.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
def _scale_pixels(images):
    """Map raw integer pixel values onto float32 values in [0, 1].

    Arrays that are no longer integer-typed are returned unchanged, so running
    this cell a second time does not divide by 255 again.
    """
    if images.dtype.kind not in "iu":
        # Already converted by a previous run of this cell.
        return images
    return images.astype('float32') / 255.0


x_train = _scale_pixels(x_train)
x_test = _scale_pixels(x_test)


In [5]:
# Collapse each sample to a single feature vector: keep the leading (sample)
# axis and let NumPy infer the merged length of all remaining axes.
x_train_flat = x_train.reshape(len(x_train), -1)
x_test_flat = x_test.reshape(len(x_test), -1)

In [6]:
num_classes = 10
# Row k of the identity matrix is the one-hot vector for label k, so indexing
# the rows with the label array yields one float one-hot row per training label.
y_train_onehot = np.eye(num_classes, dtype=float)[y_train, :]

In [7]:
# Hyperparameters for the MNIST network.
# NOTE: learning_rate, num_epochs, input_size and hidden_size are reassigned
# by the later NAND-gate cell, so re-run this cell before reusing them.
input_size = 28 * 28   # features per flattened image (MNIST images are 28 x 28 per the Keras docs)
hidden_size = 128      # width of the hidden layer
batch_size = 128       # presumably samples per mini-batch; not referenced in the visible cells
num_epochs = 100       # number of training epochs
learning_rate = 0.1    # gradient-descent step size

In [10]:
# Initialise the input_size -> hidden_size -> num_classes network.
# Unit-variance weights make the hidden pre-activations grow with the fan-in,
# which saturates a sigmoid layer and stalls its gradients, so each matrix is
# scaled with the Glorot-normal factor sqrt(2 / (fan_in + fan_out)).
np.random.seed(42)  # fixed seed so the draws below are reproducible
W1 = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / (input_size + hidden_size))
b1 = np.zeros(hidden_size)
W2 = np.random.randn(hidden_size, num_classes) * np.sqrt(2.0 / (hidden_size + num_classes))
b2 = np.zeros(num_classes)

In [11]:
import numpy as np

# Define activation functions
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)), evaluated without overflow.

    A function of the same name is also defined in an earlier cell; this
    definition replaces it once this cell has run.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of `x`;
    every value lies in [0, 1].
    """
    x = np.asarray(x)
    # exp of a non-positive number lies in (0, 1], so it can never overflow,
    # unlike exp(-x) which blows up for large negative x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically the same value.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # np.where turns scalar input into a 0-d array; unwrap it to a NumPy scalar.
    return result[()] if result.ndim == 0 else result

def sigmoid_derivative(x):
    """Slope of the sigmoid, written in terms of the sigmoid's own output.

    `x` is expected to be an activation s = sigmoid(z), not the pre-activation
    z itself; the function evaluates s * (1 - s) element-wise.
    """
    complement = 1 - x
    return x * complement

# Define the NAND gate truth table
# X holds the four input pairs (shape (4, 2)); y holds the matching NAND
# outputs as a column (shape (4, 1)): only the input [1, 1] maps to 0.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[1], [1], [1], [0]])

# Hyperparameters
# NOTE: learning_rate, num_epochs, input_size and hidden_size reuse names that
# the earlier MNIST cells also assign, so running this cell overwrites them.
learning_rate = 0.1
num_epochs = 10000
input_size = 2
hidden_size = 2
output_size = 1

# Initialize weights and biases
# NOTE: W1, b1, W2 and b2 likewise replace the MNIST-sized arrays created in
# an earlier cell. The seed is reset first, so the two randn draws below are
# reproducible; their order must not change or the values will differ.
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
# Biases are kept 2-D (one row) so they broadcast over the batch axis and
# match the keepdims=True bias gradients computed in the training loop.
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

# Training loop
# Full-batch gradient descent: every epoch runs all rows of X through the
# network, backpropagates, and updates the module-level weights in place.
for epoch in range(num_epochs):
    # Forward pass
    # z* are pre-activations, a* are the sigmoid activations of each layer.
    z1 = np.dot(X, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)

    # Compute the loss (Mean Squared Error)
    # Evaluated before this epoch's weight update; used only for reporting.
    loss = np.mean((y - a2) ** 2)

    # Backpropagation
    # (a2 - y) is the gradient of 0.5 * sum((a2 - y) ** 2); the exact gradient
    # of the reported mean-squared loss would be 2 * (a2 - y) / y.size, i.e.
    # the same direction with a constant scale that acts like a learning-rate factor.
    d_loss_a2 = a2 - y
    # sigmoid_derivative is given the activation, not the pre-activation.
    d_a2_z2 = sigmoid_derivative(a2)
    d_loss_z2 = d_loss_a2 * d_a2_z2

    # Push the output error back through W2; this must read W2 before the
    # update further down modifies it.
    d_loss_a1 = np.dot(d_loss_z2, W2.T)
    d_a1_z1 = sigmoid_derivative(a1)
    d_loss_z1 = d_loss_a1 * d_a1_z1

    # Weight gradients sum over the batch via the matrix products; bias
    # gradients sum over the batch axis and keep a leading axis of length 1.
    d_loss_W2 = np.dot(a1.T, d_loss_z2)
    d_loss_b2 = np.sum(d_loss_z2, axis=0, keepdims=True)
    d_loss_W1 = np.dot(X.T, d_loss_z1)
    d_loss_b1 = np.sum(d_loss_z1, axis=0, keepdims=True)

    # Update weights and biases
    # In-place updates mutate the same array objects that predict() reads.
    W2 -= learning_rate * d_loss_W2
    b2 -= learning_rate * d_loss_b2
    W1 -= learning_rate * d_loss_W1
    b1 -= learning_rate * d_loss_b1

    # Print loss every 1000 epochs
    if (epoch + 1) % 1000 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss:.4f}")

# Test the trained neural network
def predict(x):
    """Run a forward pass through the two-layer network.

    Reads the module-level parameters W1, b1, W2 and b2 at call time, applying
    a sigmoid after each affine layer, and returns the output activation.
    """
    hidden = sigmoid(np.dot(x, W1) + b1)
    return sigmoid(np.dot(hidden, W2) + b2)

print("NAND Gate Predictions:")
# Walk the truth table row by row, pairing each input with its expected output.
for inputs, expected in zip(X, y):
    print(f"Input: {inputs}, Predicted Output: {predict(inputs)}, Expected Output: {expected}")


Epoch 1000/10000, Loss: 0.1199
Epoch 2000/10000, Loss: 0.0222
Epoch 3000/10000, Loss: 0.0075
Epoch 4000/10000, Loss: 0.0040
Epoch 5000/10000, Loss: 0.0026
Epoch 6000/10000, Loss: 0.0019
Epoch 7000/10000, Loss: 0.0015
Epoch 8000/10000, Loss: 0.0012
Epoch 9000/10000, Loss: 0.0010
Epoch 10000/10000, Loss: 0.0009
NAND Gate Predictions:
Input: [0 0], Predicted Output: [[0.99551424]], Expected Output: [1]
Input: [0 1], Predicted Output: [[0.9677281]], Expected Output: [1]
Input: [1 0], Predicted Output: [[0.9675852]], Expected Output: [1]
Input: [1 1], Predicted Output: [[0.03830163]], Expected Output: [0]
