<a href="https://colab.research.google.com/github/S-Ali-S/NLP/blob/main/NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Sigmoid and Softmax activation functions
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so large
    negative inputs do not overflow ``np.exp`` (and trigger a RuntimeWarning)
    the way the direct formula does.

    Args:
        x: Real scalar or array-like.

    Returns:
        The sigmoid of ``x``: a NumPy scalar for scalar input, otherwise an
        array with the same shape as ``x``. Floating-point inputs keep their
        dtype; boolean and integer inputs are computed in float64.
    """
    x = np.asarray(x)
    if x.dtype.kind in "biu":
        # Negating unsigned integers wraps around, so do the math in floats.
        x = x.astype(np.float64)
    decay = np.exp(-np.abs(x))  # always in [0, 1], so it cannot overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: the algebraically equal e^x / (1 + e^x).
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)`` element-wise.

    This is the derivative of the logistic sigmoid expressed in terms of the
    sigmoid's *output*: if ``x = sigmoid(z)`` then ``d sigmoid / dz`` equals
    ``x * (1 - x)``. Pass an already-activated value, not the
    pre-activation ``z``.

    Args:
        x: Scalar or array of sigmoid outputs.

    Returns:
        ``x * (1 - x)``, with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

def softmax(x, axis=1):
    """Return the softmax of ``x`` along ``axis``.

    The maximum is subtracted *per slice along ``axis``* before
    exponentiating. Subtracting a single global maximum keeps ``np.exp``
    from overflowing but lets any row that lies far below that maximum
    underflow to all zeros, which turns the normalisation into 0 / 0 = NaN.
    Using each row's own maximum guarantees at least one ``exp(0) = 1`` term
    per row, so the denominator is never zero.

    Args:
        x: Array-like of scores; with the default ``axis`` it must have at
            least two dimensions (one row of scores per sample).
        axis: Axis along which the probabilities are normalised. Defaults to
            1, i.e. across the columns of each row.

    Returns:
        Array of the same shape as ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=axis, keepdims=True)

# Load MNIST as (images, labels) pairs for the train and test splits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Boolean masks that keep only the samples labelled 1, 2, or 3
filter_train, filter_test = (
    np.isin(labels, [1, 2, 3]) for labels in (y_train, y_test)
)

# Apply each mask to both the images and the labels of its split
x_train_filtered, y_train_filtered = x_train[filter_train], y_train[filter_train]
x_test_filtered, y_test_filtered = x_test[filter_test], y_test[filter_test]

# Take the first 20 samples of each digit (1, 2, 3) from both splits.
# One boolean mask per digit is built once and reused for images and labels.
train_digit_masks = [y_train_filtered == digit for digit in [1, 2, 3]]
test_digit_masks = [y_test_filtered == digit for digit in [1, 2, 3]]

# Stack the per-digit groups into single arrays; samples stay grouped by
# digit, in the order 1, 2, 3.
x_train_final = np.vstack([x_train_filtered[mask][:20] for mask in train_digit_masks])
y_train_final = np.hstack([y_train_filtered[mask][:20] for mask in train_digit_masks])
x_test_final = np.vstack([x_test_filtered[mask][:20] for mask in test_digit_masks])
y_test_final = np.hstack([y_test_filtered[mask][:20] for mask in test_digit_masks])

# Convert the images to float32, divide by 255, and flatten each 28x28 image
# into a 784-element row
x_train_final = (x_train_final.astype('float32') / 255.0).reshape(-1, 28 * 28)
x_test_final = (x_test_final.astype('float32') / 255.0).reshape(-1, 28 * 28)

# Shift the labels 1, 2, 3 -> 0, 1, 2 so they can serve as class indices
y_train_final -= 1
y_test_final -= 1

# One-hot encode the shifted labels (3 classes), one row per sample
y_train_final = to_categorical(y_train_final, 3)
y_test_final = to_categorical(y_test_final, 3)

# Network dimensions
input_size = 28 * 28  # one input per pixel of a flattened 28x28 image
hidden_size = 10      # neurons in the single hidden layer
output_size = 3       # one output per class (digits 1, 2, and 3)

# Hyperparameters
learning_rate = 0.01
epochs = 2000

# Seed NumPy's global RNG so the initial weights are reproducible, then draw
# small Gaussian weights (scaled by 0.01) and start every bias at zero.
# The input->hidden matrix is drawn first, then hidden->output.
np.random.seed(42)
weights_input_hidden = 0.01 * np.random.standard_normal((input_size, hidden_size))
bias_hidden = np.zeros(shape=(1, hidden_size))
weights_hidden_output = 0.01 * np.random.standard_normal((hidden_size, output_size))
bias_output = np.zeros(shape=(1, output_size))

# Train with full-batch gradient descent: every epoch uses all training samples
n_samples = x_train_final.shape[0]
for epoch in range(epochs):
    # Forward pass: input -> sigmoid hidden layer -> softmax output
    hidden_input = x_train_final.dot(weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)
    output_input = hidden_output.dot(weights_hidden_output) + bias_output
    output = softmax(output_input)

    # Mean cross-entropy over the batch; 1e-9 keeps log() away from zero
    loss = -np.sum(y_train_final * np.log(output + 1e-9)) / n_samples

    # Backpropagation. Both error terms are computed before any parameter is
    # changed, so hidden_error uses the pre-update hidden->output weights.
    output_error = output - y_train_final
    hidden_error = (
        output_error.dot(weights_hidden_output.T) * sigmoid_derivative(hidden_output)
    )

    # Gradient-descent step: each gradient is averaged over the batch and
    # depends only on the activations/errors above, not on the other updates.
    weights_input_hidden -= learning_rate * (x_train_final.T.dot(hidden_error) / n_samples)
    bias_hidden -= learning_rate * (hidden_error.sum(axis=0, keepdims=True) / n_samples)
    weights_hidden_output -= learning_rate * (hidden_output.T.dot(output_error) / n_samples)
    bias_output -= learning_rate * (output_error.sum(axis=0, keepdims=True) / n_samples)

    # Report the loss on every 10th epoch (epochs 1, 11, 21, ... as displayed)
    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}')

# Evaluate on the test set: run the same forward pass with the trained parameters
hidden_input = x_test_final.dot(weights_input_hidden) + bias_hidden
hidden_output = sigmoid(hidden_input)
output_input = hidden_output.dot(weights_hidden_output) + bias_output
output = softmax(output_input)

# Class indices (0, 1, 2): the most probable class per sample, and the
# position of the 1 in each one-hot target row
predictions = np.argmax(output, axis=1)
true_classes = np.argmax(y_test_final, axis=1)

# Print predicted and desired labels for each test sample,
# converting class indices 0, 1, 2 back to digits 1, 2, 3
for predicted_class, true_class in zip(predictions, true_classes):
    predicted_label = predicted_class + 1
    desired_label = true_class + 1
    print(f"Predicted: {predicted_label}, Desired: {desired_label}")

# Accuracy is the fraction of samples whose predicted class matches the target
accuracy = np.mean(predictions == true_classes)
print(f'Test accuracy: {accuracy * 100:.2f}%')

Epoch 1/2000, Loss: 1.0988
Epoch 11/2000, Loss: 1.0987
Epoch 21/2000, Loss: 1.0986
Epoch 31/2000, Loss: 1.0986
Epoch 41/2000, Loss: 1.0985
Epoch 51/2000, Loss: 1.0985
Epoch 61/2000, Loss: 1.0985
Epoch 71/2000, Loss: 1.0984
Epoch 81/2000, Loss: 1.0984
Epoch 91/2000, Loss: 1.0983
Epoch 101/2000, Loss: 1.0983
Epoch 111/2000, Loss: 1.0982
Epoch 121/2000, Loss: 1.0982
Epoch 131/2000, Loss: 1.0981
Epoch 141/2000, Loss: 1.0981
Epoch 151/2000, Loss: 1.0980
Epoch 161/2000, Loss: 1.0979
Epoch 171/2000, Loss: 1.0979
Epoch 181/2000, Loss: 1.0978
Epoch 191/2000, Loss: 1.0978
Epoch 201/2000, Loss: 1.0977
Epoch 211/2000, Loss: 1.0976
Epoch 221/2000, Loss: 1.0975
Epoch 231/2000, Loss: 1.0974
Epoch 241/2000, Loss: 1.0974
Epoch 251/2000, Loss: 1.0973
Epoch 261/2000, Loss: 1.0972
Epoch 271/2000, Loss: 1.0971
Epoch 281/2000, Loss: 1.0970
Epoch 291/2000, Loss: 1.0968
Epoch 301/2000, Loss: 1.0967
Epoch 311/2000, Loss: 1.0966
Epoch 321/2000, Loss: 1.0965
Epoch 331/2000, Loss: 1.0963
Epoch 341/2000, Loss: 1.0