In [31]:
!pip install scikit-learn==1.0.2



In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [33]:
# 1. Implement the forward and backward function for NN.
# 2. Set the network structure as [64, 30, 10]
    #  a) Try to use sigmoid function as activation function.
    #  b) Try using the ReLU activation function.
    #  c) Try using the tanh activation function

In [34]:
def one_hot(y):
  # Find the unique categories and their inverse indices
  categories, inverse = np.unique(y, return_inverse=True)

  # Create the one-hot encoded matrix
  one_hot = np.zeros((y.size, categories.size))
  one_hot[np.arange(y.size), inverse] = 1
  return one_hot

In [35]:
# Load the dataset
digits = load_digits()
X = digits.data
y = digits.target

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# one-hot encoding
y_train_one_hot = one_hot(y_train)
y_test_one_hot = one_hot(y_test)

In [36]:
X_train_scaled[0]

array([ 0.        , -0.34169755, -0.46336049,  0.50836462, -2.54590607,
       -1.03722581, -0.40623424, -0.13101874, -0.06103492, -0.61725402,
        0.30881425,  0.00348328, -2.18152552, -1.35608548, -0.52465505,
       -0.13336005, -0.04991522,  0.12851911,  1.08857992, -0.19343737,
       -1.16010978, -1.27307852, -0.552537  , -0.11409248, -0.03733267,
        0.86365151,  1.13744682, -1.16195968, -1.60835913, -1.28296188,
       -0.62554872, -0.04573894,  0.        ,  1.09151514,  1.34318585,
       -1.12678131, -0.90106555, -1.13185292, -0.81347241,  0.        ,
       -0.06519029,  0.82828946,  1.39981472, -0.82540858,  0.69177178,
        1.17865528, -0.33784246, -0.09403434, -0.03963009,  0.15955797,
        1.14244768,  0.64507954, -1.42282149,  0.68917912,  1.47990131,
       -0.21608405, -0.02638899, -0.30677646, -0.49962244, -0.246272  ,
        0.84804385,  1.05270303,  0.45952251, -0.19710003])

In [44]:
def sigmoid(x, deriv=False):
  sigmoid_output = 1 / (1 + np.exp(-x))
  if deriv:
    return sigmoid_output * (1 - sigmoid_output)
  return sigmoid_output

def relu(x, deriv=False):
    if deriv:
        return np.where(x > 0, 1, 0)
    return np.maximum(0, x)

def tanh(x, deriv=False):
  if deriv:
    return 1 - np.tanh(x)**2
  return np.tanh(x)

In [45]:
class NeuralNets:
  def __init__(self, input_size, hidden_size, output_size, *, act="sigmoid"):
    self.activation = sigmoid if act == "sigmoid" else relu if act == "relu" else tanh
    self.b1 = np.zeros((1, hidden_size))
    self.b2 = np.zeros((1, output_size))

    if act == "relu":
      self.w1 = np.random.randn(input_size, hidden_size) * np.sqrt(2. / input_size)
      self.w2 = np.random.randn(hidden_size, output_size) * np.sqrt(2. / hidden_size)
    else:
      self.w1 = np.random.randn(input_size, hidden_size)
      self.w2 = np.random.randn(hidden_size, output_size)

  def feedforward(self, x):
    self.z1 = np.dot(x, self.w1) + self.b1 #First layer
    self.a1 = self.activation(self.z1)
    self.z2 = np.dot(self.a1, self.w2) + self.b2  #Output layer
    self.a2 = self.activation(self.z2)
    return self.a2

  def backprop(self, x, y):
    size = len(x)
    # Error at output layer
    output_error = self.a2 - y
    output_delta = (2/size) * output_error * self.activation(self.a2, deriv=True)

    # Error at hidden layer
    hidden_error = output_delta.dot(self.w2.T)
    hidden_delta = hidden_error * self.activation(self.a1, deriv=True)

    # Update weights and biases
    self.b1 -= self.alpha * np.sum(hidden_delta, axis=0, keepdims=True)
    self.b2 -= self.alpha * np.sum(output_delta, axis=0, keepdims=True)
    self.w1 -= self.alpha * x.T.dot(hidden_delta)
    self.w2 -= self.alpha * self.a1.T.dot(output_delta)

  def train(self, X, Y, *, learning_rate=0.01, epochs=1000):
    self.alpha = learning_rate
    for i in range(epochs):
      for x, y in zip(X, Y):
        x = x.reshape(1, -1)  # Reshape x to (1, 64) before passing to feedforward and backprop
        self.feedforward(x)
        self.backprop(x, y)
      if i%20 == 0:
        print(f"Epoch {i} completed ---------------->")
        print(f"The accuracy score is {self.accuracy(Y, self.predict(X))}")

  def predict(self, X):
    return [np.argmax(self.feedforward(x)) for x in X]

  def accuracy(self, Y, predictions):
    labels = [np.argmax(y) for y in Y]
    return np.mean(np.array(predictions) == np.array(labels))

In [39]:
model1 = NeuralNets(64, 30, 10, act="sigmoid")
model1.train(X_train_scaled, y_train_one_hot, learning_rate=0.01, epochs=200)

y_predicted = model1.predict(X_test_scaled)
accuracy_score = model1.accuracy(y_test_one_hot, y_predicted)
print(f"The predicted score on the test set with sigmoid activation fn: {accuracy_score}")

Epoch 0 completed ---------------->
The accuracy score is 0.5991649269311065
Epoch 20 completed ---------------->
The accuracy score is 0.9464161447459986
Epoch 40 completed ---------------->
The accuracy score is 0.9596381350034795
Epoch 60 completed ---------------->
The accuracy score is 0.9665970772442589
Epoch 80 completed ---------------->
The accuracy score is 0.9721642310368824
Epoch 100 completed ---------------->
The accuracy score is 0.9728601252609603
Epoch 120 completed ---------------->
The accuracy score is 0.9770354906054279
Epoch 140 completed ---------------->
The accuracy score is 0.9798190675017397
Epoch 160 completed ---------------->
The accuracy score is 0.9826026443980515
Epoch 180 completed ---------------->
The accuracy score is 0.9839944328462074
The predicted score on the test set with sigmoid activation fn: 0.9555555555555556


In [40]:
model2 = NeuralNets(64, 30, 10, act="relu")
model2.train(X_train_scaled, y_train_one_hot, learning_rate=0.15, epochs=200)

y_predicted = model2.predict(X_test_scaled)
accuracy_score = model2.accuracy(y_test_one_hot, y_predicted)
print(f"The predicted score on the test set with relu activation fn: {accuracy_score}")

Epoch 0 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 20 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 40 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 60 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 80 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 100 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 120 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 140 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 160 completed ---------------->
The accuracy score is 0.10090466249130133
Epoch 180 completed ---------------->
The accuracy score is 0.10090466249130133
The predicted score on the test set with relu activation fn: 0.09166666666666666


In [41]:
model3 = NeuralNets(64, 30, 10, act="tanh")
model3.train(X_train_scaled, y_train_one_hot, learning_rate=0.01, epochs=200)

y_predicted = model3.predict(X_test_scaled)
accuracy_score = model3.accuracy(y_test_one_hot, y_predicted)
print(f"The predicted score on the test set with tanh activation fn: {accuracy_score}")

Epoch 0 completed ---------------->
The accuracy score is 0.535142658315936
Epoch 20 completed ---------------->
The accuracy score is 0.8677800974251914
Epoch 40 completed ---------------->
The accuracy score is 0.9178844815588031
Epoch 60 completed ---------------->
The accuracy score is 0.9283228949199722
Epoch 80 completed ---------------->
The accuracy score is 0.9436325678496869
Epoch 100 completed ---------------->
The accuracy score is 0.9519832985386222
Epoch 120 completed ---------------->
The accuracy score is 0.9631176061238692
Epoch 140 completed ---------------->
The accuracy score is 0.9624217118997912
Epoch 160 completed ---------------->
The accuracy score is 0.965205288796103
Epoch 180 completed ---------------->
The accuracy score is 0.9638135003479471
The predicted score on the test set with tanh activation fn: 0.9416666666666667


In [42]:
# 3. Experiment on your own selected different hyper-parameters -> number of epochs and the learning rate
# 4. Report your findings.

In [43]:
# Experimenting different learning rate and epochs with sigmoid activation function

print(f"With learning rate={0.000001} and epochs={50}")
model4 = NeuralNets(64, 30, 10, act="sigmoid")
model4.train(X_train_scaled, y_train_one_hot, learning_rate=0.000001, epochs=50)

y_predicted = model4.predict(X_test_scaled)
accuracy_score = model4.accuracy(y_test_one_hot, y_predicted)
print(f"The predicted score on the test set with sigmoid activation fn: {accuracy_score}\n")

print(f"With learning rate={0.0001} and epochs={100}")
model5 = NeuralNets(64, 30, 10, act="sigmoid")
model5.train(X_train_scaled, y_train_one_hot, learning_rate=0.0001, epochs=100)

y_predicted = model5.predict(X_test_scaled)
accuracy_score = model5.accuracy(y_test_one_hot, y_predicted)
print(f"The predicted score on the test set with sigmoid activation fn: {accuracy_score}\n")

print(f"With learning rate={0.01} and epochs={500}")
model6 = NeuralNets(64, 30, 10, act="sigmoid")
model6.train(X_train_scaled, y_train_one_hot, learning_rate=0.1, epochs=400)

y_predicted = model6.predict(X_test_scaled)
accuracy_score = model6.accuracy(y_test_one_hot, y_predicted)
print(f"The predicted score on the test set with sigmoid activation fn: {accuracy_score}\n")


With learning rate=1e-06 and epochs=50
Epoch 0 completed ---------------->
The accuracy score is 0.09116214335421016
Epoch 20 completed ---------------->
The accuracy score is 0.09046624913013222
Epoch 40 completed ---------------->
The accuracy score is 0.0918580375782881
The predicted score on the test set with sigmoid activation fn: 0.09444444444444444

With learning rate=0.0001 and epochs=100
Epoch 0 completed ---------------->
The accuracy score is 0.10786360473208072
Epoch 20 completed ---------------->
The accuracy score is 0.23451635351426584
Epoch 40 completed ---------------->
The accuracy score is 0.3583855254001392
Epoch 60 completed ---------------->
The accuracy score is 0.46903270702853167
Epoch 80 completed ---------------->
The accuracy score is 0.5567153792623522
The predicted score on the test set with sigmoid activation fn: 0.6

With learning rate=0.01 and epochs=500
Epoch 0 completed ---------------->
The accuracy score is 0.9137091162143354
Epoch 20 completed ----

  sigmoid_output = 1 / (1 + np.exp(-x))


Epoch 380 completed ---------------->
The accuracy score is 0.9965205288796103
The predicted score on the test set with sigmoid activation fn: 0.9416666666666667



In experimenting with different learning rates and epochs, the results indicate significant impacts on model training and performance. With a very low learning rate of 0.000001, the model barely learned, achieving about 9% accuracy across 50 epochs. Increasing the learning rate to 0.0001 for 100 epochs improved accuracy significantly, reaching 60% on the test set, showing that a slightly higher rate facilitated better learning. However, the highest learning rate of 0.01, used over 500 epochs, quickly boosted the accuracy to over 99%, although it caused numerical stability issues like overflow in sigmoid computations. This suggests that while a higher learning rate can accelerate learning, it may also lead to potential instability, indicating the need for a balanced approach in setting the learning rate to ensure both effective learning and numerical stability.