In [1]:
from tensorflow.keras.datasets import mnist
from matplotlib import pyplot
import numpy as np

In [2]:
# Loading the dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [3]:
# Checking the shape of the dataset
X_train.shape

(60000, 28, 28)

In [4]:
# Reshaping the dataset to be 2D (combining the 2nd and 3rd dimension to one dimension (28 * 28))
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

In [5]:
# Checking the new shape of the dataset
X_train.shape

(60000, 784)

In [6]:
# Filtering the dataset to only use classes 0 and 1 (using boolean masks in numpy)
train_mask = (y_train == 0) | (y_train == 1)
test_mask = (y_test == 0) | (y_test == 1)

(X_train, X_test), (y_train, y_test) = (X_train[train_mask], X_test[test_mask]), (y_train[train_mask], y_test[test_mask])

In [7]:
# taking only 1000 sample from the dataset
(X_train, y_train), (X_test, y_test) = (X_train[:1000], y_train[:1000]), (X_test[:1000], y_test[:1000])

In [8]:
# Standardization function
def standardize(X, means, stds):
  stds[stds == 0] = 1
  return (X - means) / stds

In [9]:
# Sigmoid function
def sigmoid(z):
  return 1 / (1 + np.exp(-z))

In [10]:
# K-fold cross validation function
def K_fold_cross_validation(X, y, k, model):
  # Number of samples
  m = X.shape[0]

  indices = np.arange(m)
  folds = np.array_split(indices, k)

  accuracies = []

  for i in range(k):
    validate_indices = folds[i]
    train_indices = np.concatenate(folds[:i] + folds[i+1:])

    X_train, X_validate = X[train_indices], X[validate_indices]
    y_train, y_validate = y[train_indices], y[validate_indices]

    X_train_means = np.mean(X_train, axis=0)
    X_train_stds = np.std(X_train, axis=0)

    X_train = standardize(X_train, X_train_means, X_train_stds)
    X_validate = standardize(X_validate, X_train_means, X_train_stds)
    
    model.fit(X_train, y_train)
    accuracy = model.accuracy(X_validate, y_validate)
    accuracies.append(accuracy)
  
  return np.mean(accuracies)

In [11]:
# Implementing Logistic Regression
class LogisticRegression():
  def __init__(self, w, b, eta, epochs):
    self.w = w
    self.b = b
    self.eta = eta
    self.epochs = epochs
  
  def accuracy(self, X, y):
    y_pred = self.predict(X)
    return np.mean(y_pred == y) * 100
  
  def predict(self, X, threshold = 0.5):
    z = np.dot(self.w, X.T) + self.b
    return (sigmoid(z) >= threshold).astype(int)

  def fit(self, X, y):
    # Number of samples
    m = X.shape[0]
    for _ in range(self.epochs):
      z = np.dot(self.w, X.T) + self.b

      phiZ = sigmoid(z)

      self.w = self.w - ((self.eta * np.dot((phiZ - y), X)) / m)
      self.b = self.b - self.eta * np.mean(phiZ - y)


In [12]:
learning_rates = [0.1, 0.01, 0.001, 0.0001]

In [13]:
learning_rates_scores = []
for lr in learning_rates:
  model = LogisticRegression(np.random.rand(X_train.shape[1]), np.random.rand(), lr, 1000)
  score = K_fold_cross_validation(X_train, y_train, 10, model)

  learning_rates_scores.append(score)

for lr, accuracy in zip(learning_rates, learning_rates_scores):
  print(f'Learning rate: {lr} have accuracy: {accuracy}')

best_learning_rate = learning_rates[learning_rates_scores.index(max(learning_rates_scores))]
model = LogisticRegression(np.random.rand(X_train.shape[1]), np.random.rand(), best_learning_rate, 1000)

X_train_means = np.mean(X_train, axis=0)
X_train_stds = np.std(X_train, axis=0)

# Standardizing the train set
X_train = standardize(X_train, X_train_means, X_train_stds)

# Standardizing the test set
X_test = standardize(X_test, X_train_means, X_train_stds)

model.fit(X_train, y_train)
score = model.accuracy(X_test, y_test)

print(f'Final model accuracy: {score}')


Learning rate: 0.1 have accuracy: 99.3
Learning rate: 0.01 have accuracy: 98.6
Learning rate: 0.001 have accuracy: 93.6
Learning rate: 0.0001 have accuracy: 32.6
Final model accuracy: 98.3
