In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from keras.datasets import mnist

# Load the MNIST train/test splits (image arrays plus their integer labels)
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Report the shape of every split as a quick sanity check
print(f"X_train: {X_train.shape}")
print(f"Y_train: {Y_train.shape}")
print(f"X_test:  {X_test.shape}")
print(f"Y_test:  {Y_test.shape}")

X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)


In [None]:
# Keep only the samples labelled 0 or 1 so the task becomes binary classification
train_filter = np.where(np.isin(Y_train, (0, 1)))
test_filter = np.where(np.isin(Y_test, (0, 1)))

# Apply the same index selection to the images and to their labels
X_train = X_train[train_filter]
X_test = X_test[test_filter]
Y_train = Y_train[train_filter]
Y_test = Y_test[test_filter]

print(f"X_train: {X_train.shape}")
print(f"Y_train: {Y_train.shape}")
print(f"X_test:  {X_test.shape}")
print(f"Y_test:  {Y_test.shape}")

X_train: (12665, 28, 28)
Y_train: (12665,)
X_test:  (2115, 28, 28)
Y_test:  (2115,)


In [None]:
# Inspect the training images (NumPy abbreviates large arrays with "..." when printing)
print(X_train)

[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


In [None]:
# Inspect the training labels (abbreviated by NumPy when the array is large)
print(Y_train)

[0 1 1 ... 1 0 1]


In [None]:
# Standardization of data.
# The statistics are computed once, from the raw training set only, and are
# then applied to both splits. They must be captured *before* X_train is
# overwritten: the standardized X_train has mean ~0 and std ~1, so reading
# its mean/std afterwards would leave X_test effectively unscaled.
train_mean = X_train.mean()
train_std = X_train.std()

X_train = (X_train - train_mean) / train_std
X_test = (X_test - train_mean) / train_std

In [None]:
# Flatten each image into a single feature row so the data is 2D instead of 3D
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

print(f"X_train: {X_train.shape}")
print(f"Y_train: {Y_train.shape}")
print(f"X_test:  {X_test.shape}")
print(f"Y_test:  {Y_test.shape}")
# Number of training samples
print(X_train.shape[0])

X_train: (12665, 784)
Y_train: (12665,)
X_test:  (2115, 784)
Y_test:  (2115,)
12665


In [None]:
# Function to calculate the accuracy
def accuracy(predict, real, normalize=True):
  """Return the percentage (0-100) of predictions that equal the true labels.

  Args:
    predict: Predicted labels (array-like).
    real: Ground-truth labels (array-like) with the same shape as `predict`.
    normalize: Accepted for backward compatibility only; it has no effect and
      the result is always a percentage.

  Returns:
    The share of positions where `predict` equals `real`, times 100.

  Raises:
    ValueError: If `predict` and `real` do not have the same shape.
  """
  # Convert to arrays so that `==` compares element-wise; two plain Python
  # lists would compare as one bool and produce a meaningless score.
  predict = np.asarray(predict)
  real = np.asarray(real)

  # A shape mismatch would otherwise broadcast (or compare as a single value)
  # and silently yield a wrong percentage.
  if predict.shape != real.shape:
    raise ValueError(
        "predict and real must have the same shape, got "
        + str(predict.shape) + " and " + str(real.shape))

  return (np.sum(predict == real) / len(real)) * 100

In [None]:
# Implementing our activation function (sigmoid function)
def sigmoid(z):
    """Compute the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    The value is evaluated as exp(-log(1 + exp(-z))) using np.logaddexp,
    which is mathematically identical but never calls exp() on a large
    positive number, so very negative inputs no longer trigger an overflow
    RuntimeWarning.

    Args:
        z: Scalar or NumPy array of net inputs.

    Returns:
        Sigmoid of `z`, with values in [0, 1] and the same shape as `z`.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z))
    return np.exp(-np.logaddexp(0, -z))

In [None]:
# Implementing the cross entropy
def costFunction(x, y , alpha , w , b):
    """Compute the cross-entropy cost and take one gradient-descent step.

    Args:
        x: Feature matrix; rows are samples (x.shape[0] is used as the
            sample count) and columns must line up with `w`.
        y: Binary target labels, one per row of `x`.
        alpha: Learning rate applied to both gradients.
        w: Current weight vector.
        b: Current bias.

    Returns:
        Tuple (cost, w, b): the cost evaluated with the *incoming*
        parameters, followed by the updated weights and bias.
    """
    z = np.dot(w, x.T) + b  # Net input
    phi = sigmoid(z)  # Activation function

    # Cost function. The probabilities are clipped away from exactly 0 and 1
    # for the loss only, so a saturated sigmoid cannot produce log(0) and
    # turn the cost into inf/nan. The gradients below use the raw `phi`.
    eps = np.finfo(float).eps
    phi_safe = np.clip(phi, eps, 1.0 - eps)
    cost = np.mean(-y * np.log(phi_safe) - (1.0 - y) * np.log(1.0 - phi_safe))

    # Gradients of the cost, already scaled by the learning rate
    dj_dw = (np.dot((phi - y).T, x) * alpha) / x.shape[0]
    dj_db = np.mean((phi - y) * alpha)

    # Gradient-descent update
    w = w - dj_dw
    b = b - dj_db

    return cost , w , b

In [None]:
# Implementing the Logistic Regression
def gradientDescent(X , Y , alpha , num_iterations):
    """Fit logistic-regression parameters with batch gradient descent.

    Weights and bias are initialised randomly from NumPy's global random
    state, then updated `num_iterations` times by `costFunction` using the
    full data set on every step.

    Args:
        X: Feature matrix; X.shape[1] sets the number of weights.
        Y: Binary target labels, one per row of `X`.
        alpha: Learning rate passed to every update step.
        num_iterations: Number of update steps to run.

    Returns:
        Tuple (cost, w, b): the cost reported by the last update step
        (0.0 when `num_iterations` is 0), the weights and the bias.
    """
    cost = 0.0  # returned unchanged if the loop below never runs
    w = np.random.rand(X.shape[1])
    b = np.random.randn()

    for _ in range(num_iterations):
        cost , w , b = costFunction(X , Y , alpha , w , b)

    return cost , w , b

In [None]:
# Turn the model's output probabilities into hard 0/1 class labels
def predict(x, w, b):
  """Predict a binary label for every row of `x`.

  The net input `w . x + b` is passed through the sigmoid; samples whose
  probability is at least 0.5 are labelled 1, all others 0.

  Returns:
    Flattened integer array of 0/1 predictions.
  """
  probabilities = sigmoid(np.dot(w, x.T) + b)
  is_positive = probabilities >= 0.5
  return np.where(is_positive, 1, 0).flatten()

In [None]:
from numpy.lib.function_base import average
def k_fold_cross_valid(X, Y, k, learning_rates=None, num_iterations=500):
  """Select a learning rate for logistic regression by k-fold cross-validation.

  The samples are shuffled and split into `k` folds. For every fold and every
  candidate learning rate, a model is trained on the other k-1 folds and
  scored on the held-out fold. The learning rate with the highest accuracy
  averaged over all folds is reported and returned.

  Args:
    X: Feature matrix, one row per sample.
    Y: Labels, one per row of `X`.
    k: Number of folds; must satisfy 2 <= k <= number of samples.
    learning_rates: Candidate learning rates. Defaults to
      [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1].
    num_iterations: Gradient-descent steps per trained model.

  Returns:
    The learning rate with the best mean validation accuracy.

  Raises:
    ValueError: If `k` is out of range or `learning_rates` is empty.
  """
  if learning_rates is None:
    learning_rates = [0.0001, 0.0005, 0.001, 0.005, 0.01 , 0.1]
  learning_rates = list(learning_rates)
  if not learning_rates:
    raise ValueError("learning_rates must contain at least one value")

  n = X.shape[0]
  if k < 2 or k > n:
    raise ValueError("k must satisfy 2 <= k <= number of samples, got k="
                     + str(k) + " for " + str(n) + " samples")

  # Shuffle the data, then split it into k nearly equal folds. array_split
  # spreads the n % k leftover samples over the folds so every sample is
  # used for validation exactly once.
  indices = np.arange(n)
  np.random.shuffle(indices)
  folds = np.array_split(indices, k)

  # scores[fold, j] = validation accuracy of learning_rates[j] on that fold
  scores = np.zeros((k, len(learning_rates)))

  for fold, val_indices in enumerate(folds):
    train_indices = np.concatenate(folds[:fold] + folds[fold + 1:])
    X_train_fold = X[train_indices]
    Y_train_fold = Y[train_indices]
    X_val = X[val_indices]
    Y_val = Y[val_indices]

    for j, alpha in enumerate(learning_rates):
      cost , w , b = gradientDescent(X_train_fold , Y_train_fold , alpha , num_iterations)
      # Score on the held-out fold; scoring on the data the model was
      # trained on would not measure generalisation.
      pred = predict(X_val , w , b)
      accuracy_score = accuracy(pred , Y_val)
      scores[fold, j] = accuracy_score
      print("Fold#: " + str(fold) + " , Learning Rate: " + str(alpha) + " , Accuracy: " + str(accuracy_score))

  # Compare learning rates by their accuracy averaged over all folds rather
  # than by one lucky fold.
  mean_scores = scores.mean(axis=0)
  res_index = int(np.argmax(mean_scores))
  res = mean_scores[res_index]
  print("Maximum Accuracy: " + str(res) + " , Learning Rate: " + str(learning_rates[res_index]))

  return learning_rates[res_index]

In [None]:
# Run 3-fold cross-validation on the training data to compare the candidate learning rates
k_fold_cross_valid(X_train, Y_train, 3)

  cost = np.mean(-y * np.log(phi) - (1.0 - y) * np.log(1.0 - phi))
  cost = np.mean(-y * np.log(phi) - (1.0 - y) * np.log(1.0 - phi))


Fold#: 0 , Learning Rate: 0.0001 , Accuracy: 8.183325438180956
Fold#: 0 , Learning Rate: 0.0005 , Accuracy: 21.210326859308385
Fold#: 0 , Learning Rate: 0.001 , Accuracy: 48.70914258645192
Fold#: 0 , Learning Rate: 0.005 , Accuracy: 92.23117006158219
Fold#: 0 , Learning Rate: 0.01 , Accuracy: 96.13927048792041
Fold#: 0 , Learning Rate: 0.1 , Accuracy: 99.54997631454287
Fold#: 1 , Learning Rate: 0.0001 , Accuracy: 7.922785409758408
Fold#: 1 , Learning Rate: 0.0005 , Accuracy: 23.223590715300805
Fold#: 1 , Learning Rate: 0.001 , Accuracy: 45.108953102794885
Fold#: 1 , Learning Rate: 0.005 , Accuracy: 92.02984367598295
Fold#: 1 , Learning Rate: 0.01 , Accuracy: 96.44718143060162
Fold#: 1 , Learning Rate: 0.1 , Accuracy: 99.58550450023685
Fold#: 2 , Learning Rate: 0.0001 , Accuracy: 9.628138323069635
Fold#: 2 , Learning Rate: 0.0005 , Accuracy: 22.513027001421126
Fold#: 2 , Learning Rate: 0.001 , Accuracy: 48.42491710090005
Fold#: 2 , Learning Rate: 0.005 , Accuracy: 92.18379914732354
Fold