<a href="https://colab.research.google.com/github/MalihaUCF/Machine-Learning-Course-Assignments--Spring-2019/blob/master/Assignment1/Problem1/Problem1_correct.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [30]:
#This code trains 1 neural network classifier for digit 5


#Code Author : Maliha Arif
#PID:4506817
    

#Downloading MNIST dataset

from keras.datasets import mnist
import numpy as np

(train_images_original,train_labels_original),(test_images_original,test_labels_original)=mnist.load_data()  



print('Training Data Shape',train_images_original.shape)
print('Test Data Shape',test_images_original.shape)
print('The train and test labels look like,' ,train_labels_original.shape,test_labels_original.shape)


Training Data Shape (60000, 28, 28)
Test Data Shape (10000, 28, 28)
The train and test labels look like, (60000,) (10000,)


In [31]:
#The downloaded images are reshaped to 1D feature vector of size 28*28


train_images_original = train_images_original.reshape(60000, 28*28) 
test_images_original = test_images_original.reshape(10000, 28*28)

train_images_original = train_images_original.T 
test_images_original=test_images_original.T

 
train_images_original = train_images_original.astype('float32') 
test_images_original = test_images_original.astype('float32') 

#The images are normalized to have values between 0 and 1
train_images_original /= 255 
test_images_original /= 255


print(train_images_original.shape)
print(test_images_original.shape)
print(train_labels_original.shape)
print(test_labels_original.shape)

(784, 60000)
(784, 10000)
(60000,)
(10000,)


In [32]:
#Here we modify class labels using np.where so that we can train a 1 class classifier that identifies class 5
#Train labels with digit 5 label are labelled as 1 and rest other digits are labelled 0

y_new = np.zeros(train_labels_original.shape)
y_new[np.where(train_labels_original == 5)[0]] = 1
train_labels = y_new

y_new = np.zeros(test_labels_original.shape)
y_new[np.where(test_labels_original == 5)[0]] = 1
test_labels = y_new

print(train_labels.shape)
print(test_labels.shape)

(60000,)
(10000,)


In [0]:

#Here we define activation and all loss functions

#defining sigmoid activation function
def sigmoid(z):
  s = 1 / (1 + np.exp(-z))
  return s


#defining mean squared error loss
def mse(Y,A):   
  
  m =  m = Y.shape[1]
  L = np.square(Y - A).mean()
  return L
  
  
#defining mean squared error loss with activation function
def squared_loss(w,b,X,Y):
  
  m =  m = X.shape[1]
  A = sigmoid(np.dot(w.T, X) + b)
  L = np.square(Y - A).mean()   #calculating loss
 
  # binary entropy gradients
  dw = (1 / m) * np.dot(X, (A - Y).T)  #weight update
  db = (1 / m) * np.sum(A - Y)
  
  grads = {"dw": dw,      #storing gradients in dictionary
            "db": db }
    
  return L,grads

 
#binary cross entropy   
def compute_loss(Y, Y_hat):   

  m = Y.shape[1]
  L = -(1./m) * ( np.sum( np.multiply(np.log(Y_hat),Y) ) + np.sum( np.multiply(np.log(1-Y_hat),(1-Y)) ) )

  return L

#binary cross entropy with activation function
def binary_cross_entropy(w,b,X,Y):  
  
  m =  m = X.shape[1]
  A = sigmoid(np.dot(w.T, X) + b)
  L = (- 1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A)))  #calculating loss
 
  L = np.squeeze(L)
  dw = (1 / m) * np.dot(X, (A - Y).T)  #weight update
  db = (1 / m) * np.sum(A - Y)
  
  grads = {"dw": dw,    #storing gradients in dictionary
            "db": db }
    
  return L,grads

In [0]:

#We initialize weights and biases
def initialize_wb(size):
  w = np.zeros(shape=(size, 1))  #an empty vector for weight
  b = 0  #bias simply is 0, scalar quantity
  return w,b


#this function splits the training set into slices to train in batches

def mini_batches(X_whole, Y_whole, batchsize):
  for index in range(0, X_whole.shape[0] - batchsize + 1, batchsize):
    batch = slice(index, index + batchsize)
    yield X_whole[batch], Y_whole[batch]

In [0]:

#Defining some hyperparameters
learning_rate = 0.01
batch_size = 128
classes = 1

#our main optimize function - Minibatch - Stochastic Gradient Descent is defined here

def SGD(w, b, X, Y, epochs, learning_rate):
  
  loss_total = []
  print(X.shape)
  print(Y.shape)
  
  print('\n Results for Network 5\n\n')
  
  for i in range(epochs):
      
    for batch in mini_batches(X.T, Y.T, batch_size):
    
       x_batch, y_batch = batch
       #print(x_batch.shape)
       L,grads = squared_loss(w, b, x_batch.T, y_batch.T)  #Calling function MSE 
       dw = grads["dw"]
       db = grads["db"]
       w = w - learning_rate * dw   #updating weights
       b = b - learning_rate * db   #updating biases
        
       #print(w.shape)

       
    loss_total.append(L)
    print (" Epoch %i , Loss: %f" % (i, L))
            
  final_wb = {"w": w,
            "b": b}

  grads = {"dw": dw,
               "db": db}

  return final_wb, loss_total

In [36]:
#This evaluate function is used to test our model

def evaluate(w, b, X):
  m = X.shape[1]
  y_hat = np.zeros((1, m))
  w = w.reshape(X.shape[0], classes)
  A = sigmoid(np.dot(w.T, X) + b)
  for i in range(A.shape[1]):
    y_hat[0, i] = 1 if A[0, i] > 0.5 else 0
  return y_hat


#This function defines our model, test set is also then evaluated

def model(X_train, Y_train, X_test, Y_test, num_iterations=2000):
   w, b = initialize_wb(X_train.shape[0])
   final_wb,loss = SGD(w, b, X_train, Y_train, num_iterations, learning_rate)
   #print(w.shape)
   w = final_wb["w"]
   b = final_wb["b"]
   test_accuracy = evaluate(w, b, X_test)
   print("\n\n Test accuracy is : {} %".format(100 - np.mean(np.abs(test_accuracy - Y_test)) * 100))

#we call our model function and begin training
train_classifier = model(train_images_original, train_labels, test_images_original, test_labels, num_iterations = 15)   
  

(784, 60000)
(60000,)

 Results for Network 5


 Epoch 0 , Loss: 0.038486
 Epoch 1 , Loss: 0.022932
 Epoch 2 , Loss: 0.016934
 Epoch 3 , Loss: 0.014101
 Epoch 4 , Loss: 0.012507
 Epoch 5 , Loss: 0.011494
 Epoch 6 , Loss: 0.010796
 Epoch 7 , Loss: 0.010287
 Epoch 8 , Loss: 0.009901
 Epoch 9 , Loss: 0.009601
 Epoch 10 , Loss: 0.009361
 Epoch 11 , Loss: 0.009167
 Epoch 12 , Loss: 0.009008
 Epoch 13 , Loss: 0.008875
 Epoch 14 , Loss: 0.008764


 Test accuracy is : 96.46 %
