<a href="https://colab.research.google.com/github/Abdullah-Al-Arafat/CAP-5610-Machine-Learning/blob/master/ML_hw1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**CAP 5610: Machine Learning** 
Home work 1

In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import keras
from keras.datasets import mnist

Using TensorFlow backend.


In [2]:
# load MNIST dataset from keras and preprocess the dataset

(x_train, y_train), (x_test, y_test) = mnist.load_data()

X_train = x_train.reshape(x_train.shape[0], -1)/255
X_test = x_test.reshape(x_test.shape[0], -1)/255

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


Necessary functions for forward and back propagation, and loss calculation

In [0]:
def sigmoid (x):
  y = 1/(1 + np.exp(-x))
  return y

def softmax(Z):
  return np.exp(Z)/np.sum(np.exp(Z), axis = 1, keepdims= True)

def d_sigmoid (x):   # differentiation of sigmoid function 
  y = sigmoid(x)*(1-sigmoid(x))
  return y

def forward_pro(X,parameters):
  W = parameters['W']
  b = parameters['b']
  y = sigmoid(np.dot(X, W) + b)
  return y

def forward_softmax(X, parameters):
  W = parameters['W']
  b = parameters['b']
  Z = np.dot(X, W) + b
  y = softmax(Z)
  return y

def loss_cross(y, yest):
  loss = - np.sum(y*np.log(yest)+(1-y)*np.log(1 - yest))/y.shape[0]
  return loss

def loss_mse (y, yest):
  loss = np.sum((y-yest)*(y-yest))/y.shape[0]
  return loss

def loss_cc (y, yest): #catogorical cross entropy 
  loss = - np.sum(y*np.log(yest))/y.shape[0]
  return loss

**Problem 1:**
***logistic regression with mean squared error loss***

In [0]:
def model_mse(X_train, Y_train, learning_rate, batch_size, parameter, epochs):
  
  W = parameter['W']
  b = parameter['b']
  m = X_train.shape[0] # number of samples in training set
  
  for j in range (epochs):
    
    shuffled_indices = np.random.permutation(m)
    X_shuffled = X_train[shuffled_indices]
    y_shuffled = Y_train[shuffled_indices]
    
    for i in range(0, m, batch_size):
      
      X = X_shuffled[i : i + batch_size]
      Y = y_shuffled[i : i + batch_size]  

      y_est = forward_pro(X, parameter)   

      loss = loss_mse(Y, y_est)
      
      dZ = (1/batch_size)*y_est*(1-y_est)*(y_est - Y) # dZ = A'*(A-Y) for mean square error
      dW = np.dot(X.T, dZ)
      db = np.sum(dZ)      

      W = W - learning_rate*dW
      b = b -learning_rate*db
      
      parameter['W'] = W
      parameter['b'] = b  
  
  return parameter
    
    

In [0]:
# run ten individual one vs all classifier for mean square error
Parameter = {}
for i in range(10):
  parameters = {'W' : np.zeros((X_train.shape[1], 1)),
             'b': np.zeros(1)}
  Y = (y_train == i).reshape(-1,1)*1
  Parameter[i] = model_mse(X_train, Y, learning_rate = 0.001, batch_size = 40, parameter = parameters , epochs = 20)

In [0]:
# Accuracy for one vs all classifiers 
for i in range (10):
  Y_est = (forward_pro(X_test, Parameter[i])>0.5)*1
  acc = np.sum((Y_est == (y_test == i).reshape(-1,1))*1)/y_test.shape[0]
  print ("Accuracy for %d vs all: %f" %(i, acc))

Accuracy for 0 vs all: 0.979300
Accuracy for 1 vs all: 0.984200
Accuracy for 2 vs all: 0.952600
Accuracy for 3 vs all: 0.952900
Accuracy for 4 vs all: 0.957300
Accuracy for 5 vs all: 0.922400
Accuracy for 6 vs all: 0.973200
Accuracy for 7 vs all: 0.970800
Accuracy for 8 vs all: 0.908000
Accuracy for 9 vs all: 0.921600


In [0]:
# Accuracy of the overall classifiers using argmax 

Y_est = np.zeros((10000, 10))
for i in range (10):
  Y_est[:,i] = np.squeeze(forward_pro(X_test, Parameter[i]))
   
ara=np.argmax(Y_est, axis = 1) 
acc = np.sum(ara == y_test)/y_test.shape[0]

print ("Accuracy for the overall classifier using argmax: %f" %acc)  


Accuracy for the overall classifier using argmax: 0.838200


**Problem 2:**  ***logistic regression with binary cross entropy loss*** 

In [0]:
def model_cross(X_train, Y_train, learning_rate, batch_size,parameters, epochs):
  
  W = parameters['W']
  b = parameters['b']
  m = X_train.shape[0] # number of samples in training set
  
  for j in range (epochs):
    
    shuffled_indices = np.random.permutation(m)
    X_shuffled = X_train[shuffled_indices]
    y_shuffled = Y_train[shuffled_indices]
    
    for i in range(0, m, batch_size):
      
      X = X_shuffled[i : i + batch_size]
      Y = y_shuffled[i : i + batch_size]
      
      y_est = forward_pro(X, parameters)   

      loss = loss_cross(Y, y_est)
      
      dZ = (1/batch_size)*(y_est - Y) # dZ = A - Y for cross entropy loss
      dW = np.dot(X.T, dZ)
      db = np.sum(dZ)      

      W = W - learning_rate*dW
      b = b -learning_rate*db
      
      parameters['W'] = W
      parameters['b'] = b
      
  
  
  return parameters

In [0]:
# run ten individual one vs all classifier for binary cross entropy loss
Parameter = {}
for i in range(10):
  pmeter = {'W' : np.zeros((X_train.shape[1], 1)),
             'b': np.zeros(1)}
  Y = (y_train == i).reshape(-1,1)*1
  Parameter[i] = model_cross(X_train, Y, learning_rate = 0.001, batch_size = 40, parameters = pmeter, epochs = 20)

In [0]:
# Accuracy for one vs all classifiers 
for i in range (10):
  Y_est = (forward_pro(X_test, Parameter[i])>0.5)*1
  acc = np.sum((Y_est == (y_test == i).reshape(-1,1))*1)/y_test.shape[0]
  print ("Accuracy for %d vs all: %f" %(i, acc))

Accuracy for 0 vs all: 0.988400
Accuracy for 1 vs all: 0.988700
Accuracy for 2 vs all: 0.969800
Accuracy for 3 vs all: 0.967400
Accuracy for 4 vs all: 0.972400
Accuracy for 5 vs all: 0.957100
Accuracy for 6 vs all: 0.979700
Accuracy for 7 vs all: 0.978700
Accuracy for 8 vs all: 0.941100
Accuracy for 9 vs all: 0.952000


In [0]:
# Accuracy of the overall classifiers using argmax 

Y_est = np.zeros((10000, 10))
for i in range (10):
  Y_est[:,i] = np.squeeze(forward_pro(X_test, Parameter[i]))
   
ara=np.argmax(Y_est, axis = 1) 
acc = np.sum(ara == y_test)/y_test.shape[0]

print ("Accuracy for the overall classifier using argmax: %f" %acc)  

Accuracy for the overall classifier using argmax: 0.882700


**Problem 3:** ***softmax and categorical cross entropy loss***

In [0]:
def model_softmax(X_train, Y_train, learning_rate, batch_size, parameters, epochs):
  
  W = parameters['W']
  b = parameters['b']
  m = X_train.shape[0] # number of samples in training set
  
  for j in range (epochs):
    
    shuffled_indices = np.random.permutation(m)
    X_shuffled = X_train[shuffled_indices]
    y_shuffled = Y_train[shuffled_indices]
    
    for i in range(0, m, batch_size):
      
      X = X_shuffled[i : i + batch_size]
      Y = y_shuffled[i : i + batch_size]  

      y_est = forward_softmax(X, parameters)   
     
      loss = loss_cc(Y, y_est)
      
      dZ = (1/batch_size)*(y_est - Y) 
      '''dzi = {ai - 1, if ith level is true. ai otherwise} 
      which vectorized as dZ = A-Y. dZ is divided by batch size for normalizing 
      '''  
      dW = np.dot(X.T, dZ)
      db = np.sum(dZ)      

      W = W - learning_rate*dW
      b = b -learning_rate*db
      
      parameters['W'] = W
      parameters['b'] = b     
  
  
  return parameters

In [0]:
# converting labels(y) into one hot vector 

a = y_train
Y = np.zeros((a.size, a.max()+1))
Y[np.arange(a.size),a] = 1

parameters_3 = {'W' : np.zeros((X_train.shape[1], 10)),
             'b': np.zeros((1,10))}

In [0]:
Parameter = model_softmax(X_train, Y, learning_rate = 0.001, batch_size = 40, parameters = parameters_3, epochs = 20)

In [0]:
y_est = forward_softmax(X_test, Parameter)
Y_est = np.argmax(y_est, axis = 1)
acc = np.sum(Y_est == y_test)/y_test.shape[0]

print ("Accuracy for softmax classifier: %f" %acc)
             

Accuracy for softmax classifier: 0.892400


**Problem 4:** ***Keras implementation ***

In [3]:
from keras import models
from keras import layers

network = models.Sequential()
network.add(layers.Dense(10, activation='softmax', input_shape=(28 * 28,)))
network.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [0]:
# preprocessing of dataset

X_train = x_train.reshape(x_train.shape[0], -1)/255
X_test = x_test.reshape(x_test.shape[0], -1)/255

a = y_train
Y_train = np.zeros((a.size, a.max()+1))
Y_train[np.arange(a.size),a] = 1
Y_train = Y_train

b = y_test
Y_test = np.zeros((b.size, b.max()+1))
Y_test[np.arange(b.size),b] = 1
Y_test = Y_test

In [0]:
from keras.optimizers import sgd

network.compile(optimizer='sgd',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

In [6]:
epochs = 20
history = network.fit(X_train, 
                      Y_train, 
                      epochs=epochs, 
                      batch_size=40, 
                      validation_data=(X_test, Y_test))

Instructions for updating:
Use tf.cast instead.
Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [7]:
network.evaluate(X_test, Y_test)



[0.2923765900492668, 0.9192]

**Problem 5:** ***Add a new feature as number of white areas in each images ***

In [0]:
# converting the images into binary images

X_train = ((x_train/255)>0.5)*1
X_test = ((x_test/255)>0.5)*1

DFS algorithm for finding out the number of white regions in each black and white images  

In [0]:
adj = {'row' : [-1, -1, -1, 0, 0, 1, 1, 1],
          'column' : [-1, 0, 1, -1, 1, -1, 0, 1]}

In [0]:
def DFS_visit(V, adj, x, y, parent):
    r = adj['row']
    c = adj['column']
    for i in range (len(r)):
        p = x+r[i]
        q = y + c[i]
        if p >= 0 and q >= 0 and p < V.shape[0] and q < V.shape[1]:
            if V[p,q] == 0:
                if (p,q) not in parent:
                    parent [p,q] = (x,y)
                    DFS_visit(V, adj, p, q, parent)

In [0]:
def DFS (V, adj):
    x, y = V.shape
    white_area = 0
    parent = {}
    for i in range(x):
        for j in range(y):
            if (i,j) not in parent:
                parent [i,j] = None
                if V[i,j] == 0:
                    DFS_visit(V, adj, i,j, parent)
                    white_area += 1
    return white_area

In [0]:
# training dataset with extra feature 

white_area = np.zeros((X_train.shape[0],1))

for i in range (X_train.shape[0]):
  X = X_train[i]
  white_area[i, 0] = DFS(X,adj)
  
X_train_dfs = x_train.reshape(x_train.shape[0], -1)/255

X_train_dfs = np.append(X_train_dfs, white_area/3, axis = 1)

In [0]:
# test dataset with extra feature 

white_area_t = np.zeros((X_test.shape[0],1))

for i in range (X_test.shape[0]):
  X = X_test[i]
  white_area_t[i, 0] = DFS(X,adj)
  
X_test_dfs = x_test.reshape(x_test.shape[0], -1)/255

X_test_dfs = np.append(X_test_dfs, white_area_t/3, axis = 1)

In [0]:
X_train_dfs.shape

(60000, 785)

In [0]:
parameters_dfs = {'W' : np.zeros((X_train_dfs.shape[1], 10)),
             'b': np.zeros((1,10))}

In [0]:
Parameter_dfs = model_softmax(X_train_dfs, Y, learning_rate = 0.001, batch_size = 40, parameters = parameters_dfs, epochs = 20)

In [0]:
y_est = forward_softmax(X_test_dfs, Parameter_dfs)
Y_est = np.argmax(y_est, axis = 1)
acc = np.sum(Y_est == y_test)/y_test.shape[0]

print ("Accuracy for softmax classifier: %f" %acc)

Accuracy for softmax classifier: 0.895400
