In [None]:
from PIL import Image
import numpy as np
from numpy import asarray
from matplotlib import pyplot as plt
import pandas as pd
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import time

C:\Users\Toshiba\anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
C:\Users\Toshiba\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


In [None]:
# Italian folder names used by the dataset -> English class labels used in this notebook.
translate = {
    "cane": "Dog",
    "cavallo": "Horse",
    "elefante": "Elephant",
    "farfalla": "Butterfly",
    "gallina": "Chicken",
    "gatto": "Cat",
    "mucca": "Cow",
    "pecora": "Sheep",
    "scoiattolo": "Squirrel",
    "ragno": "Spider",
}

In [None]:
# Entries of raw-img/; resize_gray() treats each one as a class folder and looks
# its name up in `translate`. os.listdir returns entries in arbitrary order.
folders = os.listdir("raw-img/")

def resize_gray():
    """Load every image under ``raw-img/`` as a flattened, normalized grayscale vector.

    Each file of each folder listed in the module-level ``folders`` is converted
    to 8-bit grayscale, resized to 100x100, scaled to the range [0, 1] and
    flattened row by row.

    Returns:
        tuple: ``(files, categories)`` where ``files`` is a list holding one
        list of 10000 floats per image and ``categories`` holds, in the same
        order, the English label (looked up in ``translate``) of the folder
        the image came from.
    """
    files, categories = [], []
    for folder in folders:
        folder_path = os.path.join("raw-img", folder)
        for file in os.listdir(folder_path):
            # The context manager guarantees the underlying file is closed even
            # for multi-frame images or when decoding fails part-way.
            with Image.open(os.path.join(folder_path, file)) as im:
                # convert() loads the pixel data into a new image, so `gray`
                # stays valid after the source file is closed.
                gray = im.convert('L').resize((100, 100))

            pixels = np.asarray(gray) / 255.0  # normalize to [0, 1]

            # ravel() walks the 2-D array row by row, which is exactly what
            # the previous nested Python loops did, only much faster.
            files.append(pixels.ravel().tolist())
            categories.append(translate[folder])

    return files, categories
            


In [None]:
# Reads and preprocesses every image from disk; this is the slow step of the notebook.
files, categories = resize_gray()

In [None]:
# Stack the per-image pixel lists into one 2-D array (one row per image)
# and the labels into a parallel 1-D array.
files_arr = np.array(files)
categories_arr = np.array(categories)
files_arr.shape

(26179, 10000)

In [None]:
def categories_tonum(category_arr):
    """Replace every class name in ``category_arr`` by its class number, in place.

    Nothing is returned. When ``category_arr`` is a NumPy string array the
    numbers are stored in their string form, so the caller still has to cast
    the array to an integer dtype afterwards.
    """
    class_names = ('Dog', 'Horse', 'Elephant', 'Butterfly', 'Chicken',
                   'Cat', 'Cow', 'Sheep', 'Spider', 'Squirrel')
    class_numbers = {name: number for number, name in enumerate(class_names)}

    for position, label in enumerate(category_arr):
        category_arr[position] = class_numbers[label]

In [None]:
# After the in-place mapping the labels are still strings (the array keeps its
# string dtype), so cast them to integers. The builtin `int` replaces the
# `np.int` alias, which was deprecated in NumPy 1.20 and removed in 1.24.
categories_tonum(categories_arr)
categories_arr = categories_arr.astype(int)
categories_arr

array([0, 0, 0, ..., 9, 9, 9])

In [None]:
def shuffle_split(X_arr, Y_arr):
    """Shuffle samples and labels together, then split them 80/20 into train/test.

    ``train_test_split`` applies one and the same seeded permutation to both
    arrays, so every sample keeps its label. The inputs are left untouched.

    The earlier approach shuffled each array with ``random.Random(85).shuffle``.
    That is unsafe for a 2-D NumPy array: ``random.shuffle`` swaps items with
    ``x[i], x[j] = x[j], x[i]``, and because NumPy rows are views the swap
    overwrites row ``i`` with row ``j`` and then copies it back, duplicating
    some images, dropping others and breaking the image/label pairing.

    Args:
        X_arr: 2-D array of samples, one row per sample.
        Y_arr: 1-D array of labels aligned with ``X_arr``.

    Returns:
        list: ``[X_train, X_test, Y_train, Y_test]``.
    """
    return train_test_split(X_arr, Y_arr, test_size=0.20, shuffle=True, random_state=85)

In [None]:
# 80/20 train/test split of the flattened images and their integer labels.
files_arr_train, files_arr_test, categories_arr_train, categories_arr_test = shuffle_split(files_arr, categories_arr)
print(files_arr_train.shape)

(20943, 10000)


In [None]:
def init_params(layers):
    """Create small random weights and zero biases for a fully connected network.

    ``layers`` lists the node count of every layer, input and output included.
    The global NumPy RNG is reseeded with 42 on every call, so the result is
    deterministic for a given ``layers``.

    Returns:
        tuple: ``(weights, bias)`` dicts keyed ``'W1', 'W2', ...`` and
        ``'b1', 'b2', ...``. ``W<k>`` has shape ``(layers[k], layers[k-1])``
        with values drawn uniformly from [0, 0.001); ``b<k>`` is a
        ``(layers[k], 1)`` column of zeros.
    """
    np.random.seed(42)

    weights, bias = {}, {}
    layer_pairs = zip(layers, layers[1:])  # (nodes feeding in, nodes in this layer)
    for index, (fan_in, fan_out) in enumerate(layer_pairs, start=1):
        suffix = str(index)
        weights['W' + suffix] = 0.001 * np.random.rand(fan_out, fan_in)  # keep initial weights tiny
        bias['b' + suffix] = np.zeros((fan_out, 1))

    return weights, bias

In [None]:
# Sanity check: 3 input nodes feeding 3 output nodes, no hidden layer.
print(init_params([3,3]))

({'W1': array([[3.74540119e-04, 9.50714306e-04, 7.31993942e-04],
       [5.98658484e-04, 1.56018640e-04, 1.55994520e-04],
       [5.80836122e-05, 8.66176146e-04, 6.01115012e-04]])}, {'b1': array([[0.],
       [0.],
       [0.]])})


In [None]:
def get_node_num(X, layer_num):
    """Return the node count of every layer for a net with ``layer_num`` hidden layers.

    The list starts with the input width (``len(X[0].T)``) and ends with the 10
    output classes. Each hidden layer in between gets
    ``round(len(X) / (2 * (previous_width / 5 + 10)))`` nodes, where
    ``previous_width`` is the size of the layer just before it.
    """
    sample_count = len(X)
    sizes = [len(X[0].T)]  # input layer: one node per feature of a sample

    for _ in range(layer_num):
        previous_width = sizes[-1]
        hidden_width = sample_count / (2 * (previous_width / 5 + 10))
        sizes.append(int(np.round(hidden_width)))

    return sizes + [10]  # output layer: one node per class

In [None]:
# Layer sizes for one hidden layer, computed from the full (unsplit) dataset.
get_node_num(files_arr, 1)

[10000, 7, 10]

In [None]:
def RELU(Z):
    """Rectified linear unit: element-wise ``max(Z, 0)``."""
    floor = 0
    return np.maximum(Z, floor)

def RELU_deriv(Z):
    """Derivative of RELU: a boolean mask that is True where ``Z`` is positive."""
    is_active = Z > 0
    return is_active

def tanh(x):
    """Hyperbolic tangent activation, applied element-wise."""
    activated = np.tanh(x)
    return activated

def TANH_deriv(x):
    """Derivative of tanh, ``1 - tanh(x)**2``, applied element-wise."""
    squashed = np.tanh(x)
    return 1 - np.power(squashed, 2)

def softmax(Z):
    """Softmax along axis 0, so every column of the result sums to 1.

    The column maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (and the division from producing NaN) when the logits are large.

    Args:
        Z: array of logits; each column holds the logits of one sample.

    Returns:
        Array of the same shape as ``Z`` with the per-column probabilities.
    """
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(shifted)
    return expZ / np.sum(expZ, axis=0)

def cost_func(A, Y):
    """Mean negative base-10 log-likelihood of the correct class.

    Args:
        A: predicted probabilities with one column per sample
           (row = class, column = sample).
        Y: integer class index of each sample; must not be empty.

    Returns:
        The average of ``-log10(A[Y[k], k])`` over all samples ``k``.

    NOTE: the base-10 logarithm is kept so that reported costs stay comparable
    with earlier runs. The ``A - one_hot`` gradient used in ``back_prop`` is the
    gradient of the natural-log cross-entropy, which equals this value
    multiplied by the constant ln(10).
    """
    labels = np.asarray(Y)
    sample_index = np.arange(len(labels))
    # Fancy indexing picks, for every sample, the probability assigned to its
    # true class in one vectorized step.
    correct_class_probs = np.asarray(A)[labels, sample_index]
    return np.mean(-np.log10(correct_class_probs))

In [None]:
def forward_prop(weights, bias, X, act_func):
    """Run one forward pass and keep every layer's pre-activation and activation.

    The first layer reads ``X.T`` (``X`` is expected to hold one sample per
    row), every later layer reads the previous layer's activation. The last
    layer always uses softmax; all other layers use RELU when ``act_func`` is
    ``'relu'`` and tanh for any other value.

    Returns:
        tuple: ``(Z_dict, A_dict)`` keyed ``'Z1', 'Z2', ...`` and
        ``'A1', 'A2', ...`` with one entry per layer.
    """
    Z_dict, A_dict = {}, {}
    layer_count = len(weights)

    for layer in range(1, layer_count + 1):
        suffix = str(layer)
        layer_input = X.T if layer == 1 else A_dict['A' + str(layer - 1)]
        Z = np.dot(weights['W' + suffix], layer_input) + bias['b' + suffix]  # W*input + b

        if layer == layer_count:  # output layer
            A = softmax(Z)
        elif act_func == 'relu':
            A = RELU(Z)
        else:
            A = tanh(Z)

        Z_dict['Z' + suffix] = Z
        A_dict['A' + suffix] = A

    return Z_dict, A_dict

In [None]:
def one_hot(target_list, num_classes=10):
    """One-hot encode integer class labels, one column per sample.

    Args:
        target_list: sequence of integer class indices.
        num_classes: number of rows (classes) in the encoding; defaults to the
            10 classes used in this notebook.

    Returns:
        Integer array of shape ``(num_classes, len(target_list))`` whose column
        ``k`` is all zeros except for a 1 in row ``target_list[k]``; e.g. the
        column for target 3 is ``[0,0,0,1,0,0,0,0,0,0]``. An empty
        ``target_list`` yields an array of shape ``(num_classes, 0)``.
    """
    targets = np.asarray(target_list)
    sample_count = len(targets)
    encoded = np.zeros((num_classes, sample_count), dtype=int)
    if sample_count:
        # Set the (class, sample) cell of every sample in one vectorized step.
        encoded[targets, np.arange(sample_count)] = 1
    return encoded

In [None]:
def back_prop(Z, A, target_list, X, weights, act_func):
    """Back-propagate the error and return the gradients of every layer.

    Layers are visited from the output back to the first one. The output
    layer's error is ``A_last - one_hot(target_list)``; every earlier layer's
    error is the next layer's error pulled back through that layer's weights
    and multiplied by the activation derivative (RELU when ``act_func`` is
    ``'relu'``, tanh otherwise). All gradients are averaged over ``len(X)``.

    Args:
        Z, A: the dicts returned by ``forward_prop`` for this batch.
        target_list: integer class of every sample in the batch.
        X: the batch itself, one sample per row.
        weights: current weight dict (``'W1'``, ``'W2'``, ...).
        act_func: name of the hidden-layer activation.

    Returns:
        tuple: ``(dW_dict, db_dict)`` keyed ``'dW<k>'`` and ``'db<k>'``.
    """
    dW_dict, db_dict, dZ_dict = {}, {}, {}

    one_hot_arr = one_hot(target_list)
    layer_count = len(weights)
    batch_size = len(X)

    for layer in range(layer_count, 0, -1):
        if layer == layer_count:  # output layer
            dZ = A['A' + str(layer)] - one_hot_arr
        else:  # hidden layer: pull the next layer's error back through its weights
            pulled_back = np.dot(weights['W' + str(layer + 1)].T, dZ_dict['dZ' + str(layer + 1)])
            if act_func == 'relu':
                dZ = pulled_back * RELU_deriv(Z['Z' + str(layer)])
            else:
                dZ = pulled_back * TANH_deriv(Z['Z' + str(layer)])

        # The first layer was fed X.T, so its transposed input is X itself.
        transposed_input = X if layer == 1 else A['A' + str(layer - 1)].T
        dW = np.dot(dZ, transposed_input) / batch_size
        db = np.sum(dZ, axis=1, keepdims=True) / batch_size

        dZ_dict['dZ' + str(layer)] = dZ
        dW_dict['dW' + str(layer)] = dW
        db_dict['db' + str(layer)] = db

    return dW_dict, db_dict
    

In [None]:
def update_params(weights, bias, dW_dict, db_dict, learn_rate):
    """Apply one gradient-descent step to every layer.

    Each entry of ``weights`` and ``bias`` is rebound to a new array holding
    ``old - learn_rate * gradient``; the two (mutated) dicts are also returned.
    """
    layer_count = len(weights)
    for layer in range(1, layer_count + 1):
        w_key, b_key = 'W' + str(layer), 'b' + str(layer)
        weight_step = learn_rate * dW_dict['d' + w_key]
        bias_step = learn_rate * db_dict['d' + b_key]
        weights[w_key] = weights[w_key] - weight_step
        bias[b_key] = bias[b_key] - bias_step

    return weights, bias

In [None]:
def get_accuracy(predictions, Y):
    """Return the fraction of entries of ``Y`` that ``predictions`` gets right.

    Entries are compared position by position over ``len(predictions)`` items
    and the number of matches is divided by ``Y.size``.
    """
    matching_positions = [k for k in range(len(predictions)) if predictions[k] == Y[k]]
    return len(matching_positions) / Y.size

In [None]:
def get_prediction(A):
    """Return, for every column of ``A``, the row index holding the largest value.

    With one column of class scores per sample, that index is the predicted class.
    """
    predicted_classes = np.argmax(A, axis=0)
    return predicted_classes

In [None]:
# Module-level training history that gradient_descent() appends to. Re-run this
# cell before a new training run, otherwise the histories of several runs pile up.
epoch_list = []
train_cost_list = []
test_cost_list = []

In [None]:
def _output_probabilities(weights, bias, X, act_func, output_key):
    """Forward-propagate ``X`` and return only the output layer's activations."""
    _, activations = forward_prop(weights, bias, X, act_func)
    return activations[output_key]


def gradient_descent(X_train, Y_train, X_test, Y_test, iterations, learning_rate, hidden_layer_num, mini_batch_size, act_func):
    """Train the network with mini-batch gradient descent.

    Before training and after every 10th epoch (10, 20, ...) the train
    accuracy and cost are printed, and the epoch number, train cost and test
    cost are appended to the module-level ``epoch_list``, ``train_cost_list``
    and ``test_cost_list``.

    Every epoch walks over the whole training set in consecutive slices of
    ``mini_batch_size`` samples, including the final, possibly shorter, slice.

    Args:
        X_train, Y_train: training samples (one per row) and integer labels.
        X_test, Y_test: test samples and integer labels.
        iterations: number of epochs.
        learning_rate: step size of every parameter update.
        hidden_layer_num: number of hidden layers.
        mini_batch_size: samples per update; must be at least 1.
        act_func: ``'relu'`` for RELU hidden layers, anything else for tanh.

    Returns:
        tuple: ``(weights, bias, cost, predicted_list, Y_test)`` where ``cost``
        is the train cost and ``predicted_list`` the test-set predictions, both
        computed with the returned (final) parameters.

    Raises:
        ValueError: if ``mini_batch_size`` is smaller than 1.
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be at least 1")

    data_len = len(Y_train)

    layer_sizes = get_node_num(X_train, hidden_layer_num)
    weights, bias = init_params(layer_sizes)  # initialize parameters
    print('Neural Network Created With Nodes:')
    print(layer_sizes)

    # forward_prop numbers layers from 1, so the output layer comes right
    # after the hidden ones.
    output_key = 'A' + str(hidden_layer_num + 1)

    ### INITIAL ACCURACY AND INITIAL LOSS FOR TRAIN

    train_probs = _output_probabilities(weights, bias, X_train, act_func, output_key)

    print('Initial:')
    cost = cost_func(train_probs, Y_train)
    train_cost_list.append(cost)
    epoch_list.append(0)
    print("Accuracy: ", get_accuracy(get_prediction(train_probs), Y_train))
    print(cost)

    ### INITIAL LOSS FOR TEST

    test_probs = _output_probabilities(weights, bias, X_test, act_func, output_key)
    test_cost_list.append(cost_func(test_probs, Y_test))

    #### STARTS LEARNING

    start_time = time.time()
    for epoch in range(iterations):
        # range() with a step also yields the last, shorter slice, so no
        # training sample is skipped and a batch size >= data_len still trains.
        for start in range(0, data_len, mini_batch_size):
            stop = start + mini_batch_size
            X_batch, Y_batch = X_train[start:stop], Y_train[start:stop]

            Z, A = forward_prop(weights, bias, X_batch, act_func)
            dW, db = back_prop(Z, A, Y_batch, X_batch, weights, act_func)
            weights, bias = update_params(weights, bias, dW, db, learning_rate)

        if epoch % 10 == 0 and epoch != 0:
            epoch_list.append(epoch)

            train_probs = _output_probabilities(weights, bias, X_train, act_func, output_key)

            print('In epoch ' + str(epoch) + ':')
            cost = cost_func(train_probs, Y_train)
            train_cost_list.append(cost)
            print("Accuracy: ", get_accuracy(get_prediction(train_probs), Y_train))
            print(cost)

            test_probs = _output_probabilities(weights, bias, X_test, act_func, output_key)
            test_cost_list.append(cost_func(test_probs, Y_test))

    end_time = time.time()
    print('\n Computation time: ' + str(end_time - start_time))

    # Evaluate once more with the final parameters so the returned cost and
    # predictions always exist (even for 10 or fewer epochs) and always match
    # the returned weights. This pass is not added to the history lists.
    cost = cost_func(_output_probabilities(weights, bias, X_train, act_func, output_key), Y_train)
    predicted_list = get_prediction(_output_probabilities(weights, bias, X_test, act_func, output_key))

    return weights, bias, cost, predicted_list, Y_test

In [None]:
# The third return value is bound to `final_train_cost` rather than `sum`, so the
# builtin sum() stays usable in the rest of the notebook.
# NOTE(review): the saved output below lists nodes [10000, 10] (no hidden layer),
# which does not match hidden_layer_num=1 here; presumably it comes from an
# earlier run. Confirm and re-run.
weights, bias, final_train_cost, predictions, Y_test = gradient_descent(files_arr_train, categories_arr_train, files_arr_test, categories_arr_test, 300, 0.008, 1, 128, 'relu')
# weights and bias are dicts, so np.save pickles them; read them back with
# np.load(path, allow_pickle=True).item().
np.save('weights.npy', weights)
np.save('bias.npy', bias)

Neural Network Created With Nodes:
[10000, 10]
Initial:
Accuracy:  0.09750274554743829
1.0011443151384576
In epoch 10:
Accuracy:  0.16592656257460728
1.071417789034592
In epoch 20:
Accuracy:  0.16740677075872606
1.0967433293010787
In epoch 30:
Accuracy:  0.19863438857852267
0.9750385107840105
In epoch 40:
Accuracy:  0.2556462779926467
1.0410121562901213
In epoch 50:
Accuracy:  0.22160148975791433
1.115942596809649
In epoch 60:
Accuracy:  0.20885259991405242
1.0964426391532471
In epoch 70:
Accuracy:  0.24910471279186364
1.197646786554083
In epoch 80:
Accuracy:  0.20617867545241847
1.350673634444734
In epoch 90:
Accuracy:  0.25001193716277514
1.0936042741613867
In epoch 100:
Accuracy:  0.22766556844769134
0.9738301025217836
In epoch 110:
Accuracy:  0.26777443537220075
0.9296628032846439
In epoch 120:
Accuracy:  0.28486845246621784
0.9126198499742213
In epoch 130:
Accuracy:  0.2675356921166977
1.0129569744598383
In epoch 140:
Accuracy:  0.3062120995081889
0.861576743030091
In epoch 150:
A

# Report

## Comment on different parameters of the 0-hidden-layer model

| Accuracy | batch_size = 16 | batch_size = 128 | no_batch_size |
| :- | :-: | :-: | :-: |
| epoch 0 | 0.097 | 0.097 | 0.097 |
| epoch 10 | 0.182 | 0.165 | 0.187 |
| epoch 100 | 0.301 | 0.227 | 0.185 |
| epoch 300 | 0.370 | 0.350 | 0.187 |


| Cost | batch_size = 16 | batch_size = 128 | no_batch_size |
| :- | :-: | :-: | :-: |
| epoch 0 | 1.00 | 1.00 | 1.00 |
| epoch 10 | 2.490 | 1.07 | 0.99 |
| epoch 100 | 1.160 | 0.97 | 1.11 |
| epoch 300 | 0.930 | 0.80 | 1.09 |

Computation time for mini_batch_size = 128 is 240.71 secs
Computation time with no mini-batch is 367.58 secs






## Different parameters of the 1-hidden-layer model

Same parameters in both cases: Relu, batch size = 128, learning rate = 0.008

__Case: neuron size = 5__ in hidden layer

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.070 | 1.00 |
| epoch 10 | 0.188 | 0.96 |
| epoch 100 | 0.214 | 0.93 |

__Case: neuron size = 1000__ in hidden layer

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.070 | 1.00 |
| epoch 10 | 0.192 | 0.95 |
| epoch 100 | 0.245 | 0.91 |

## Comment on different parameters of the 2-hidden-layer model

Same parameters: hidden layer size = 2, learning rate = 0.008, batch_size = 128 

__Case: Relu__

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.187 | 1.00 |
| epoch 10 | 0.187 | 0.95 |
| epoch 100 | 0.200 | 0.95 |
| epoch 300 | 0.237 | 0.93 |

__Case: Tanh__

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.187 | 1.00 |
| epoch 10 | 0.187 | 0.95 |
| epoch 100 | 0.196 | 0.95 |
| epoch 300 | 0.201 | 0.95 |

## Parameters effects on different values

### Learning Rate

I ran the model twice, both times with 1 hidden layer, the RELU function and a mini-batch size of 60 (note: the table headings below state batch_size = 128); the learning rate was 0.005 in the first run and 0.02 in the second run. The results are:

__Learning Rate = 0.005__ / batch_size = 128 / 1 hidden layer / Relu function

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.070 | 1.00 |
| epoch 10 | 0.187 | 0.96 |
| epoch 100 | 0.214 | 0.94 |
| epoch 300 | 0.252 | 0.908 |

__Learning Rate = 0.02__ / batch_size = 128 / 1 hidden layer / Relu function

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.070 | 1.00 |
| epoch 10 | 0.199 | 0.95 |
| epoch 100 | 0.23 | 0.93 |
| epoch 300 | 0.24 | 0.93 |

In the tables above, the initial (epoch 0) values are the same: accuracy is 0.070 and loss is 1.00 in both. At the 10th epoch, accuracy is 0.187 in the first table but 0.199 in the second, and cost is 0.96 in the first but 0.95 in the second, __so we can say that as the learning rate increases, the model learns faster.__ But at the 300th epoch the accuracy in the first table is higher, __so if the learning rate is large, the final accuracy will be lower, because it is harder for the model to settle into a local minimum of the cost function.__


### Batch Size

The tables from the 0-hidden-layer part show us the following:

First of all, using mini-batches decreases the computation time: even with the largest mini_batch_size (128) the computation takes 240.71 seconds for 300 epochs, versus 367.58 seconds for 300 epochs with no mini-batch.

Also, as we can see, accuracy decreases and cost slightly increases when we don't use mini-batches.

### Layer Size

Same parameters: Relu, batch size = 128, learning rate = 0.008

no hidden layer

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.097 | 1.00 |
| epoch 10 | 0.165 | 1.07 |
| epoch 100 | 0.227 | 0.97 |

1 hidden layer

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.070 | 1.00 |
| epoch 10 | 0.188 | 0.96 |
| epoch 100 | 0.214 | 0.93 |

2 hidden layer

| epoch | Accuracy | Cost |
| :- | :-: | :-: |
| epoch 0 | 0.187 | 1.00 |
| epoch 10 | 0.187 | 0.96 |
| epoch 100 | 0.200 | 0.95 |


The 1-hidden-layer network has a smaller loss than the network with no hidden layer; it also starts with lower accuracy, but at the 10th epoch it has higher accuracy. The 2-hidden-layer network has neither higher accuracy nor lower loss than the 1-hidden-layer network, so no clear conclusion can be drawn.


### Hidden neuron Size

In this part I took the result tables from the 1-hidden-layer part. There are 2 tables with all the same parameters; the only difference is the neuron size: in the first case the hidden layer has 5 neurons, while in the second case it has 1000.

Since the accuracy is higher and the loss is lower, the results clearly show that an increase in neuron size increases accuracy and decreases loss.

### Activation Function

In the tables from the 2-hidden-layer part, the 1st table shows the output with RELU while the other shows the output with tanh; there is a clear decrease in accuracy and increase in loss when the function is tanh. So RELU is more effective for our project.

### Confusion Matrix for Part 1

In [None]:
# Test-set metrics; precision, recall and F1 are macro-averaged over the classes.
print('Accuracy score:')
print(accuracy_score(Y_test, predictions))
for metric_title, metric_func in (
    ('Precision score:', precision_score),
    ('Recall score:', recall_score),
    ('f1 score:', f1_score),
):
    print(metric_title)
    print(metric_func(Y_test, predictions, average = 'macro'))

Accuracy score:
0.17417876241405653
Precision score:
0.15437851165398744
Recall score:
0.13376743955706144
f1 score:
0.11315124985439884
