In [1]:
import numpy as np
from sklearn import preprocessing
from sklearn import datasets
from sklearn.model_selection import train_test_split,KFold
from sklearn import metrics
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
lb=preprocessing.LabelBinarizer()

In [2]:
#fit label binarizer by classes 
lb.fit([0,1,2,3,4,5,6,7,8,9])

LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)

In [3]:
#pre-processing
data,label=datasets.load_digits(return_X_y=True);
data = preprocessing.MinMaxScaler().fit_transform(data)
label=label.reshape(len(label),1)

#classes included in dataset
labels = [0,1,2,3,4,5,6,7,8,9]

In [4]:
#set number of fold, random seeds minibatch size and other variables
kf=KFold(n_splits=10,shuffle=True)
np.random.seed(0)
minibatch_size=128
l=0.01
epochs=500

In [5]:
#relu function
def ReLu(v):
    return np.maximum(0,v)

In [6]:
#derivative of relu funtion
def ReLu_derivative(v):
    return np.where(v<=0,0,1)

In [7]:
#softmax funtion
def softmax(X):
    exps = np.exp(X)
    return exps / np.sum(exps)

In [8]:
#function to convert softmax output to single label
def convert(arr):
    arr_size = arr.shape[1]
    arr_max = np.argmax(arr, axis=1)
    return arr_max

In [21]:
#create an array to store each fold's f-score, create a varible to visualize current progress
f1s=[]
fold=1;

#cross validation split
for train_index,test_index in kf.split(data):
    w = np.random.uniform(-1,1,[data.shape[1],10])        #10 neurons for hidden layer
    b = np.zeros([1,10])                                       #number of bias equal to number of neurons in hidden layer
    w2 = np.random.uniform(-1,1,[10,10])                       #10 output neurons for 10 classes
    b2 = np.zeros([1,10])
    
    #epoch split
    for epoch in range(epochs):
        
        #minibatch split
        for i in range(0,data[train_index].shape[0],minibatch_size):
            x_train=data[train_index]
            y_train=label[train_index]
            x_mini=x_train[i:i+minibatch_size,:]
            y_mini=y_train[i:i+minibatch_size,:]
        
            #forward pass
            in_=x_mini@w+b
            out1=ReLu(in_)
            in2=out1@w2+b2
            out2=np.zeros([x_mini.shape[0],10])
            for j in range(x_mini.shape[0]):
                out2[j,:]=softmax(in2[j,:])
        
            #one-hot label
            oh_label=lb.transform(y_mini)
            
            #training error
            train_error=metrics.log_loss(y_mini,out2,labels=labels)
        
            #Back propergation layer2
            dEdIn2=np.zeros([x_mini.shape[0],10]) 
            for j in range(x_mini.shape[0]):
                dEdIn2[j,:]=out2[j,:]-oh_label[j,:]
            dIn2_dW2 = out1
            dEdW2 = (1/x_mini.shape[0])*out1.T@dEdIn2
            dEdB2 = (1/x_mini.shape[0])*np.ones([1,len(x_mini)])@dEdIn2
            
            #update layer2's weights and bias
            w2 -= l*dEdW2
            b2 -= l*dEdB2

            #Back propergation layer1
            dEdOut1 = dEdIn2 @ w2.T
            dOut1dIn1 = ReLu_derivative(in_)
            dEdIn1 = dEdOut1*dOut1dIn1
            dIn1dW = x_mini
            dEdW = (1/x_mini.shape[0])*dIn1dW.T@((dEdIn2@w2.T)*dOut1dIn1)
            dEdB = (1/x_mini.shape[0])*np.ones([len(x_mini)])@((dEdIn2@w2.T)*dOut1dIn1)

            #update layer1's weights and bias
            w -= l*dEdW
            b -= l*dEdB
        
        #print output for each 100 epoch
        if(epoch%100==0):
            print("Training error of fold",fold,"epoch",epoch,"is:",train_error)
            
    #predict test dataset and convert result to label         
    in22=(ReLu(data[test_index]@w+b)@w2)+b2
    out22=np.zeros([data[test_index].shape[0],10])
    for j in range(data[test_index].shape[0]):
        out22[j,:]=softmax(in22[j,:])
    prediction=convert(out22)
    
    #calculate each fold's f-score
    f1=f1_score(y_true=label[test_index], y_pred=prediction, labels=labels,average='macro')
    print("Fold",fold,"'s f-score is:",f1)
    f1s.append(f1)
    fold=fold+1

#print 10 fold's average f-score
print("Average F-score across 10 folds is: ", np.mean(f1s))

Training error of fold 1 epoch 0 is: 6.077740559982455
Training error of fold 1 epoch 100 is: 1.0594304190454154
Training error of fold 1 epoch 200 is: 0.6194675510749282
Training error of fold 1 epoch 300 is: 0.4177586361026791
Training error of fold 1 epoch 400 is: 0.31837559846245406
Fold 1 's f-score is: 0.8973113056336672
Training error of fold 2 epoch 0 is: 4.730870697892228
Training error of fold 2 epoch 100 is: 0.5822308047842655
Training error of fold 2 epoch 200 is: 0.317307547463822
Training error of fold 2 epoch 300 is: 0.24398369073080461
Training error of fold 2 epoch 400 is: 0.20093963401118486
Fold 2 's f-score is: 0.9339063410925121
Training error of fold 3 epoch 0 is: 4.862286853745861
Training error of fold 3 epoch 100 is: 1.068523774805214
Training error of fold 3 epoch 200 is: 0.4983904145978191
Training error of fold 3 epoch 300 is: 0.3382123281431242
Training error of fold 3 epoch 400 is: 0.27025811656532395
Fold 3 's f-score is: 0.9204012380501869
Training error