In [32]:
import numpy as np
np.set_printoptions(threshold=np.nan)
from sklearn.datasets import fetch_mldata
from sklearn.metrics import confusion_matrix
import matplotlib
import matplotlib.pyplot as plt
import math
import pandas as pd
from matplotlib import pyplot as plt
import time
import re

# changing labels to make one vs all

def change_label(which_class,y_train,y_test):

    samples_train = y_train.shape[1]
    samples_test = y_test.shape[1]
    
    new_labels_train = np.zeros((1,samples_train ))
    new_labels_test = np.zeros((1,samples_test))

    ind_row_tr, ind_col_tr = np.where(y_train == which_class)
    ind_row_tst, ind_col_tst = np.where(y_test== which_class)
    
    new_labels_train[ind_row_tr,ind_col_tr] = 1
    new_labels_test[ind_row_tst,ind_col_tst] = 1
    
    return new_labels_train, new_labels_test


'''Criando a funcao de normalização de um dataframe inteiro
    input:
        df: Dataframe
    output:
        df: Dataframe com valores normalizados
'''
def normalize_dataframe(df):
    for column in df:
        df[column] = df[column]/255
    return df
    
# plot image
def plot_image(this_digit):
    some_digit_image = this_digit.reshape(28,28)
    plt.imshow(some_digit_image, cmap = matplotlib.cm.binary, interpolation = "nearest")
    plt.axis("off")
    plt.show() 

#create sigmoid function
def sigmoid(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s

#initialize values of thetas and bias
def initialize_parameters(dim):
    w = np.zeros((784,10))
    b = np.zeros(10)
    return w,b

def to_classlabel(z):
    return z.argmax(axis=1)

#implement cost function and its gradient propagation
def cost_function(w, b, X, Y,m):

    
    z =np.matmul(X,w)+b #hipotese without sigmoid
    A = softmax(z) # applying hipotese on sigmoid, A is the result of sigmoid for each sample
    cost = cross_entropy(A,Y) #find cost values for all samples
   
    dw = (1.0/m)*(np.matmul((A-Y),X)) #find residual of weights
    db = (1.0/m)*(np.sum(A-Y)) # find residual of bias
   
    return dw.T,db.T, cost

#apply gradient descent
def gradient_descent(w,b,X,Y,num_iterations,alpha, print_cost,samples):
    cost_array = []
    for i in range(num_iterations):
        #call cost function
        dw,db,cost = cost_function(w,b,X,Y,samples)
        w = w - alpha*dw
        b = b - alpha*db
        
        #save cost
        cost_array.append(cost)
        
        if (print_cost == True and i%50 == 0):
            print("Cost from iteration ",i,"= ",cost,"\n")
            
    return w,b,cost_array

#prediction
def prediction (w,b,X):
    #w = w.reshape(X.shape[0],1)
    A = np.dot(w.T, X)+b
    return A

# one vs all prediction
def one_vs_all(w,X):
    samples = X.shape[1]
    weighs_qut = w.shape[0]
    bias = w[-1]
    weights = w[:(len(w))-1]
    A = sigmoid (np.dot(weights.T, X)+bias)
    
    return A;

def softmax(y_linear):
    exp = np.exp(y_linear)
    norms = np.sum(exp, axis=1)
    return exp.T / norms

def cross_entropy(yhat, y):
    return - (np.sum(y * np.log(yhat+1e-6) + (1-y)*np.log(1-yhat+1e-6)))/y.shape[0]

'''Funcao para obter as métricas de performance: precision, recall e f1_score
    input:
        confusion_matrix: matriz de confusão (use get_confusion_matrix() para calculá-la)
    output:
        precision: relação entre a quantidade de positivos preditos pela quantidade real de positivos 
        recall: relação entre a quantidade de positivos esperados pela quantidade de positivos preditos
        f1_score: metrica para relacionar precision e recall em uma única métrica
'''
def get_metrics(confusion_matrix):
    precision = get_precision(confusion_matrix)
    recall = get_recall(confusion_matrix)
    f1_score = get_f1_score(precision, recall)
    
    return precision, recall, f1_score

def print_metrics(precision, recall, f1_score):
    precision = np.around(precision, decimals=2).reshape( precision.shape[0],1)
    recall = np.around(recall, decimals=2).reshape( recall.shape[0],1)
    f1_score = np.around(f1_score, decimals=2).reshape( f1_score.shape[0],1)
    
    average_precision = np.sum(precision)/precision.shape[0]
    average_recall = np.sum(recall)/recall.shape[0]
    average_f1_score = np.sum(f1_score)/f1_score.shape[0]
    
    print("\n\nPrecision (Pr), Recall (Re) and F1_Score (F1) of each class: ")
    print("Pr   Re    F1")
    print(re.sub(r' *\n *', '\n', np.array_str(np.c_[precision, recall, f1_score]).replace('[', '').replace(']', '').strip()))
    
    print("\n\nAverage Precision: ", round(average_precision,2) , "\nAverage Recall: ", round(average_recall, 2) ,"\nAverage F1_Score: ", round(average_f1_score,2))
    

def get_recall(confusion_matrix):
    precision = np.ones((confusion_matrix.shape[0]))
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fp = np.sum(confusion_matrix[:,i])
        
        precision[i] = tp/tp_fp
        
    return precision

def get_precision(confusion_matrix):
    recall = np.ones(confusion_matrix.shape[0])
    
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fn = np.sum(confusion_matrix[i,:])
        
        recall[i] = tp/tp_fn
        
    return recall

def get_f1_score(precision, recall):
    f1 = 2*( (precision*recall) / (precision+recall) )
    return f1

'''Funcao para calcular a matriz de confusão
    input:
        Y: labels corretas
        Y_pred: labels preditas
        classes: numéro de classes
    output:
        confusion_matrix: matriz de confusão
'''
def get_confusion_matrix(Y, Y_pred, classes):
    
    if(Y_pred.max() != 1):
        Y_pred = binarize_labels(Y_pred,classes)
        
    if(Y.max() != 1 ):
        Y = binarize_labels(Y,classes)
    
    confusion_matrix = np.zeros((classes,classes)).astype(int)
    
    for y in range (Y.shape[1]):
        for c_pred in range (classes):
            if(Y_pred[c_pred, y] == 1):
                for c in range (classes):
                    if(Y[c, y] == 1):
                        confusion_matrix[c, c_pred]+=1
    
    return confusion_matrix


'''Funcao para converter um vetor de probabilidades de classes em one-hot-encoding
    input:
        L: vetor de probabilidade das labels
    output:
        Y_pred: vetor de labels (one-hot-encoding) 
    Ex: 
        Y_pred[:,1] = [0,1,0,0,0,0,0,0,0,0] quer dizer que a classe da sample 1 é 2
'''
def binarize_labels(L, classes):
    samples_number = L.shape[0]
    
    Y_pred = np.array(L).reshape(1, samples_number)

    y_aux = np.eye(classes)[Y_pred.astype('int32')]
    y_aux = y_aux.T.reshape(classes, samples_number)

    Y_pred = y_aux
    
    return Y_pred

In [3]:
print("Loading Dataframes......")
df = pd.read_csv("data/fashion-mnist_train.csv")

print("Separating into training and validation.....")
#Separate the Training DF into Train and Validation
msk = np.random.rand(len(df)) < 0.7 

train_df = df[msk]
validation_df = df[~msk]

Y_train = train_df["label"]
Y_validation = validation_df["label"]

train_df = train_df.loc[:, train_df.columns != "label"]
validation_df = validation_df.loc[:, validation_df.columns != "label"]

#test_df = pd.read_csv("data/fashion-mnist_test.csv")

print("Normalizing (This may take a couple minutes).....")
normalized_train_df = normalize_dataframe(train_df)
normalized_validation_df = normalize_dataframe(validation_df)


print("Preparing the labels.....")
#One hot encoding labels para o softmax
classes = 10

samples_train = Y_train.shape[0]
samples_validation = Y_validation.shape[0]

Y_train = np.array(Y_train).reshape(1, samples_train)

y_aux = np.eye(classes)[Y_train.astype('int32')]
y_aux = y_aux.T.reshape(classes, samples_train)
Y_train = y_aux

Y_validation = np.array(Y_validation).reshape(1, samples_validation)

y_aux = np.eye(classes)[Y_validation.astype('int32')]
y_aux = y_aux.T.reshape(classes, samples_validation)

Y_validation = y_aux
Y_validation =Y_validation.T

print("Done!")



Loading Dataframes......
Separating into training and validation.....
Normalizing (This may take a couple minutes).....
Preparing the labels.....


In [21]:
#Training
ws, bs = initialize_parameters(samples_train) #CREATING NEW WEIGHTS AND BIAS

w,b,costs = gradient_descent(ws,bs,train_df,Y_train,1000,0.3,True,samples_train) # pesos e bias treinados




Cost from iteration  0 =  13670.955255237594 

Cost from iteration  50 =  6054.856657995749 

Cost from iteration  100 =  5834.210095215725 

Cost from iteration  150 =  5835.382871331535 

Cost from iteration  200 =  4976.132575654226 

Cost from iteration  250 =  5342.470231403505 

Cost from iteration  300 =  4702.789119779424 

Cost from iteration  350 =  4833.654889055105 

Cost from iteration  400 =  4566.992620632636 

Cost from iteration  450 =  4413.078712128228 

Cost from iteration  500 =  4309.677843082872 

Cost from iteration  550 =  4700.549725478374 

Cost from iteration  600 =  4456.175988280427 

Cost from iteration  650 =  4349.5520356359075 

Cost from iteration  700 =  4285.035467299555 

Cost from iteration  750 =  4239.971566115341 

Cost from iteration  800 =  4131.8749923447 

Cost from iteration  850 =  4035.168174878393 

Cost from iteration  900 =  3956.9792049300477 

Cost from iteration  950 =  4189.903814219751 



In [31]:
#predicting
z =np.matmul(validation_df,w)+b #hipotese without sigmoid
A = softmax(z)
sda = np.argmax(A,axis=0)

y_aux = np.eye(classes)[sda.astype('int32')]
y_aux = y_aux.T.reshape(classes, samples_validation)
final = y_aux

y_test_new = Y_validation.T
print(y_test_new.shape)

confusion_matrix = get_confusion_matrix(y_test_new,final, 10)

print(confusion_matrix)

precision, recall, f1_score = get_metrics(confusion_matrix)

print_metrics(precision, recall, f1_score)

(10, 17946)
[[1651   13   24   67   29    0   26    0   26    0]
 [   9 1676   10   41   15    0    1    0    2    0]
 [  41    4 1119   17  581    1   31    0   13    0]
 [ 126   21   14 1489  161    0   13    0    7    0]
 [   9    0   51   32 1737    0   20    0   10    0]
 [   4    4    1    3    0 1488    0  105   23   86]
 [ 450    4  202   57  639    0  388    1   41    0]
 [   0    0    0    0    0   45    0 1638    5   93]
 [  11    2    8   20   25   17    7   14 1736    2]
 [   1    0    0    1    0   19    0   64    3 1652]]


Precision (Pr), Recall (Re) and F1_Score (F1) of each class: 
Pr   Re    F1
0.9  0.72 0.8
0.96 0.97 0.96
0.62 0.78 0.69
0.81 0.86 0.84
0.93 0.55 0.69
0.87 0.95 0.91
0.22 0.8  0.34
0.92 0.9  0.91
0.94 0.93 0.94
0.95 0.9  0.92


Average Precision:  0.81 
Average Recall:  0.84 
Average F1_Score:  0.8
