In [2]:
import numpy as np
np.set_printoptions(threshold=np.nan)
from sklearn.datasets import fetch_mldata
from sklearn.metrics import confusion_matrix
import matplotlib
import matplotlib.pyplot as plt
import math
import pandas as pd
from matplotlib import pyplot as plt
import time
import re

#-------------MISC------------------

def change_label(which_class,y_train,y_test):

    samples_train = y_train.shape[1]
    samples_test = y_test.shape[1]
    
    new_labels_train = np.zeros((1,samples_train ))
    new_labels_test = np.zeros((1,samples_test))

    ind_row_tr, ind_col_tr = np.where(y_train == which_class)
    ind_row_tst, ind_col_tst = np.where(y_test== which_class)
    
    new_labels_train[ind_row_tr,ind_col_tr] = 1
    new_labels_test[ind_row_tst,ind_col_tst] = 1
    
    return new_labels_train, new_labels_test


'''Criando a funcao de normalização de um dataframe inteiro
    input:
        df: Dataframe
    output:
        df: Dataframe com valores normalizados
'''
def normalize_dataframe(df):
    for column in df:
        df[column] = df[column]/255
    return df


'''Funcao para converter um vetor de probabilidades de classes em one-hot-encoding
    input:
        L: vetor de probabilidade das labels
    output:
        Y_pred: vetor de labels (one-hot-encoding) 
    Ex: 
        Y_pred[:,1] = [0,1,0,0,0,0,0,0,0,0] quer dizer que a classe da sample 1 é 2
'''
def binarize_labels(L, classes):
    samples_number = L.shape[0]
    
    Y_pred = np.array(L).reshape(1, samples_number)

    y_aux = np.eye(classes)[Y_pred.astype('int32')]
    y_aux = y_aux.T.reshape(classes, samples_number)

    Y_pred = y_aux
    
    return Y_pred

#-------------MISC------------------

    
    
#-------------TRAINING------------------

'''Criando funcao para inicializar matriz de pesos e biases nos formatos corretos
    input:
        df: Dataframe
    output:
        df: Dataframe com valores normalizados
'''
def initialize_parameters(dim,num_classes):
    w = np.zeros((dim,10))
    b = np.zeros(10)
    return w,b


'''Criando funcao para calcular o custo do modelo
    input:
        w: matriz de pesos (featuresXn_classes)
        b: vetor de biases, um para cada classe
        X: matriz de features de treino
        Y: vetor de labels-Ground Truth
        m: quantidade de amostras
    output:
        dw: matriz do residuo calculado para os pesos
        db: vetor do residuo calculado para os biases
        cost: custo gerado pelo modelo naquela iteracao
'''
def cost_function(w, b, X, Y,m):

    z =np.matmul(X,w)+b #hipotese without sigmoid
    A = softmax(z) # applying hipotese on sigmoid, A is the result of softmax activations
    cost = cross_entropy(A,Y) #find cost values for all samples
   
    dw = (1.0/m)*(np.matmul((A-Y),X)) #find residual of weights
    db = (1.0/m)*(np.sum(A-Y)) # find residual of bias
   
    return dw.T,db.T, cost



'''Criando funcao para calcular o custo do modelo
    input:
        w: matriz de pesos (featuresXn_classes)
        b: vetor de biases, um para cada classe
        X: matriz de features de treino
        Y: vetor de labels-Ground Truth
        num_iterations: numero de iteracoes
        alpha: learning rate
        print_cost: flag para retornar ou nao o valor de todos os custos computados
        samples: quantidade de amostras
    output:
        w: matriz dos pesos finais 
        b: vetor dos biases finais
        cost_array: vetor de custos
'''
def gradient_descent(w,b,X,Y,num_iterations,alpha, print_cost,samples):
    cost_array = []
    mydict_costs={} 
    for i in range(num_iterations):
        #call cost function
        dw,db,cost = cost_function(w,b,X,Y,samples)
        w = w - alpha*dw
        b = b - alpha*db
        
        #save cost
       
        
        if (print_cost == True and i%50 == 0):
            print("Cost from iteration ",i,"= ",cost,"\n")
            cost_array.append(cost)
            mydict_costs.update({i:cost})
            
    return w,b,mydict_costs



'''Funcao calcular os valores das precições utilizando a funcao softmax
    input:
        y_linear: matriz resultante da multiplicacao dos pesos pelos features, adicionando o vetore de biases (calculo da hipotese)
    output:
       ativacoes de classe geradas pelo softmax para cada sample
'''
def softmax(y_linear):
    exp = np.exp(y_linear)
    norms = np.sum(exp, axis=1)
    return exp.T / norms



'''Funcao obter o custo dado a funcao cross-entropy
    input:
       yhat: vetor de predicoes
       y: vetor de labels ground truth
    output:
        erro daqueles determinados pesos calculado pela funcao cross entropy
'''
def cross_entropy(yhat, y):
    return - (np.sum(y * np.log(yhat+1e-6) + (1-y)*np.log(1-yhat+1e-6)))/y.shape[0]



#-------------TRAINING------------------




#-------------METRICS------------------


'''Funcao para obter as métricas de performance: precision, recall e f1_score
    input:
        confusion_matrix: matriz de confusão (use get_confusion_matrix() para calculá-la)
    output:
        precision: relação entre a quantidade de positivos preditos pela quantidade real de positivos 
        recall: relação entre a quantidade de positivos esperados pela quantidade de positivos preditos
        f1_score: metrica para relacionar precision e recall em uma única métrica
'''
def get_metrics(confusion_matrix):
    precision = get_precision(confusion_matrix)
    recall = get_recall(confusion_matrix)
    f1_score = get_f1_score(precision, recall)
    
    return precision, recall, f1_score


def print_metrics(precision, recall, f1_score):
    precision = np.around(precision, decimals=2).reshape( precision.shape[0],1)
    recall = np.around(recall, decimals=2).reshape( recall.shape[0],1)
    f1_score = np.around(f1_score, decimals=2).reshape( f1_score.shape[0],1)
    
    average_precision = np.sum(precision)/precision.shape[0]
    average_recall = np.sum(recall)/recall.shape[0]
    average_f1_score = np.sum(f1_score)/f1_score.shape[0]
    
    print("\n\nPrecision (Pr), Recall (Re) and F1_Score (F1) of each class: ")
    print("Pr   Re    F1")
    print(re.sub(r' *\n *', '\n', np.array_str(np.c_[precision, recall, f1_score]).replace('[', '').replace(']', '').strip()))
    
    print("\n\nAverage Precision: ", round(average_precision,2) , "\nAverage Recall: ", round(average_recall, 2) ,"\nAverage F1_Score: ", round(average_f1_score,2))
    

def get_recall(confusion_matrix):
    precision = np.ones((confusion_matrix.shape[0]))
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fp = np.sum(confusion_matrix[:,i])
        
        precision[i] = tp/tp_fp
        
    return precision


def get_precision(confusion_matrix):
    recall = np.ones(confusion_matrix.shape[0])
    
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fn = np.sum(confusion_matrix[i,:])
        
        recall[i] = tp/tp_fn
        
    return recall


def get_f1_score(precision, recall):
    f1 = 2*( (precision*recall) / (precision+recall) )
    return f1


#-------------METRICS------------------



#-------------Ploting------------------





'''Funcao para calcular a matriz de confusão
    input:
        Y: labels corretas
        Y_pred: labels preditas
        classes: numéro de classes
    output:
        confusion_matrix: matriz de confusão
'''
def get_confusion_matrix(Y, Y_pred, classes):
    
    if(Y_pred.max() != 1):
        Y_pred = binarize_labels(Y_pred,classes)
        
    if(Y.max() != 1 ):
        Y = binarize_labels(Y,classes)
    
    confusion_matrix = np.zeros((classes,classes)).astype(int)
    
    for y in range (Y.shape[1]):
        for c_pred in range (classes):
            if(Y_pred[c_pred, y] == 1):
                for c in range (classes):
                    if(Y[c, y] == 1):
                        confusion_matrix[c, c_pred]+=1
    
    return confusion_matrix

#-------------Ploting------------------



In [3]:
print("Loading Dataframes......")
df = pd.read_csv("data/fashion-mnist_train.csv")

print("Separating into training and validation.....")
#Separate the Training DF into Train and Validation
msk = np.random.rand(len(df)) < 0.7 

train_df = df[msk]
validation_df = df[~msk]

Y_train = train_df["label"]
Y_validation = validation_df["label"]

train_df = train_df.loc[:, train_df.columns != "label"]
validation_df = validation_df.loc[:, validation_df.columns != "label"]

#test_df = pd.read_csv("data/fashion-mnist_test.csv")

print("Normalizing (This may take a couple minutes).....")
normalized_train_df = normalize_dataframe(train_df)
normalized_validation_df = normalize_dataframe(validation_df)


print("Preparing the labels.....")
#One hot encoding labels para o softmax
classes = 10

samples_train = Y_train.shape[0]
samples_validation = Y_validation.shape[0]

Y_train = np.array(Y_train).reshape(1, samples_train)

y_aux = np.eye(classes)[Y_train.astype('int32')]
y_aux = y_aux.T.reshape(classes, samples_train)
Y_train = y_aux

Y_validation = np.array(Y_validation).reshape(1, samples_validation)

y_aux = np.eye(classes)[Y_validation.astype('int32')]
y_aux = y_aux.T.reshape(classes, samples_validation)

Y_validation = y_aux
Y_validation =Y_validation.T

print(normalized_train_df.shape[1])

print("Done!")



Loading Dataframes......
Separating into training and validation.....
Normalizing (This may take a couple minutes).....
Preparing the labels.....
784
Done!


In [4]:
#Training/testing

ws, bs = initialize_parameters(normalized_train_df.shape[1],num_classes=10) #CREATING NEW WEIGHTS AND BIAS

print("Create Testing values, this may take some minutes")
start_time1 = time.time()
w,b,costs1 = gradient_descent(ws,bs,train_df,Y_train,500,0.3,True,samples_train) # pesos e bias treinados
print("--- %s seconds ---" % (time.time() - start_time1))

# start_time2 = time.time()
# w,b,costs2 = gradient_descent(ws,bs,train_df,Y_train,2000,0.1,True,samples_train) # pesos e bias treinados
# print("--- %s seconds ---" % (time.time() - start_time2))

# start_time3 = time.time()
# w,b,costs3 = gradient_descent(ws,bs,train_df,Y_train,2000,0.05,True,samples_train) # pesos e bias treinados
# print("--- %s seconds ---" % (time.time() - start_time3))

Create Testing values, this may take some minutes
Cost from iteration  0 =  13629.995052589573 

Cost from iteration  50 =  5827.8150215554 

Cost from iteration  100 =  5086.096193721556 

Cost from iteration  150 =  4729.81660347897 

Cost from iteration  200 =  4506.5209558308 

Cost from iteration  250 =  4348.85606714359 

Cost from iteration  300 =  4229.458915595504 

Cost from iteration  350 =  4134.735586872308 

Cost from iteration  400 =  4057.044137878621 

Cost from iteration  450 =  3991.706979820591 

Cost from iteration  500 =  3935.676367802348 

Cost from iteration  550 =  3886.8689706151827 

Cost from iteration  600 =  3843.8054674888103 

Cost from iteration  650 =  3805.4023344958287 

Cost from iteration  700 =  3770.845162854672 

Cost from iteration  750 =  3739.508242974407 

Cost from iteration  800 =  3710.901677861131 

Cost from iteration  850 =  3684.6355415504977 

Cost from iteration  900 =  3660.3949537415638 

Cost from iteration  950 =  3637.92235176

In [8]:
#predicting/tests
z =np.matmul(validation_df,w)+b #hipotese without sigmoid
A = softmax(z)
sda = np.argmax(A,axis=0)

y_aux = np.eye(classes)[sda.astype('int32')]
y_aux = y_aux.T.reshape(classes, samples_validation)
final = y_aux

y_test_new = Y_validation.T
print(y_test_new.shape)



confusion_matrix = get_confusion_matrix(y_test_new,final, 10)

print(confusion_matrix)

precision, recall, f1_score = get_metrics(confusion_matrix)

print_metrics(precision, recall, f1_score)

(10, 18072)
[[1448   15   33  102    8    0  141    0   13    0]
 [   6 1731   20   54    7    0   11    0    1    0]
 [  27    6 1326   12  276    3  141    0   16    0]
 [  82   27   15 1568   53    0   48    0   10    0]
 [   7    3  159   72 1401    0  166    0   11    0]
 [   1    1    0    2    0 1610    0  113   17   99]
 [ 293    4  218   51  198    0 1008    0   30    0]
 [   0    0    0    0    0   45    0 1667    3   94]
 [   7    1   12   25   10   10   47   10 1711    3]
 [   1    0    0    0    0   27    0   67    2 1666]]


Precision (Pr), Recall (Re) and F1_Score (F1) of each class: 
Pr   Re    F1
0.82 0.77 0.8
0.95 0.97 0.96
0.73 0.74 0.74
0.87 0.83 0.85
0.77 0.72 0.74
0.87 0.95 0.91
0.56 0.65 0.6
0.92 0.9  0.91
0.93 0.94 0.94
0.94 0.89 0.92


Average Precision:  0.84 
Average Recall:  0.84 
Average F1_Score:  0.84
