In [1]:
import numpy as np
np.set_printoptions(threshold=np.nan)
from sklearn.datasets import fetch_mldata
from sklearn.metrics import confusion_matrix
import matplotlib
import matplotlib.pyplot as plt
import re
import time
import pandas as pd

# changing labels to make one vs all
def change_label(which_class,y_train,y_test):

    samples_train = y_train.shape[0]
    samples_test = y_test.shape[0]
    
    new_labels_train = np.zeros((samples_train,1))
    new_labels_test = np.zeros((samples_test,1))

    new_labels_train = np.array(y_train == which_class).astype(int)
    new_labels_test = np.array(y_test == which_class).astype(int)
    
    return new_labels_train, new_labels_test
    
    
'''Criando a funcao de normalização de um dataframe inteiro
    input:
        df: Dataframe
    output:
        df: Dataframe com valores normalizados
'''
def normalize_dataframe(df):
    for column in df:
        df[column] = df[column]/255
    return df


'''Funcao para obter as métricas de performance: precision, recall e f1_score
    input:
        confusion_matrix: matriz de confusão (use get_confusion_matrix() para calculá-la)
    output:
        precision: relação entre a quantidade de positivos preditos pela quantidade real de positivos 
        recall: relação entre a quantidade de positivos esperados pela quantidade de positivos preditos
        f1_score: metrica para relacionar precision e recall em uma única métrica
'''
def get_metrics(confusion_matrix):
    precision = get_precision(confusion_matrix)
    recall = get_recall(confusion_matrix)
    f1_score = get_f1_score(precision, recall)
    
    return precision, recall, f1_score

def print_metrics(precision, recall, f1_score):
    precision = np.around(precision, decimals=2).reshape( precision.shape[0],1)
    recall = np.around(recall, decimals=2).reshape( recall.shape[0],1)
    f1_score = np.around(f1_score, decimals=2).reshape( f1_score.shape[0],1)
    
    average_precision = np.sum(precision)/precision.shape[0]
    average_recall = np.sum(recall)/recall.shape[0]
    average_f1_score = np.sum(f1_score)/f1_score.shape[0]
    
    print("\n\nPrecision (Pr), Recall (Re) and F1_Score (F1) of each class: ")
    print("Pr   Re    F1")
    print(re.sub(r' *\n *', '\n', np.array_str(np.c_[precision, recall, f1_score]).replace('[', '').replace(']', '').strip()))
    
    print("\n\nAverage Precision: ", round(average_precision,2) , "\nAverage Recall: ", round(average_recall, 2) ,"\nAverage F1_Score: ", round(average_f1_score,2))
    
    #print(precision," ", recall," ", f1_score)

def get_recall(confusion_matrix):
    precision = np.ones((confusion_matrix.shape[0]))
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fp = np.sum(confusion_matrix[:,i])
        
        precision[i] = tp/tp_fp
        
    return precision

def get_precision(confusion_matrix):
    recall = np.ones(confusion_matrix.shape[0])
    
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fn = np.sum(confusion_matrix[i,:])
        
        recall[i] = tp/tp_fn
        
    return recall

def get_f1_score(precision, recall):
    f1 = 2*( (precision*recall) / (precision+recall) )
    return f1

'''Funcao para calcular a matriz de confusão
    input:
        Y: labels corretas
        Y_pred: labels preditas
        classes: numéro de classes
    output:
        confusion_matrix: matriz de confusão
'''
def get_confusion_matrix(Y, Y_pred, classes):
    
    
    
    if(Y_pred.max() != 1):
        Y_pred = binarize_labels(Y_pred,classes)
        
    if(Y.max() != 1 ):
        Y = binarize_labels(Y,classes)
    
    confusion_matrix = np.zeros((classes,classes)).astype(int)
    
    for y in range (Y.shape[1]):
        for c_pred in range (classes):
            if(Y_pred[c_pred, y] == 1):
                for c in range (classes):
                    if(Y[c, y] == 1):
                        confusion_matrix[c, c_pred]+=1
    
    return confusion_matrix


'''Funcao para converter um vetor de probabilidades de classes em one-hot-encoding
    input:
        L: vetor de probabilidade das labels
    output:
        Y_pred: vetor de labels (one-hot-encoding) 
    Ex: 
        Y_pred[:,1] = [0,1,0,0,0,0,0,0,0,0] quer dizer que a classe da sample 1 é 2
'''
def binarize_labels(L, classes):
    samples_number = L.shape[0]
    #classes = L.shape[0]
    
    Y_pred = np.array(L).reshape(1, samples_number)

    y_aux = np.eye(classes)[Y_pred.astype('int32')]
    y_aux = y_aux.T.reshape(classes, samples_number)

    Y_pred = y_aux
    
    return Y_pred

# plot image 
def plot_image(this_digit):
    some_digit_image = this_digit.reshape(28,28)
    plt.imshow(some_digit_image, cmap = matplotlib.cm.binary, interpolation = "nearest")
    plt.axis("off")
    plt.show()
    

#create sigmoid function
def sigmoid(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s


#initialize values of thetas and bias
def initialize_parameters(dim):
    w = np.zeros((dim,1))
    b = 0
    return w,b

#implement cost function and its gradient propagation
def cost_function(w, b, X, Y):

    m = X.shape[1] #quantity of samples
    
    z = np.dot(w.T,X)+b #hipotese without sigmoid
    A = sigmoid(z) # applying hipotese on sigmoid, A is the result of sigmoid for each sample
    cost = -1.0/m*np.sum(Y*np.log(A)+(1.0-Y)*np.log(1.0-A)) #find cost values for all samples
    dw = 1.0/m*np.dot(X, (A-Y).T) #find residual of weights
    db = 1.0/m*np.sum(A-Y) # find residual of bias
    cost = np.squeeze(cost)
    
    return dw,db, cost


#apply gradient descent
def gradient_descent(w,b,X,Y,num_iterations,alpha, print_cost):
    cost_array = []
    mydict_costs={} 
    for i in range(num_iterations):
        #call cost function
        dw,db,cost = cost_function(w,b,X,Y)
        # update weights and bias
        w = w - alpha*dw
        b = b - alpha*db
        
        cost_array.append(cost)
        
        if (print_cost == True and i%100 == 0):
            print("Cost from iteration ",i,"= ",cost,"\n")
            
            
    return w,b,cost_array


#prediction
def prediction (w,b,X,threshold):
    samples = X.shape[1]
    Y_prediction = np.zeros((1,samples))
    w = w.reshape(X.shape[0],1)
    A = sigmoid (np.dot(w.T, X)+b)
    return A

# one vs all prediction
def one_vs_all(w,X):
    samples = X.shape[1]
    weighs_qut = w.shape[0]
    bias = w[-1]
    weights = w[:(len(w))-1]
    A = sigmoid (np.dot(weights.T, X)+bias)
    
    return A;
        

In [2]:
print("Loading Dataframe......")
df = pd.read_csv("data/fashion-mnist_train.csv")

#Separate the Training DF into Train and Validation
msk = np.random.rand(len(df)) < 0.7 

print("Separating into training and validation.....")
train_df = df[msk]
validation_df = df[~msk]

Y_train = train_df["label"]
Y_validation = validation_df["label"]

train_df = train_df.loc[:, train_df.columns != "label"]
validation_df = validation_df.loc[:, validation_df.columns != "label"]

#test_df = pd.read_csv("data/fashion-mnist_test.csv")

print("Normalizing (This may take a couple minutes).....")
normalized_train_df = normalize_dataframe(train_df)
normalized_validation_df = normalize_dataframe(validation_df)
print("Done!")

Loading Dataframe......
Separating into training and validation.....
Normalizing (This may take a couple minutes).....
Done!


In [29]:
X_train = normalized_train_df.T
X_test = normalized_validation_df.T


i = 0;
dim_train = X_train.shape[0]
dim_test = X_test.shape[0]
num_classes = 10


y_train = Y_train
y_test = Y_validation

classifiers = {} #dicionarios de classificadores treinados
#probabilities = np.zeros((num_classes,dim_test))
probabilities = []
start_time1 = time.time()
while i < num_classes:
    print("Trainig class ", i)
    new_label_train, new_label_test = change_label(i,y_train,y_test)
    ws, bs = initialize_parameters(dim_train) 
    #Mude o change cost para True para verbose dos custos
    w,b,costs = gradient_descent(ws,bs,X_train,new_label_train,500,0.3
                                 ,print_cost=True) # pesos e bias treinados
    treined_parameters = (np.append(w,b)).reshape(dim_train+1,1)  # concatenate weights
    classifiers.update({i:treined_parameters})
    i = i+1

print("--- %s time 1 seconds ---" % (time.time() - start_time1))



print("Done!")

Trainig class  0
Cost from iteration  0 =  0.6931471805599454 

Cost from iteration  100 =  0.1209759604452918 

Cost from iteration  200 =  0.11547497490981043 

Cost from iteration  300 =  0.11246451156513441 

Cost from iteration  400 =  0.11043221049531557 

Cost from iteration  500 =  0.10894196278551203 

Cost from iteration  600 =  0.10779497364627443 

Cost from iteration  700 =  0.10688103292817885 

Cost from iteration  800 =  0.10613264358500807 

Cost from iteration  900 =  0.1055058691093985 

Cost from iteration  1000 =  0.10497086095024231 

Cost from iteration  1100 =  0.10450667294175309 

Cost from iteration  1200 =  0.10409821927930518 

Cost from iteration  1300 =  0.10373439647453411 

Cost from iteration  1400 =  0.10340687751965313 

Cost from iteration  1500 =  0.10310931320963503 

Cost from iteration  1600 =  0.10283678952096957 

Cost from iteration  1700 =  0.10258545097172625 

Cost from iteration  1800 =  0.10235223430616787 

Cost from iteration  1900 =  

Cost from iteration  100 =  0.08223469134149816 

Cost from iteration  200 =  0.07028396898159565 

Cost from iteration  300 =  0.06490380725244058 

Cost from iteration  400 =  0.061465807893482896 

Cost from iteration  500 =  0.05895429587006626 

Cost from iteration  600 =  0.05699653830496318 

Cost from iteration  700 =  0.05541237456826529 

Cost from iteration  800 =  0.05409849094261796 

Cost from iteration  900 =  0.05298878483957185 

Cost from iteration  1000 =  0.052037865705536125 

Cost from iteration  1100 =  0.05121308463698501 

Cost from iteration  1200 =  0.05049019887524378 

Cost from iteration  1300 =  0.0498507848054861 

Cost from iteration  1400 =  0.0492805847443649 

Cost from iteration  1500 =  0.048768397576630096 

Cost from iteration  1600 =  0.048305308048110854 

Cost from iteration  1700 =  0.04788413750414706 

Cost from iteration  1800 =  0.04749904462214292 

Cost from iteration  1900 =  0.04714523042499829 

Trainig class  9
Cost from iteration  

In [30]:
j=0

probabilities = []
#compute probabilities for each class
while j < num_classes:
    prob = one_vs_all(classifiers.get(j),X_test)
    #probabilities[:][:j] = prob
    #print(prob.shape)
    probabilities.append(prob)
    #print(np.array(probabilities).shape)
    j = j+1

k = 0
aux_array = np.zeros(num_classes)
aux_array_2 = np.zeros((num_classes, X_test.shape[1]))
#print(np.array(aux_array_2).shape)
prediction_array = []
y = y_test

for c in range (num_classes):
    aux_array_2[c] = probabilities[c][0]

final = np.array(np.argmax(aux_array_2, axis=0))

y_test_new = y_test.T

#print("oi", final.shape)

confusion_matrix = get_confusion_matrix(y_test_new,final, 10)

print(confusion_matrix)

precision, recall, f1_score = get_metrics(confusion_matrix)

print_metrics(precision, recall, f1_score)

[[1499    7   44   77    9    3   59    0   23    2]
 [   7 1689   24   48    4    0    2    0    3    0]
 [  27    4 1470   17  173    2   35    0   26    0]
 [  79   30   40 1552   53    1   18    1    8    0]
 [   5    2  290   89 1370    0   71    0   18    0]
 [   2    0    1    1    0 1640    2   93   16   45]
 [ 348    6  368   88  228    2  723    1   46    0]
 [   0    0    0    0    0   67    0 1614    4   76]
 [   6    0   28   14    9   11   12   12 1778    2]
 [   0    1    0    1    0   19    0   70    3 1649]]


Precision (Pr), Recall (Re) and F1_Score (F1) of each class: 
Pr   Re    F1
0.87 0.76 0.81
0.95 0.97 0.96
0.84 0.65 0.73
0.87 0.82 0.85
0.74 0.74 0.74
0.91 0.94 0.93
0.4  0.78 0.53
0.92 0.9  0.91
0.95 0.92 0.94
0.95 0.93 0.94


Average Precision:  0.84 
Average Recall:  0.84 
Average F1_Score:  0.83
