In [1]:
import numpy as np
np.set_printoptions(threshold=np.nan)
from sklearn.datasets import fetch_mldata
from sklearn.metrics import confusion_matrix
import matplotlib
import matplotlib.pyplot as plt
import re
import time
import pandas as pd

In [2]:
# changing labels to make one vs all

def change_label(which_class,y_train,y_test):

    samples_train = y_train.shape[0]
    samples_test = y_test.shape[0]
    
    new_labels_train = np.zeros((samples_train,1))
    new_labels_test = np.zeros((samples_test,1))

    new_labels_train = np.array(y_train == which_class).astype(int)
    new_labels_test = np.array(y_test == which_class).astype(int)
    
#     ind_row_tr, ind_col_tr = np.where(y_train == which_class)
#     ind_row_tst, ind_col_tst = np.where(y_test== which_class)
    
#     new_labels_train[ind_row_tr,ind_col_tr] = 1
#     new_labels_test[ind_row_tst,ind_col_tst] = 1
    
    return new_labels_train, new_labels_test
    
    
'''Criando a funcao de normalização de um dataframe inteiro
    input:
        df: Dataframe
    output:
        df: Dataframe com valores normalizados
'''
def normalize_dataframe(df):
    for column in df:
        df[column] = df[column]/255
    return df


'''Funcao para obter as métricas de performance: precision, recall e f1_score
    input:
        confusion_matrix: matriz de confusão (use get_confusion_matrix() para calculá-la)
    output:
        precision: relação entre a quantidade de positivos preditos pela quantidade real de positivos 
        recall: relação entre a quantidade de positivos esperados pela quantidade de positivos preditos
        f1_score: metrica para relacionar precision e recall em uma única métrica
'''
def get_metrics(confusion_matrix):
    precision = get_precision(confusion_matrix)
    recall = get_recall(confusion_matrix)
    f1_score = get_f1_score(precision, recall)
    
    return precision, recall, f1_score

def print_metrics(precision, recall, f1_score):
    precision = np.around(precision, decimals=2).reshape( precision.shape[0],1)
    recall = np.around(recall, decimals=2).reshape( recall.shape[0],1)
    f1_score = np.around(f1_score, decimals=2).reshape( f1_score.shape[0],1)
    
    average_precision = np.sum(precision)/precision.shape[0]
    average_recall = np.sum(recall)/recall.shape[0]
    average_f1_score = np.sum(f1_score)/f1_score.shape[0]
    
    print("\n\nPrecision (Pr), Recall (Re) and F1_Score (F1) of each class: ")
    print("Pr   Re    F1")
    print(re.sub(r' *\n *', '\n', np.array_str(np.c_[precision, recall, f1_score]).replace('[', '').replace(']', '').strip()))
    
    print("\n\nAverage Precision: ", round(average_precision,2) , "\nAverage Recall: ", round(average_recall, 2) ,"\nAverage F1_Score: ", round(average_f1_score,2))
    
    #print(precision," ", recall," ", f1_score)

def get_recall(confusion_matrix):
    precision = np.ones((confusion_matrix.shape[0]))
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fp = np.sum(confusion_matrix[:,i])
        
        precision[i] = tp/tp_fp
        
    return precision

def get_precision(confusion_matrix):
    recall = np.ones(confusion_matrix.shape[0])
    
    for i in range (confusion_matrix.shape[0]):
        tp = confusion_matrix[i,i]
        tp_fn = np.sum(confusion_matrix[i,:])
        
        recall[i] = tp/tp_fn
        
    return recall

def get_f1_score(precision, recall):
    f1 = 2*( (precision*recall) / (precision+recall) )
    return f1

'''Funcao para calcular a matriz de confusão
    input:
        Y: labels corretas
        Y_pred: labels preditas
        classes: numéro de classes
    output:
        confusion_matrix: matriz de confusão
'''
def get_confusion_matrix(Y, Y_pred, classes):
    
    
    
    if(Y_pred.max() != 1):
        Y_pred = binarize_labels(Y_pred,classes)
        
    if(Y.max() != 1 ):
        Y = binarize_labels(Y,classes)
    
    confusion_matrix = np.zeros((classes,classes)).astype(int)
    
    for y in range (Y.shape[1]):
        for c_pred in range (classes):
            if(Y_pred[c_pred, y] == 1):
                for c in range (classes):
                    if(Y[c, y] == 1):
                        confusion_matrix[c, c_pred]+=1
    
    return confusion_matrix


'''Funcao para converter um vetor de probabilidades de classes em one-hot-encoding
    input:
        L: vetor de probabilidade das labels
    output:
        Y_pred: vetor de labels (one-hot-encoding) 
    Ex: 
        Y_pred[:,1] = [0,1,0,0,0,0,0,0,0,0] quer dizer que a classe da sample 1 é 2
'''
def binarize_labels(L, classes):
    samples_number = L.shape[0]
    #classes = L.shape[0]
    
    Y_pred = np.array(L).reshape(1, samples_number)

    y_aux = np.eye(classes)[Y_pred.astype('int32')]
    y_aux = y_aux.T.reshape(classes, samples_number)

    Y_pred = y_aux
    
    print(Y_pred.shape)
    
    return Y_pred

In [3]:
#Data preparation

def data_preparation(X,Y):
    
    X_binary = X[np.any([y == 1,y == 2, y==3], axis = 0)]
    y_binary = y[np.any([y == 1,y == 2, y==3], axis = 0)]
    shuffle_index = np.random.permutation(X_binary.shape[0])
    X_binary_shuffled, y_binary_shuffled = X_binary[shuffle_index], y_binary[shuffle_index]
    
    train_proportion = 0.8
    train_test_cut = int(len(X_binary)*train_proportion)
    
    #sufle create new data set
    X_train, X_test, y_train, y_test = \
    X_binary_shuffled[:train_test_cut], \
    X_binary_shuffled[train_test_cut:], \
    y_binary_shuffled[:train_test_cut], \
    y_binary_shuffled[train_test_cut:]
    
    #normalize
    X_train_normalised= X_train/255.0
    X_test_normalised = X_test/255.0
    
    #reshape -> feature in rows and label in collumn
    X_train_tr = X_train_normalised.transpose()
    y_train_tr = y_train.reshape(1,y_train.shape[0])
    X_test_tr = X_test_normalised.transpose()
    y_test_tr = y_test.reshape(1,y_test.shape[0])
    
    #change labels from 1-2 to 0-1
#     y_train_shifted = y_train_tr - 1
#     print(y_binary.shape)
#     y_test_shifted = y_test_tr - 1
#     print("Shape of X_train is", X_train.shape)
#     print("Shape of X_test is", X_test.shape)
#     print("Shape of y_train is", y_train.shape)
#     print("Shape of y_test is", y_test.shape)
    
#     print(X_train_tr.shape)
#     print(y_train_tr.shape)
#     print(X_test_tr.shape)
#     print(y_test_tr.shape)

    
    return (X_train_tr,X_test_tr,y_train_tr,y_test_tr)
    #for i in range(10):
        #print ("digit", i, "appear", np.count_nonzero(y == i), "times")
       


In [4]:
# plot image 
def plot_image(this_digit):
    some_digit_image = this_digit.reshape(28,28)
    plt.imshow(some_digit_image, cmap = matplotlib.cm.binary, interpolation = "nearest")
    plt.axis("off")
    plt.show()

In [5]:
#create sigmoid function

def sigmoid(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s

In [6]:
#initialize values of thetas and bias

def initialize_parameters(dim):
    w = np.zeros((dim,1))
    b = 0
    return w,b

In [7]:
#implement cost function and its gradient propagation
def cost_function(w, b, X, Y):

    m = X.shape[1] #quantity of samples
    
    z = np.dot(w.T,X)+b #hipotese without sigmoid
    A = sigmoid(z) # applying hipotese on sigmoid, A is the result of sigmoid for each sample
    cost = -1.0/m*np.sum(Y*np.log(A)+(1.0-Y)*np.log(1.0-A)) #find cost values for all samples
    dw = 1.0/m*np.dot(X, (A-Y).T) #find residual of weights
    db = 1.0/m*np.sum(A-Y) # find residual of bias
    cost = np.squeeze(cost)
    
    return dw,db, cost

In [8]:
#apply gradient descent

def gradient_descent(w,b,X,Y,num_iterations,alpha, print_cost):
    cost_array = []
    for i in range(num_iterations):
        #call cost function
        dw,db,cost = cost_function(w,b,X,Y)
        # update weights and bias
        w = w - alpha*dw
        b = b - alpha*db
        
        #save cost
        cost_array.append(cost)
        
        if (print_cost == True and i%200 == 0):
            print("Cost from iteration ",i,"= ",cost,"\n")
            
    return w,b,cost_array
        

In [9]:
#prediction

def prediction (w,b,X,threshold):
    samples = X.shape[1]
    Y_prediction = np.zeros((1,samples))
    w = w.reshape(X.shape[0],1)
    A = sigmoid (np.dot(w.T, X)+b)
#     for i in range(A.shape[1]):
#         if (A[:,i] > threshold): 
#             Y_prediction[:, i] = 1
#         elif (A[:,i] <= threshold):
#             Y_prediction[:, i] = 0
    return A

In [10]:
# one vs all prediction

def one_vs_all(w,X):
    samples = X.shape[1]
    weighs_qut = w.shape[0]
    bias = w[-1]
    weights = w[:(len(w))-1]
    A = sigmoid (np.dot(weights.T, X)+bias)
    
    return A;

In [11]:
print("Loading Dataframe......")
df = pd.read_csv("../data/fashion-mnist_train.csv")

#Separate the Training DF into Train and Validation
msk = np.random.rand(len(df)) < 0.7 

print("Separating into training and validation.....")
train_df = df[msk]
validation_df = df[~msk]

Y_train = train_df["label"]
Y_validation = validation_df["label"]

train_df = train_df.loc[:, train_df.columns != "label"]
validation_df = validation_df.loc[:, validation_df.columns != "label"]

#test_df = pd.read_csv("data/fashion-mnist_test.csv")

print("Normalizing.....")
normalized_train_df = normalize_dataframe(train_df)
normalized_validation_df = normalize_dataframe(validation_df)
print("Done!")

Loading Dataframe......
Separating into training and validation.....
Normalizing.....


In [16]:
X_train = normalized_train_df.T
X_test = normalized_validation_df.T


i = 0;
dim_train = X_train.shape[0]
dim_test = X_test.shape[0]
num_classes = 10
num_iterations = 1000
alpha = 0.3

y_train = Y_train
y_test = Y_validation

classifiers = {} #dicionarios de classificadores treinados
#probabilities = np.zeros((num_classes,dim_test))
probabilities = []
while i < num_classes:
    print("Trainig class ", i)
    new_label_train, new_label_test = change_label(i,y_train,y_test)
    ws, bs = initialize_parameters(dim_train)
    
    w,b,costs = gradient_descent(ws,bs,X_train,new_label_train,num_iterations,alpha,False) # pesos e bias treinados
    treined_parameters = (np.append(w,b)).reshape(dim_train+1,1)  # concatenate weights
    classifiers.update({i:treined_parameters})
    i = i+1

print("Done!")

Trainig class  0
Trainig class  1
Trainig class  2
Trainig class  3
Trainig class  4
Trainig class  5
Trainig class  6
Trainig class  7
Trainig class  8
Trainig class  9
Done!


In [17]:
j=0

probabilities = []
#compute probabilities for each class
while j < num_classes:
    prob = one_vs_all(classifiers.get(j),X_test)
    #probabilities[:][:j] = prob
    #print(prob.shape)
    probabilities.append(prob)
    #print(np.array(probabilities).shape)
    j = j+1

k = 0
aux_array = np.zeros(num_classes)
aux_array_2 = np.zeros((num_classes, X_test.shape[1]))
#print(np.array(aux_array_2).shape)
prediction_array = []
y = y_test

for c in range (num_classes):
    aux_array_2[c] = probabilities[c][0]

final = np.array(np.argmax(aux_array_2, axis=0))

y_test_new = y_test.T
#print("oi", final.shape)

confusion_matrix = get_confusion_matrix(y_test_new,final, 10)

print(confusion_matrix)

precision, recall, f1_score = get_metrics(confusion_matrix)

print_metrics(precision, recall, f1_score)

(10, 17866)
(10, 17866)
[[1483    7   51  117    4    4   39    0   32    1]
 [   6 1708   20   45    4    0    0    0    2    0]
 [  37    2 1498   15  185    3   25    0   27    0]
 [  64   29   48 1546   62    0   13    0   13    0]
 [   3    5  345   86 1290    1   57    0   22    0]
 [   2    0    2    0    0 1614    1  105   11   64]
 [ 365    3  417  103  238    2  642    2   59    1]
 [   0    0    0    0    0   57    0 1625    6  103]
 [   8    1   22   27    7    7    6    4 1698    1]
 [   0    1    0    0    0   25    0   60    0 1678]]


Precision (Pr), Recall (Re) and F1_Score (F1) of each class: 
Pr   Re    F1
0.85 0.75 0.8
0.96 0.97 0.96
0.84 0.62 0.71
0.87 0.8  0.83
0.71 0.72 0.72
0.9  0.94 0.92
0.35 0.82 0.49
0.91 0.9  0.91
0.95 0.91 0.93
0.95 0.91 0.93


Average Precision:  0.83 
Average Recall:  0.83 
Average F1_Score:  0.82
