In [1]:
# %load perceptron.py
import math
import numpy as np
import pandas as pd

class Perceptron:

    def __init__(self,input):
        self.W = np.zeros(input.shape[1]+1) ## weight vector
        ## +1 slot cause of bias in input
        self.bias = 1
        self.lr = 0.1 ##learning rate
        self.epoch = 100

    def predict(self,input):
        x = np.insert(input,0,1) # x0 = 1, for bias
        net = np.inner(self.W,x)
        s = self.sigmoid(net)
        return s if s >= 0.5 else 0.0

    def fit(self,input,d):
        for i in range(self.epoch):
            for j in range(d.shape[0]):
                t = np.array(input.iloc[j])
                x = np.insert(t,0,1) # x0 = 1, for bias
                net = np.inner(self.W,x)
                y = self.sigmoid(net)
                self.W = self.W + self.lr * (d[j]-y)*x
                
    def step_function(self,value):
        return 1.0 if value >= 0.0 else 0.0    
    
    def sigmoid(self,value):
        if -value > np.log(np.finfo(type(value)).max):
            return 0.0    
        a = np.exp(-value)
        return 1.0/ (1.0 + a) 
        
    def testPrediction(self, input, y_expected):
        hits = 0
        acc = 0
        for i in range(input.shape[0]):
            t = np.array(input.iloc[i])
            x = np.insert(t,0,1) # x0 = 1, for bias
            y = self.sigmoid( np.inner(self.W,x) )
            if y == y_expected[i]:
                hits += 1
        acc = hits / input.shape[0]
        print('Accuracy: {}'.format(acc))
        
if __name__ == '__main__':
    X_input = pd.DataFrame([
    [0,0],[0,1],[1,0],[1,1]
    ])
    exp_out = np.array([0,0,0,1])
    perceptron = Perceptron(X_input)
    perceptron.fit(X_input,exp_out)
    print(perceptron.W)


[-2.93684159  1.81802157  1.79301854]


In [2]:
df_test = pd.read_csv('mnist_test.csv')
df_train = pd.read_csv('mnist_train.csv')

y_train,X_train = np.split(df_train,[1], axis=1)  ## Splits DataFrame into Labels // Training sets
y_test,X_test = np.split(df_test,[1], axis=1)  ## Splits DataFrame into Labels // Training sets


In [3]:
##      onde for == 1, troca por 1, se nao 0   ;;; treina o perceptron a ver somente o numero 1
Y_train = np.where(y_train['5'] == 1, 1, 0)  ##normalizes input so perceptron sees it as 1/0
Y_test = np.where(y_test['7'] == 1, 1, 0)

In [4]:
df_sorted = df_train.sort_values(by=['5'])
a = df_train['5'].value_counts(sort = True, ascending = True)
lowest_sample_amount = a.min()

In [5]:
normalized_df = df_train
for i in df_train['5'].unique():
    drop_amount = a[i] - lowest_sample_amount  ##gets amount of rows to be dropped
    tmp = df_train[df_train['5'] == i]  ##select all == to label
    indexes = tmp.sample(n= drop_amount).index           ## get indexes to be dropped
    normalized_df = normalized_df.drop(indexes)         ## normalized df will have same number of every sample

In [6]:
perceptrons = []
for i in range(10): 
    perceptrons.append(Perceptron(X_train))   ##create 10 perceptrons
    
for i in range(10):
    tmp = []
    tmp2 = []
    ### prepares data
    final_df = normalized_df
    for j in range(10):  ## this for loop organizes the data before training perceptron
        if j != i:
            sets_per_value = math.floor(lowest_sample_amount/9)
            
            df_tmp = normalized_df[ normalized_df['5'] == j ]
            indexes = df_tmp.sample(n= (lowest_sample_amount - sets_per_value) ).index   ##if yes group 5000, then drop (5000 - 554)...
            final_df = final_df.drop(indexes)
    
    y_train_final,X_train_final = np.split(final_df,[1],axis=1)  ##splits data into labels // data
    
    tmp.append( np.where(y_train_final['5'] == i, 1 , 0) )  #changes labels to 1/0 in training data
    tmp2.append( np.where(y_test['7'] == i, 1 , 0) )         #changes labels to 1/0 in test data
    
    y_train_array = np.array(tmp)  
    y_test_array = np.array(tmp2)
    #print(perceptrons)
    perceptrons[i].fit(X_train_final,y_train_array[0])   ##training
    print('Perceptron {} obteve:'.format(i))
    perceptrons[i].testPrediction(X_test,y_test_array[0])   ##testing

Perceptron 0 obteve:
Accuracy: 0.967996799679968
Perceptron 1 obteve:
Accuracy: 0.9731973197319732
Perceptron 2 obteve:
Accuracy: 0.9621962196219622
Perceptron 3 obteve:
Accuracy: 0.888988898889889
Perceptron 4 obteve:
Accuracy: 0.931893189318932
Perceptron 5 obteve:
Accuracy: 0.953995399539954
Perceptron 6 obteve:
Accuracy: 0.9572957295729573
Perceptron 7 obteve:
Accuracy: 0.9547954795479549
Perceptron 8 obteve:
Accuracy: 0.917991799179918
Perceptron 9 obteve:
Accuracy: 0.8121812181218122


In [8]:
## Tests all 10 perceptrons with test data
predictions = []
for i in range(X_test.shape[0]):  #for each input 
    test_ar = np.array(X_test.iloc[i])  #cast to np.array
    p = [] 
    for j in range(10):
        p.append( perceptrons[j].predict(test_ar) )  #appends perceptrons predictions to array
    
    predictions.append( p.index( max(p) ) )  #perceptron with highest value is probably right

In [9]:
df_test = pd.DataFrame(predictions, columns=['q'])  ## creates dataFrame with results

res = np.where((df_test['q'] == y_test['7']), 1, 0)  ##compares expected results dataFrame with test-results dataFrame

In [10]:
hits = np.count_nonzero(res == 1)
f_accuracy = hits / res.size
f_accuracy

0.7416741674167416

In [42]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import seaborn as sn
import matplotlib.pyplot as plt

In [69]:
df_cm = confusion_matrix(y_test, predictions)

In [70]:
print(classification_report(y_test, predictions))

             precision    recall  f1-score   support

          0       0.61      0.99      0.75       980
          1       0.82      0.99      0.89      1135
          2       0.84      0.74      0.79      1032
          3       0.56      0.86      0.68      1010
          4       0.65      0.89      0.75       982
          5       0.88      0.44      0.59       892
          6       0.92      0.78      0.85       958
          7       0.87      0.76      0.81      1027
          8       0.91      0.39      0.55       974
          9       0.79      0.51      0.62      1009

avg / total       0.78      0.74      0.73      9999



In [74]:
df_cm

array([[ 969,    0,    0,    4,    1,    0,    3,    1,    2,    0],
       [   5, 1120,    4,    3,    1,    0,    2,    0,    0,    0],
       [ 100,   70,  766,   45,   16,    0,   11,    9,   10,    5],
       [  69,   23,   31,  868,    2,    0,    2,    3,    2,   10],
       [  23,   12,   14,   56,  872,    0,    1,    0,    4,    0],
       [ 125,    9,    7,  218,   76,  390,   18,    6,   11,   32],
       [  63,   11,   36,   25,   46,   24,  752,    0,    1,    0],
       [  28,   36,   36,   78,   49,    5,    0,  779,    2,   14],
       [ 189,   62,    8,  148,   45,   19,   29,   16,  384,   74],
       [  24,   27,    6,  106,  237,    3,    0,   83,    7,  516]],
      dtype=int64)