In [None]:
!unzip ea


In [4]:
import pandas as pd
import numpy as np
from os import walk
from os.path import join
import cv2
import random

class DataReader:

  def __init__(self, path):
    self.path = path
  
  def preprocess(self, img):
    img[img<255] = -1
    img[img==255] = +1
    return img
  
  def get_files_path(self):
    files = {}
    for i, w in enumerate(walk(self.path)):
      if i == 0:
        continue
      dirpath, dirnames, filenames = w
      files[i] = []
      for f in filenames:
        files[i].append(join(dirpath, f))
    return files


  def read_normal(self):
    files = self.get_files_path()
    data = pd.DataFrame({'img': [], 'label': []})
    i = 0
    for k in files.keys():
      for path in files[k]:
        img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
        img = img.astype('float32')
        img = self.preprocess(img)
        img = np.array(img) 
        row = pd.DataFrame({'img': [np.ndarray.flatten(img)], 'label': [int(k)-1]})
        data = pd.concat([data,row], ignore_index = True)
    data['label'] = pd.to_numeric(data['label'], downcast='integer')
    data['id'] = data.index
    return data



In [5]:
from math import floor
from copy import deepcopy
class Perceptron:

  def __init__(self, input_size=3600, lr=0.01 ):
    self.size = input_size
    self.w = np.zeros(3600)
    self.b = 1
    self.lr = lr
  
  def update(self, x, y_hat, t):
    if y_hat != t:
      self.w = self.w + (self.lr * t * x)
      self.b = self.b + (self.lr * t)

  def output(self, x):
    y = np.dot(x, self.w.T)
    y = y + self.b
    return self.activation(y)
    
  def activation(self, y):
    if y >= 0:
      return 1
    else:
      return -1

class Layer:
  
  def __init__(self, input_shape=(60,60), num_perceptrons=26):
    self.num_perceptrons = num_perceptrons
    self.perceptrons = [Perceptron() for i in range(self.num_perceptrons)]
    self.input_shape = input_shape
  
  def train(self, X, Y, epochs=10):
    for _ in range(epochs):
      for x,y in zip(X,Y):
        for i, p in enumerate(self.perceptrons):
          y_hat = p.output(x)
          p.update(x, y_hat, y[i])

  def predict(self, X):
    preds = np.zeros(X.shape[0])
    for i, x in enumerate(X):
      pp = np.zeros(self.num_perceptrons)
      for j, p in enumerate(self.perceptrons):
        pp[j] = p.output(x)
      preds[i] = np.argmax(pp)
    return preds

  def predict_noisy(self, X, percent):
    preds = np.zeros(X.shape[0])
    for i, x1 in enumerate(X):
      pp = np.zeros(self.num_perceptrons)
      x = self.add_noise(deepcopy(x1), percent)
      for j, p in enumerate(self.perceptrons):
        pp[j] = p.output(x)
      preds[i] = np.argmax(pp)
    return preds

  def add_noise(self, img, percent):
    num_of_pixels = floor(percent * img[img==-1].size)
    b = np.where(img == -1)[0]
    indices = random.sample(range(0, b.size), num_of_pixels)
    img[b[indices]] = 1
    return img

In [10]:
from keras.utils.np_utils import to_categorical
from sklearn.model_selection import train_test_split

reader = DataReader('/content/ea')
data = reader.read_normal()
data['label_hot'] = data['label'].map(lambda x: to_categorical(x, 1+data['label'].max()))
data['label_hot'] = data['label_hot'].map(lambda x: list(map(lambda y: -1.0 if y == 0 else y, x)))
train_ids, valid_ids = train_test_split(data['id'], test_size = 0.25, 
                                   random_state = 2020,
                                   stratify = data['label'])
train = data[data['id'].isin(train_ids)]
train = train.sample(frac=1).reset_index(drop=True)
valid = data[data['id'].isin(valid_ids)]
print('train', train.shape[0], 'valid', valid.shape[0])
x_train = train['img'].to_numpy()
y_train = train['label_hot'].to_numpy()
x_test = valid['img'].to_numpy()
y_test = valid['label_hot'].to_numpy()

data = data.sample(frac=1).reset_index(drop=True)
data.sample(3)

train 390 valid 130


Unnamed: 0,img,label,id,label_hot
246,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",4,93,"[-1.0, -1.0, -1.0, -1.0, 1.0, -1.0, -1.0, -1.0..."
244,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",12,246,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."
96,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",24,480,"[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1...."


In [11]:
from sklearn.metrics import accuracy_score, classification_report

model = Layer()
model.train(x_train, y_train, 10)
preds = model.predict(x_train)
print('Accuracy on Train Data: %2.2f%%' % (accuracy_score( train['label'].to_numpy(), preds)))
preds = model.predict(x_test)
print('Accuracy on Test Data: %2.2f%%' % (accuracy_score( valid['label'].to_numpy(), preds)))

Accuracy on Train Data: 1.00%
Accuracy on Test Data: 0.95%


In [12]:
from sklearn.model_selection import LeaveOneOut
from tqdm import tqdm

X = data['img'].to_numpy()
y = data['label_hot'].to_numpy()
loo = LeaveOneOut()
pred_acc = np.zeros(X.shape[0])
_pred = []
for i, indices in tqdm(enumerate(loo.split(X))):
  train_index, test_index = indices
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  model = Layer()
  model.train(X_train, y_train, 10)
  preds = model.predict(X_test)
  _pred.append(preds)
  acc = accuracy_score( data['label'].iloc[test_index] , preds)
  pred_acc[i] = acc

print("Mean Accuracy for LOOCV : ", sum(pred_acc)/X.shape[0])


520it [07:47,  1.11it/s]

Mean Accuracy for LOOCV :  0.9403846153846154





In [14]:
X = data['img'].to_numpy()
y = data['label_hot'].to_numpy()
model = Layer()
model.train(X, y, 10)
preds = model.predict_noisy(X, 0.15)
print('Accuracy on 15%% Noisy Data: %2.2f%%' % (accuracy_score(data['label'].to_numpy(), preds)))

preds = model.predict_noisy(X, 0.25)
print('Accuracy on 25%% Noisy Data: %2.2f%%' % (accuracy_score(data['label'].to_numpy(), preds)))

Accuracy on 15% Noisy Data: 0.93%
Accuracy on 25% Noisy Data: 0.84%
