In [8]:
import cv2
import os
import glob
import numpy as np
from numpy.linalg import eig
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
img_dir = "./dataset/"
data_path = os.path.join(img_dir,'*.jpg')
files = glob.glob(data_path)
data = []
labels = []
for f1 in files:
    a = str(f1).split('.')
    b = str(a[1]).split('/')
    c = str(b[-1]).split('_')
    labels.append(str(c[0]))
    img = cv2.imread(f1,0)
    img = cv2.resize(img, (64,64), interpolation = cv2.INTER_AREA) 
    data.append(img)

#prep for traning data and labels
data = np.array(data)
data = data.reshape(data.shape[0], data.shape[1]*data.shape[2]) 
labels = np.array(labels)
labels = labels.reshape(-1)
labelsUNQ = np.unique(labels)

In [3]:
avg = np.mean(data.T, axis=1)
C = data - avg
V = np.cov(C.T)
values, vectors = eig(V)

In [4]:
indEig = values.argsort()[-100:][::-1]
vectors = vectors[:,indEig]
dataPCA = np.dot(C, vectors)

In [5]:
dataFin = np.c_[ np.ones(len(dataPCA)), dataPCA ]
trainData = dataFin[:420]
testData = dataFin[420:]
trainLabel = labels[:420]
testLabel = labels[420:]

In [6]:
# Make a prediction with coefficients
def predict(data , coef):
    coef = coef.reshape(len(coef),1)
    yhat = np.dot(data, coef)
    return 1.0 / (1.0 + np.exp(-yhat))

# Estimate logistic regression coefficients using stochastic gradient descent
def coefficients_sgd(trainData , labels, alpha, itr):
    coef = np.zeros((len(trainData[0]),1))
    labels = labels.reshape(len(labels),1)
    for i in range(itr):
        yhat = predict(trainData, coef)
        error = labels - yhat
#         a = (yhat * (1-yhat))
#         a = a.reshape(len(a),1)
        p = alpha * np.dot(trainData.T, error)
        coef = coef + p
    return coef
 
alpha = 5e-10
itr = 10000
shape = (trainLabel.size, labelsUNQ.size)
labels1H = np.zeros(shape)
for i in range(len(labelsUNQ)):
    temp = np.where(trainLabel == labelsUNQ[i])
    labels1H[temp[0],i] = 1
predictions = np.zeros((len(testData),len(labelsUNQ)))
coefficients = np.zeros((len(labels1H[0]), len(trainData[0])))

for i in range(len(labels1H[0])):
    coefficients[i] = coefficients_sgd(trainData, labels1H[:,i] , alpha, itr).reshape(coefficients[i].shape)
for i in range(len(labelsUNQ)):
    temp = predict(testData, coefficients[i])
    predictions[:,i] = temp.reshape(len(temp))

labelsInd = np.argmax(predictions, axis=1)
labelsPred = labelsUNQ[labelsInd]



In [9]:
print ("\nAccuracy score for Logistic regression\n", accuracy_score(testLabel, labelsPred))
print("\nConfusion matrix for Logistic regression\n",confusion_matrix(testLabel, labelsPred))
print("\nClassification report for Logistic regression\n",classification_report(testLabel, labelsPred))


Accuracy score
 0.64

Confusion matrix for MLP
 [[ 5  0  2  0  2  1  0  1]
 [ 1 10  0  4  1  0  0  0]
 [ 0  2  7  0  1  0  0  0]
 [ 1  2  0 11  2  0  0  0]
 [ 0  2  0  0  4  2  0  1]
 [ 0  0  0  1  2  5  0  0]
 [ 1  2  0  1  0  0  6  1]
 [ 0  1  0  1  1  0  0 16]]

Classification report for MLP
               precision    recall  f1-score   support

         000       0.62      0.45      0.53        11
         001       0.53      0.62      0.57        16
         002       0.78      0.70      0.74        10
         003       0.61      0.69      0.65        16
         004       0.31      0.44      0.36         9
         005       0.62      0.62      0.62         8
         006       1.00      0.55      0.71        11
         007       0.84      0.84      0.84        19

    accuracy                           0.64       100
   macro avg       0.66      0.62      0.63       100
weighted avg       0.68      0.64      0.65       100

