<a href="https://colab.research.google.com/github/MykhailoFokin/MachineLearning/blob/Dev/simple_classification_with_MSE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
import cv2
import fnmatch
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import explained_variance_score

from sklearn.model_selection import train_test_split

In [2]:
#Copy images from repo to colab
!git clone https://github.com/SilvesterHsu/ORLFaceRecognition-PCA.git

Cloning into 'ORLFaceRecognition-PCA'...
remote: Enumerating objects: 604, done.[K
remote: Total 604 (delta 0), reused 0 (delta 0), pack-reused 604[K
Receiving objects: 100% (604/604), 3.59 MiB | 5.47 MiB/s, done.
Resolving deltas: 100% (93/93), done.


In [13]:
# read folder structure with files and put it to collection
folder = []
for i in os.walk(os.path.join('ORLFaceRecognition-PCA','att_faces')):
  folder.append(i)

split_regexp = [r'[0]*',r'[0|9]*',r'[0|8-9]*',r'[0|7-9]*',r'[0|6-9]*',r'[0|5-9]*',r'[0|4-9]*',r'[0|3-9]*',r'[0|2-9]*']

for split_index in range(1, 10):

  test_matrix = []
  train_matrix = []

  test_labels = []
  train_labels = []

  for address, dirs, files in folder:
      
      for filename in fnmatch.filter(files, '*.pgm'):

        img = cv2.imread(os.path.join(address,filename))
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)    
        gray.flatten().reshape(1, 10304)
        person = address.replace('ORLFaceRecognition-PCA/att_faces/','')

        # r'[0|8-9]*'
        if fnmatch.filter(filename, r'[0|9]*'):
          test_matrix.append(gray.flatten().reshape(1, 10304))
          test_labels.append(person)
        else:
        #if fnmatch.filter(filename, r'[1-5]*'):
          train_matrix.append(gray.flatten().reshape(1, 10304))
          train_labels.append(person)

  np_test_matrix = np.array(test_matrix)
  np_train_matrix = np.array(train_matrix)
    
  # Different deviation results
  result_mse = np.empty((len(np_train_matrix),1))     # mean_squared_error
  result_mae = np.empty((len(np_train_matrix),1))     # mean_absolute_error
  result_msle = np.empty((len(np_train_matrix),1))    # mean_squared_log_error
  result_evs = np.empty((len(np_train_matrix),1))     # explained_variance_score
  result_std = np.empty((len(np_train_matrix),1))     # standart deviance

  predictions = np.empty((len(np_test_matrix),6),dtype="S3") 

  estimations_mse = np.empty((len(np_test_matrix),1)) 
  estimations_mae = np.empty((len(np_test_matrix),1)) 
  estimations_msle = np.empty((len(np_test_matrix),1)) 
  estimations_evs = np.empty((len(np_test_matrix),1)) 
  estimations_std = np.empty((len(np_test_matrix),1))

  for i in range(len(np_test_matrix)):
    
    for j in range(len(np_train_matrix)):
      result_mse[j] = mean_squared_error(np_test_matrix[i], np_train_matrix[j])
      result_mae[j] = mean_absolute_error(np_test_matrix[i], np_train_matrix[j])
      result_msle[j] = mean_squared_log_error(np_test_matrix[i], np_train_matrix[j])
      result_evs[j] = explained_variance_score(np_test_matrix[i], np_train_matrix[j])
      result_std[j] = np.sqrt(mean_squared_error(np_test_matrix[i], np_train_matrix[j]))
    
    # write source labes with predicted ones
    # comparing predicted and source label in estimation array
    #print(train_labels[np.argmin(result_mse)])
    predictions[i][0] = test_labels[i]
    predictions[i][1] = train_labels[np.argmin(result_mse)]
    predictions[i][2] = train_labels[np.argmin(result_mae)]
    predictions[i][3] = train_labels[np.argmin(result_msle)]
    predictions[i][4] = train_labels[np.argmin(result_evs)]
    predictions[i][5] = train_labels[np.argmin(result_std)]

    estimations_mse[i] = int(test_labels[i]==train_labels[np.argmin(result_mse)])
    estimations_mae[i] = int(test_labels[i]==train_labels[np.argmin(result_mae)])
    estimations_msle[i] = int(test_labels[i]==train_labels[np.argmin(result_msle)])
    estimations_evs[i] = int(test_labels[i]==train_labels[np.argmin(result_evs)])
    estimations_std[i] = int(test_labels[i]==train_labels[np.argmin(result_std)])

  #print(predictions)
  estimation_result_mse = estimations_mse.sum()/len(estimations_mse)
  estimation_result_mae = estimations_mae.sum()/len(estimations_mae)
  estimation_result_msle = estimations_msle.sum()/len(estimations_msle)
  estimation_result_evs = estimations_evs.sum()/len(estimations_evs)
  estimation_result_std = estimations_std.sum()/len(estimations_std)

  #print("Estimation for MSE: {} ; MAE: {} ; MSLE: {} ; EVS: {} ; STD: {} ;"
  #.format(estimation_result_mse, estimation_result_mae, estimation_result_msle, estimation_result_evs, estimation_result_std))

  #print("MSE: {}", estimation_result_mse)
  #print("MAE: {}", estimation_result_mae)
  #print("MSLE: {}", estimation_result_msle)
  #print("EVS: {}", estimation_result_evs)
  #print("STD: {}", estimation_result_std)

  SCORES = dict(mean_squared_error       = estimation_result_mse,
                mean_absolute_error      = estimation_result_mae,
                mean_squared_log_error   = estimation_result_msle,
                explained_variance_score = estimation_result_evs,
                standart_square_deviance = estimation_result_std)

  print("Test {} percents to Train {} percents",split_index*10,(10-split_index)*10)
  print(SCORES)

MSE: {} 1.0
MAE: {} 0.075
MSLE: {} 0.925
EVS: {} 0.025
STD: {} 1.0
{'mean_squared_error': 1.0, 'mean_absolute_error': 0.075, 'mean_squared_log_error': 0.925, 'explained_variance_score': 0.025, 'standart_square_deviance': 1.0}


In [144]:
dataset = []
indexes = []
test_matrix = []
train_matrix = []
test_labels = []
train_labels = []
dataset_shuffle = False
stratify = None
random_state = 42
path_x=[]

#print('MSE{}.  Train/Test : {:d} '.format('SJFSAFASF' if dataset_shuffle else '',random_state))

#includes = r'|'.join([fnmatch.translate(x) for x in includes])

# read folder structure with files and put it to collection
folder = []
for i in os.walk(os.path.join('ORLFaceRecognition-PCA','att_faces')):
  folder.append(i)

for address, dirs, files in folder:
  for file in fnmatch.filter(files, '*.pgm'):
    path_x.append(os.path.join(address,file))

for file in sorted(path_x, key=lambda x: int(x.split("/",3)[3].rstrip('.pgm')+x.split("/",3)[2].lstrip("s").zfill(2))):

  img = cv2.imread(file)
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)    
  gray.flatten().reshape(1, 10304)
  person = file.split("/",3)[2].lstrip("s")

  dataset.append(gray.flatten().reshape(1, 10304))
  indexes.append(int(person))

for split_mark in range(1, 19):

  if split_mark>=10:
    dataset_shuffle = True
    split_k=split_mark-9
  else:
    split_k=split_mark

  train_matrix, test_matrix, train_labels, test_labels = train_test_split(dataset, indexes, test_size=0.1*split_k, random_state=random_state, shuffle=dataset_shuffle, stratify=stratify)

  estimations_mse = []
  estimations_mae = []
  estimations_msle = []
  estimations_std = []

  for test_image in test_matrix:

    matrix_mse = []
    matrix_mae = []
    matrix_msle = []
    matrix_std = []

    for train_image in train_matrix:

      matrix_mse.append(mean_squared_error(test_image, train_image))
      matrix_mae.append(mean_absolute_error(test_image, train_image))
      matrix_msle.append(mean_squared_log_error(test_image, train_image))
      matrix_std.append(np.sqrt(mean_squared_error(test_image, train_image)))

    estimations_mse.append(train_labels[np.argmin(matrix_mse)])
    estimations_mae.append(train_labels[np.argmin(matrix_mae)])
    estimations_msle.append(train_labels[np.argmin(matrix_msle)])
    estimations_std.append(train_labels[np.argmin(matrix_std)])

  result_mse = np.equal(test_labels, estimations_mse)
  result_mae = np.equal(test_labels, estimations_mae)
  result_msle = np.equal(test_labels, estimations_msle)
  result_std = np.equal(test_labels, estimations_std)

  matrix_length = len(result_mse)

  evaluation_mse = np.sum(result_mse)/matrix_length
  evaluation_mae = np.sum(result_mae)/matrix_length
  evaluation_msle = np.sum(result_msle)/matrix_length
  evaluation_std = np.sum(result_std)/matrix_length

  print('MSE{:s}.  Train/Test : {:d} / {:d} - {:.3f}'.format(' with Shuffle' if dataset_shuffle else '',100-split_k*10, split_k*10, evaluation_mse))
  print('MAE{:s}.  Train/Test : {:d} / {:d} - {:.3f}'.format(' with Shuffle' if dataset_shuffle else '',100-split_k*10, split_k*10, evaluation_mae))
  print('MSLE{:s}.  Train/Test : {:d} / {:d} - {:.3f}'.format(' with Shuffle' if dataset_shuffle else '',100-split_k*10, split_k*10, evaluation_msle))
  print('STD{:s}.  Train/Test : {:d} / {:d} - {:.3f}'.format(' with Shuffle' if dataset_shuffle else '',100-split_k*10, split_k*10, evaluation_std))
  #print('MAE.  Train/Test : %d / %d - %.3f' % (100-split_k*10, split_k*10, evaluation_mae))
  #print('MSLE. Train/Test : %d / %d - %.3f' % (100-split_k*10, split_k*10, evaluation_msle))
  #print('STD.  Train/Test : %d / %d - %.3f' % (100-split_k*10, split_k*10, evaluation_std))


MSE.  Train/Test : 90 / 10 - 1.000
MAE.  Train/Test : 90 / 10 - 0.075
MSLE.  Train/Test : 90 / 10 - 0.925
STD.  Train/Test : 90 / 10 - 1.000
MSE.  Train/Test : 80 / 20 - 1.000
MAE.  Train/Test : 80 / 20 - 0.075
MSLE.  Train/Test : 80 / 20 - 0.963
STD.  Train/Test : 80 / 20 - 1.000
MSE.  Train/Test : 70 / 30 - 1.000
MAE.  Train/Test : 70 / 30 - 0.066
MSLE.  Train/Test : 70 / 30 - 0.950
STD.  Train/Test : 70 / 30 - 1.000
MSE.  Train/Test : 60 / 40 - 1.000
MAE.  Train/Test : 60 / 40 - 0.081
MSLE.  Train/Test : 60 / 40 - 0.938
STD.  Train/Test : 60 / 40 - 1.000
MSE.  Train/Test : 50 / 50 - 0.930
MAE.  Train/Test : 50 / 50 - 0.080
MSLE.  Train/Test : 50 / 50 - 0.910
STD.  Train/Test : 50 / 50 - 0.930
MSE.  Train/Test : 40 / 60 - 0.846
MAE.  Train/Test : 40 / 60 - 0.062
MSLE.  Train/Test : 40 / 60 - 0.892
STD.  Train/Test : 40 / 60 - 0.846
MSE.  Train/Test : 30 / 70 - 0.814
MAE.  Train/Test : 30 / 70 - 0.071
MSLE.  Train/Test : 30 / 70 - 0.850
STD.  Train/Test : 30 / 70 - 0.814
MSE.  Train/T