# Dependencies

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.image as mpimg
import shutil
import random
import pickle


from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, classification_report

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from numpy import linalg as LA
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

import keras
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils

import os
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

import cv2
import matplotlib.image as mpimg
import glob 
import shutil

# Loading Data

In [None]:
face_data_path = '/content/drive/MyDrive/Data_set/Mini_Proj/AFDB_face_dataset/*/*'
mask_data_path = '/content/drive/MyDrive/Data_set/Mini_Proj/AFDB_masked_face_dataset/*/*'

def image_loader(folder):
  img_path = []
  for img in glob.glob(folder):
    img_path.append(str(img))
  return img_path

no_mask_images = image_loader(face_data_path)
mask_images = image_loader(mask_data_path)

In [None]:
a = len(mask_images) if (len(no_mask_images) > len(mask_images)) else len(no_mask_images) 
no_mask_img = no_mask_images[0:a]
mask_img = mask_images[0:a]
print('Number of no_mask images' , len(no_mask_img))
print('Number of mask images' , len(mask_img))

Number of no_mask images 2203
Number of mask images 2203


In [None]:
def feature_extraction (dir_path , label_value):
  data = []
  label = []
  #img_list = []
  for filename in dir_path:
    img = mpimg.imread(filename)
    if img is not None:
      #img_list.append(img)
      gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
      resized = cv2.resize(gray_img,(28, 28), interpolation=cv2.INTER_CUBIC)
      # cv2_imshow(resized)
      data.append(resized.flatten())
      label.append(label_value)
  return data , label

In [None]:
data_no_mask , label_no_mask = feature_extraction(no_mask_img , 0)
data_mask , label_mask = feature_extraction(mask_img , 1)

# Creating Laten Vector from Images

In [None]:
X = data_no_mask + data_mask
Y = label_no_mask + label_mask

In [None]:
if len(X) == len(Y):
  print('Number of data values: ' , len(X))

print('Number of features' , len(X[0]))

Number of data values:  4406
Number of features 784


In [None]:
X_df = pd.DataFrame(X, columns = list(range(0 , len(X[0]))))
Y_df = pd.DataFrame(Y , columns = ['Label'])

df = pd.concat([X_df, Y_df], axis=1)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,Label
0,46.0,47.0,41.0,41.0,37.0,44.0,38.0,42.0,45.0,39.0,41.0,38.0,40.0,47.0,55.0,61.0,78.0,67.0,68.0,95.0,109.0,80.0,68.0,55.0,43.0,52.0,53.0,49.0,39.0,38.0,32.0,33.0,39.0,40.0,38.0,34.0,33.0,45.0,36.0,26.0,...,220.0,195.0,48.0,33.0,18.0,22.0,39.0,50.0,59.0,59.0,55.0,188.0,177.0,175.0,177.0,177.0,177.0,178.0,182.0,190.0,189.0,190.0,194.0,200.0,199.0,212.0,215.0,209.0,182.0,45.0,8.0,31.0,32.0,26.0,23.0,45.0,62.0,58.0,54.0,0
1,15.0,6.0,15.0,10.0,19.0,18.0,6.0,6.0,9.0,7.0,5.0,6.0,7.0,11.0,7.0,10.0,13.0,14.0,8.0,9.0,1.0,3.0,3.0,3.0,1.0,7.0,8.0,7.0,18.0,14.0,10.0,15.0,14.0,16.0,9.0,4.0,11.0,7.0,8.0,7.0,...,189.0,187.0,187.0,188.0,184.0,179.0,178.0,175.0,168.0,162.0,161.0,26.0,62.0,82.0,90.0,78.0,37.0,38.0,184.0,186.0,194.0,191.0,202.0,200.0,195.0,195.0,193.0,188.0,188.0,183.0,183.0,184.0,181.0,178.0,179.0,174.0,168.0,163.0,154.0,0
2,177.0,136.0,115.0,81.0,68.0,53.0,75.0,77.0,64.0,59.0,47.0,36.0,68.0,66.0,52.0,43.0,36.0,38.0,46.0,37.0,40.0,34.0,45.0,48.0,45.0,39.0,44.0,45.0,156.0,134.0,96.0,62.0,59.0,67.0,104.0,51.0,46.0,47.0,43.0,71.0,...,142.0,139.0,135.0,133.0,124.0,108.0,76.0,108.0,109.0,99.0,88.0,248.0,250.0,247.0,225.0,184.0,213.0,147.0,51.0,51.0,201.0,192.0,187.0,177.0,165.0,156.0,152.0,145.0,142.0,135.0,134.0,122.0,99.0,90.0,114.0,115.0,108.0,107.0,94.0,0
3,106.0,53.0,12.0,9.0,19.0,11.0,10.0,9.0,11.0,11.0,13.0,13.0,13.0,9.0,6.0,18.0,12.0,14.0,11.0,6.0,13.0,11.0,18.0,13.0,24.0,14.0,8.0,16.0,87.0,15.0,15.0,19.0,16.0,14.0,10.0,5.0,5.0,9.0,8.0,12.0,...,98.0,66.0,48.0,32.0,43.0,47.0,53.0,69.0,70.0,81.0,7.0,136.0,112.0,84.0,63.0,14.0,74.0,176.0,221.0,146.0,135.0,158.0,165.0,147.0,126.0,89.0,58.0,47.0,36.0,26.0,48.0,73.0,92.0,112.0,120.0,91.0,75.0,28.0,178.0,0
4,23.0,20.0,27.0,35.0,36.0,34.0,37.0,46.0,52.0,46.0,40.0,44.0,52.0,61.0,87.0,106.0,119.0,126.0,134.0,155.0,162.0,153.0,148.0,108.0,68.0,83.0,108.0,105.0,31.0,47.0,57.0,49.0,37.0,37.0,57.0,65.0,63.0,67.0,73.0,79.0,...,36.0,48.0,70.0,92.0,87.0,93.0,104.0,108.0,102.0,136.0,196.0,96.0,98.0,96.0,94.0,96.0,87.0,78.0,92.0,85.0,86.0,73.0,50.0,71.0,80.0,74.0,82.0,76.0,78.0,91.0,97.0,107.0,98.0,96.0,113.0,87.0,64.0,79.0,113.0,0


# Data preprocessing

shuffle Data

In [None]:
from sklearn.utils import shuffle
df = shuffle(df)

In [None]:
col = df.columns
X_Data = df[col[:-1]]
Y_Data = df[col[-1]]

Normal Data

In [None]:
X_train , X_ , Y_train , Y_ = train_test_split(X_Data, Y_Data, test_size=0.5, random_state=12)
X_test , X_val , Y_test , Y_val = train_test_split(X_, Y_, test_size=0.5, random_state=12)

Standardize Data

In [None]:
X_scandard = StandardScaler().fit_transform(X_Data)

X_train_std , X_m , Y_train_std , Y_m = train_test_split(X_scandard, Y_Data, test_size=0.5, random_state=12)
X_test_std , X_val_std , Y_test_std , Y_val_std = train_test_split(X_m, Y_m, test_size=0.5, random_state=12)

Dimensionality Reduction

In [None]:
lda = LDA()
lda_data = lda.fit_transform(X_Data, Y_Data)
X_train_lda, X_m_lda, Y_train_lda, Y_m_lda = train_test_split(lda_data, Y_Data, test_size=0.5, random_state=12)
X_test_lda , X_val_lda , Y_test_lda , Y_val_lda = train_test_split(X_m_lda, Y_m_lda, test_size=0.5, random_state=12)

# Test Cases

In [None]:
X_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783
2357,67.0,126.0,150.0,151.0,78.0,78.0,67.0,114.0,160.0,166.0,170.0,138.0,159.0,144.0,100.0,88.0,76.0,73.0,68.0,62.0,58.0,50.0,45.0,43.0,38.0,37.0,34.0,59.0,121.0,156.0,162.0,166.0,83.0,74.0,56.0,138.0,174.0,183.0,186.0,150.0,...,187.0,187.0,184.0,174.0,176.0,167.0,154.0,158.0,127.0,125.0,92.0,45.0,71.0,48.0,36.0,81.0,52.0,136.0,160.0,163.0,165.0,163.0,160.0,158.0,162.0,172.0,189.0,180.0,175.0,168.0,159.0,152.0,157.0,145.0,119.0,91.0,122.0,127.0,66.0,47.0
415,15.0,19.0,30.0,34.0,29.0,33.0,49.0,39.0,36.0,45.0,30.0,41.0,52.0,34.0,66.0,41.0,39.0,58.0,33.0,66.0,33.0,31.0,40.0,30.0,23.0,38.0,52.0,62.0,23.0,22.0,30.0,26.0,21.0,18.0,40.0,33.0,25.0,22.0,25.0,31.0,...,126.0,100.0,86.0,90.0,87.0,83.0,89.0,105.0,106.0,113.0,112.0,114.0,10.0,9.0,10.0,10.0,9.0,8.0,6.0,9.0,19.0,140.0,141.0,141.0,125.0,128.0,133.0,131.0,141.0,142.0,131.0,106.0,94.0,91.0,105.0,112.0,113.0,111.0,113.0,115.0
299,25.0,30.0,36.0,48.0,51.0,41.0,30.0,43.0,42.0,37.0,20.0,36.0,44.0,28.0,11.0,12.0,6.0,9.0,12.0,5.0,14.0,13.0,22.0,37.0,36.0,27.0,23.0,19.0,28.0,36.0,29.0,42.0,50.0,48.0,32.0,28.0,25.0,24.0,31.0,49.0,...,218.0,220.0,212.0,206.0,192.0,153.0,75.0,6.0,5.0,4.0,1.0,2.0,7.0,0.0,5.0,88.0,163.0,162.0,148.0,138.0,145.0,158.0,167.0,187.0,204.0,213.0,221.0,222.0,216.0,214.0,202.0,190.0,147.0,93.0,15.0,4.0,2.0,0.0,0.0,1.0
1537,40.0,41.0,39.0,38.0,39.0,38.0,37.0,36.0,38.0,38.0,36.0,37.0,42.0,34.0,40.0,43.0,43.0,44.0,57.0,46.0,39.0,42.0,46.0,55.0,55.0,49.0,43.0,39.0,40.0,42.0,41.0,42.0,42.0,36.0,38.0,77.0,91.0,129.0,105.0,92.0,...,215.0,207.0,206.0,186.0,135.0,142.0,165.0,178.0,195.0,50.0,43.0,143.0,237.0,235.0,228.0,213.0,183.0,99.0,234.0,183.0,145.0,110.0,138.0,171.0,181.0,189.0,190.0,191.0,191.0,172.0,129.0,131.0,177.0,222.0,241.0,149.0,48.0,45.0,45.0,82.0
3504,83.0,84.0,81.0,76.0,56.0,36.0,73.0,243.0,236.0,249.0,251.0,253.0,254.0,254.0,252.0,252.0,252.0,251.0,246.0,244.0,237.0,197.0,131.0,64.0,55.0,116.0,106.0,147.0,84.0,81.0,87.0,41.0,38.0,195.0,246.0,249.0,250.0,247.0,254.0,254.0,...,53.0,57.0,93.0,123.0,114.0,115.0,101.0,70.0,5.0,5.0,4.0,12.0,49.0,122.0,159.0,163.0,137.0,108.0,98.0,102.0,112.0,122.0,134.0,143.0,161.0,168.0,171.0,163.0,157.0,149.0,147.0,132.0,131.0,120.0,102.0,32.0,5.0,1.0,10.0,47.0


In [None]:
print('True Label of \n1. test case 25: ' , Y_test.iloc[25] , '\n2. test case 56: ' , Y_test.iloc[56] , '\n3. test case 100: ' , Y_test.iloc[100])
Y_true = [Y_test.iloc[25] , Y_test.iloc[56] , Y_test.iloc[100]]

True Label of 
1. test case 25:  1 
2. test case 56:  0 
3. test case 100:  1


In [None]:
X_test_1 = X_test.iloc[25]
X_test_2 = X_test.iloc[56]
X_test_3 = X_test.iloc[100]

X_test_n_case = [X_test_1 , X_test_2 , X_test_3]

In [None]:
X_test_1_std = X_test_std[25]
X_test_2_std = X_test_std[56]
X_test_3_std = X_test_std[100]

X_test_std_case = [X_test_1_std , X_test_2_std , X_test_3_std]

In [None]:
X_test_1_lda = X_test_lda[25]
X_test_2_lda = X_test_lda[56]
X_test_3_lda = X_test_lda[100]

X_test_lda_case = [X_test_1_lda , X_test_2_lda , X_test_3_lda]

# Random Forest

rf on Normal data

In [None]:
rf_n = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train, Y_train)
Y_pred_n_rf = rf_n.predict(X_test)

In [None]:
print('For Random Forest on normal Data -')
print("The Precision :", precision_score(Y_test, Y_pred_n_rf))
print("The Recall :", recall_score(Y_test, Y_pred_n_rf))
print("The F1 score :", f1_score(Y_test, Y_pred_n_rf))
print("The Accuracy :", accuracy_score(Y_test, Y_pred_n_rf))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test,Y_pred_n_rf))
print('\nClassification report : ')
print(classification_report(Y_test, Y_pred_n_rf))

For Random Forest on normal Data -
The Precision : 0.8918269230769231
The Recall : 0.6782449725776966
The F1 score : 0.7705088265835929
The Accuracy : 0.7992733878292462

Confusion matrix :
[[509  45]
 [176 371]]

Classification report : 
              precision    recall  f1-score   support

           0       0.74      0.92      0.82       554
           1       0.89      0.68      0.77       547

    accuracy                           0.80      1101
   macro avg       0.82      0.80      0.80      1101
weighted avg       0.82      0.80      0.80      1101



In [None]:
csv_rf_n = cross_val_score(rf_n,X_val, Y_val, cv=5)

print('Cross validation score: ' , csv_rf_n)
print('Mean csv score: ' , np.mean(csv_rf_n))
print('Standard deviation of csv score: ' , np.std(csv_rf_n))

Cross validation score:  [0.8280543  0.78733032 0.76363636 0.82727273 0.82727273]
Mean csv score:  0.8067132867132868
Standard deviation of csv score:  0.026578716331224127


In [None]:
Y_pred_case_rf_n = rf_n.predict(X_test_n_case)
print('Predicted cases for Random Forest on normal Data: ' , Y_pred_case_rf_n)

Predicted cases for Random Forest on normal Data:  [1, 0, 0]


rf on Standardized data

In [None]:
rf_og = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train_std, Y_train_std)
Y_pred_og_rf = rf_og.predict(X_test_std)

In [None]:
print('For Random Forest on standard Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_og_rf))
print("The Recall :", recall_score(Y_test_std, Y_pred_og_rf))
print("The F1 score :", f1_score(Y_test_std, Y_pred_og_rf))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_og_rf))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_og_rf))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_og_rf))

For Random Forest on standard Data -
The Precision : 0.8918269230769231
The Recall : 0.6782449725776966
The F1 score : 0.7705088265835929
The Accuracy : 0.7992733878292462

Confusion matrix :
[[509  45]
 [176 371]]

Classification report : 
              precision    recall  f1-score   support

           0       0.74      0.92      0.82       554
           1       0.89      0.68      0.77       547

    accuracy                           0.80      1101
   macro avg       0.82      0.80      0.80      1101
weighted avg       0.82      0.80      0.80      1101



In [None]:
csv_rf_s = cross_val_score(rf_og,X_val_std, Y_val_std, cv=5)

print('Cross validation score: ' , csv_rf_s)
print('Mean csv score: ' , np.mean(csv_rf_s))
print('Standard deviation of csv score: ' , np.std(csv_rf_s))

Cross validation score:  [0.8280543  0.78733032 0.76363636 0.82727273 0.82727273]
Mean csv score:  0.8067132867132868
Standard deviation of csv score:  0.026578716331224127


In [None]:
Y_pred_case_rf_std = rf_og.predict(X_test_std_case)
print('Predicted cases for Random Forest on standard Data: ' , Y_pred_case_rf_std)

Predicted cases for Random Forest on standard Data:  [1, 0, 0]


rf By dimensionality reduction


In [None]:
rf_dim = RandomForestClassifier(max_depth=2, random_state=0).fit(X_train_lda , Y_train_lda)
Y_pred_dim_rf = rf_dim.predict(X_test_lda)

In [None]:
print('For Random Forest on dimensionaly reduced Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_dim_rf))
print("The Recall :", recall_score(Y_test_std, Y_pred_dim_rf))
print("The F1 score :", f1_score(Y_test_std, Y_pred_dim_rf))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_dim_rf))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_dim_rf))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_dim_rf))

For Random Forest on dimensionaly reduced Data -
The Precision : 0.8301886792452831
The Recall : 0.8848263254113345
The F1 score : 0.856637168141593
The Accuracy : 0.8528610354223434

Confusion matrix :
[[455  99]
 [ 63 484]]

Classification report : 
              precision    recall  f1-score   support

           0       0.88      0.82      0.85       554
           1       0.83      0.88      0.86       547

    accuracy                           0.85      1101
   macro avg       0.85      0.85      0.85      1101
weighted avg       0.85      0.85      0.85      1101



In [None]:
csv_rf_lda = cross_val_score(rf_dim,X_val_lda, Y_val_lda, cv=5)

print('Cross validation score: ' , csv_rf_lda)
print('Mean csv score: ' , np.mean(csv_rf_lda))
print('Standard deviation of csv score: ' , np.std(csv_rf_lda))

Cross validation score:  [0.85067873 0.88687783 0.84090909 0.90454545 0.84090909]
Mean csv score:  0.8647840394899218
Standard deviation of csv score:  0.02610782116570224


In [None]:
Y_pred_case_rf_lda = rf_dim.predict(X_test_n_case)
print('Predicted cases for Random Forest on dimensionaly reduced Data: ' , Y_pred_case_rf_lda)

Predicted cases for Random Forest on dimensionaly reduced Data:  [1, 0, 1]


# MLP

MLP on Normal data

In [None]:
mlp_n = MLPClassifier(hidden_layer_sizes=200, max_iter=500).fit(X_train, Y_train)
Y_pred_n_mlp = mlp_n.predict(X_test)

In [None]:
print('For Multilayer-perceptron on normal Data -')
print("The Precision :", precision_score(Y_test, Y_pred_n_mlp))
print("The Recall :", recall_score(Y_test, Y_pred_n_mlp))
print("The F1 score :", f1_score(Y_test, Y_pred_n_mlp))
print("The Accuracy :", accuracy_score(Y_test, Y_pred_n_mlp))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test,Y_pred_n_mlp))
print('\nClassification report : ')
print(classification_report(Y_test, Y_pred_n_mlp))

For Multilayer-perceptron on normal Data -
The Precision : 0.8819188191881919
The Recall : 0.8738574040219378
The F1 score : 0.8778696051423324
The Accuracy : 0.8792007266121707

Confusion matrix :
[[490  64]
 [ 69 478]]

Classification report : 
              precision    recall  f1-score   support

           0       0.88      0.88      0.88       554
           1       0.88      0.87      0.88       547

    accuracy                           0.88      1101
   macro avg       0.88      0.88      0.88      1101
weighted avg       0.88      0.88      0.88      1101



In [None]:
csv_mlp_n = cross_val_score(mlp_n,X_val, Y_val, cv=5)

print('Cross validation score: ' , csv_mlp_n)
print('Mean csv score: ' , np.mean(csv_mlp_n))
print('Standard deviation of csv score: ' , np.std(csv_mlp_n))

Cross validation score:  [0.85067873 0.79638009 0.83636364 0.88181818 0.79545455]
Mean csv score:  0.832139037433155
Standard deviation of csv score:  0.0330273800762222


In [None]:
Y_pred_case_mlp_n = mlp_n.predict(X_test_n_case)
print('Predicted cases for Multilayer-perceptron on normal Data: ' , Y_pred_case_mlp_n)

Predicted cases for Multilayer-perceptron on normal Data:  [1, 0, 1]


MLP on Standardized data

In [None]:
mlp_og = MLPClassifier(hidden_layer_sizes=150, max_iter=500).fit(X_train_std, Y_train_std)
Y_pred_og_mlp = mlp_og.predict(X_test_std)

In [None]:
print('For Multilayer-perceptron on standard Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_og_mlp))
print("The Recall :", recall_score(Y_test_std, Y_pred_og_mlp))
print("The F1 score :", f1_score(Y_test_std, Y_pred_og_mlp))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_og_mlp))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_og_mlp))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_og_mlp))

For Multilayer-perceptron on standard Data -
The Precision : 0.9124767225325885
The Recall : 0.8957952468007313
The F1 score : 0.9040590405904059
The Accuracy : 0.9055404178019982

Confusion matrix :
[[507  47]
 [ 57 490]]

Classification report : 
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       554
           1       0.91      0.90      0.90       547

    accuracy                           0.91      1101
   macro avg       0.91      0.91      0.91      1101
weighted avg       0.91      0.91      0.91      1101



In [None]:
csv_mlp_s = cross_val_score(mlp_og,X_val_std, Y_val_std, cv=5)

print('Cross validation score: ' , csv_mlp_s)
print('Mean csv score: ' , np.mean(csv_mlp_s))
print('Standard deviation of csv score: ' , np.std(csv_mlp_s))

Cross validation score:  [0.88687783 0.90497738 0.88181818 0.89090909 0.85909091]
Mean csv score:  0.8847346770876182
Standard deviation of csv score:  0.014957219698201853


In [None]:
Y_pred_case_mlp_std = mlp_og.predict(X_test_std_case)
print('Predicted cases for Multilayer-perceptron on standard Data: ' , Y_pred_case_mlp_std)

Predicted cases for Multilayer-perceptron on standard Data:  [1, 0, 1]


MLP By dimensionality reduction


In [None]:
mlp_dim = MLPClassifier(hidden_layer_sizes=(100 , 5), max_iter=600).fit(X_train_lda , Y_train_lda)
Y_pred_dim_mlp = mlp_dim.predict(X_test_lda)

In [None]:
print('For Multilayer-perceptron on dimensionaly reduced Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_dim_mlp))
print("The Recall :", recall_score(Y_test_std, Y_pred_dim_mlp))
print("The F1 score :", f1_score(Y_test_std, Y_pred_dim_mlp))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_dim_mlp))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_dim_mlp))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_dim_mlp))

For Multilayer-perceptron on dimensionaly reduced Data -
The Precision : 0.8409090909090909
The Recall : 0.8793418647166362
The F1 score : 0.8596961572832886
The Accuracy : 0.857402361489555

Confusion matrix :
[[463  91]
 [ 66 481]]

Classification report : 
              precision    recall  f1-score   support

           0       0.88      0.84      0.86       554
           1       0.84      0.88      0.86       547

    accuracy                           0.86      1101
   macro avg       0.86      0.86      0.86      1101
weighted avg       0.86      0.86      0.86      1101



In [None]:
csv_mlp_lda = cross_val_score(mlp_dim,X_val_lda, Y_val_lda, cv=5)

print('Cross validation score: ' , csv_mlp_lda)
print('Mean csv score: ' , np.mean(csv_mlp_lda))
print('Standard deviation of csv score: ' , np.std(csv_mlp_lda))



Cross validation score:  [0.84615385 0.87782805 0.85454545 0.90454545 0.85454545]
Mean csv score:  0.8675236528177704
Standard deviation of csv score:  0.021308826296572717


In [None]:
Y_pred_case_mlp_lda = mlp_dim.predict(X_test_lda_case)
print('Predicted cases for Multilayer-perceptron on dimensionaly reduced Data: ' , Y_pred_case_mlp_lda)

Predicted cases for Multilayer-perceptron on dimensionaly reduced Data:  [1, 0, 1]


# KNN

KNN on Normal data

In [None]:
KNN_n = KNeighborsClassifier().fit(X_train, Y_train)
Y_pred_n_knn = KNN_n.predict(X_test)

In [None]:
print('For K-Nearest Neighbors on normal Data -')
print("The Precision :", precision_score(Y_test, Y_pred_n_knn))
print("The Recall :", recall_score(Y_test, Y_pred_n_knn))
print("The F1 score :", f1_score(Y_test, Y_pred_n_knn))
print("The Accuracy :", accuracy_score(Y_test, Y_pred_n_knn))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test,Y_pred_n_knn))
print('\nClassification report : ')
print(classification_report(Y_test, Y_pred_n_knn))

For K-Nearest Neighbors on normal Data -
The Precision : 0.9366197183098591
The Recall : 0.7294332723948812
The F1 score : 0.8201438848920863
The Accuracy : 0.8410535876475931

Confusion matrix :
[[527  27]
 [148 399]]

Classification report : 
              precision    recall  f1-score   support

           0       0.78      0.95      0.86       554
           1       0.94      0.73      0.82       547

    accuracy                           0.84      1101
   macro avg       0.86      0.84      0.84      1101
weighted avg       0.86      0.84      0.84      1101



In [None]:
csv_KNN_n = cross_val_score(KNN_n,X_val, Y_val, cv=5)

print('Cross validation score: ' , csv_KNN_n)
print('Mean csv score: ' , np.mean(csv_KNN_n))
print('Standard deviation of csv score: ' , np.std(csv_KNN_n))

Cross validation score:  [0.82352941 0.84162896 0.81363636 0.80909091 0.83181818]
Mean csv score:  0.8239407651172357
Standard deviation of csv score:  0.011847312201013309


In [None]:
Y_pred_case_knn_n = KNN_n.predict(X_test_sn_case)
print('Predicted cases for K-Nearest Neighbors on normal Data: ' , Y_pred_case_knn_n)

Predicted cases for K-Nearest Neighbors on normal Data:  [1, 0, 1]


KNN on Standardized data

In [None]:
KNN_og = KNeighborsClassifier().fit(X_train_std, Y_train_std)
Y_pred_og_knn = KNN_og.predict(X_test_std)

In [None]:
print('For K-Nearest Neighbors on standard Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_og_knn))
print("The Recall :", recall_score(Y_test_std, Y_pred_og_knn))
print("The F1 score :", f1_score(Y_test_std, Y_pred_og_knn))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_og_knn))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_og_knn))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_og_knn))

For K-Nearest Neighbors on standard Data -
The Precision : 0.9303944315545244
The Recall : 0.7330895795246801
The F1 score : 0.820040899795501
The Accuracy : 0.8401453224341507

Confusion matrix :
[[524  30]
 [146 401]]

Classification report : 
              precision    recall  f1-score   support

           0       0.78      0.95      0.86       554
           1       0.93      0.73      0.82       547

    accuracy                           0.84      1101
   macro avg       0.86      0.84      0.84      1101
weighted avg       0.86      0.84      0.84      1101



In [None]:
csv_KNN_s = cross_val_score(KNN_og,X_val_std, Y_val_std, cv=5)

print('Cross validation score: ' , csv_KNN_s)
print('Mean csv score: ' , np.mean(csv_KNN_s))
print('Standard deviation of csv score: ' , np.std(csv_KNN_s))

Cross validation score:  [0.80542986 0.83710407 0.80454545 0.79545455 0.83636364]
Mean csv score:  0.815779514603044
Standard deviation of csv score:  0.017463485175865538


In [None]:
Y_pred_case_knn_std = KNN_og.predict(X_test_std_case)
print('Predicted cases for K-Nearest Neighbors on standard Data: ' , Y_pred_case_knn_std)

Predicted cases for K-Nearest Neighbors on standard Data:  [1, 0, 1]


KNN By dimensionality reduction


In [None]:
KNN_dim = KNeighborsClassifier().fit(X_train_lda , Y_train_lda)
Y_pred_dim_knn = KNN_dim.predict(X_test_lda)

In [None]:
print('For K-Nearest Neighbors on dimensionaly reduced Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_dim_knn))
print("The Recall :", recall_score(Y_test_std, Y_pred_dim_knn))
print("The F1 score :", f1_score(Y_test_std, Y_pred_dim_knn))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_dim_knn))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_dim_knn))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_dim_knn))

For K-Nearest Neighbors on dimensionaly reduced Data -
The Precision : 0.8461538461538461
The Recall : 0.8647166361974405
The F1 score : 0.8553345388788426
The Accuracy : 0.8546775658492279

Confusion matrix :
[[468  86]
 [ 74 473]]

Classification report : 
              precision    recall  f1-score   support

           0       0.86      0.84      0.85       554
           1       0.85      0.86      0.86       547

    accuracy                           0.85      1101
   macro avg       0.85      0.85      0.85      1101
weighted avg       0.85      0.85      0.85      1101



In [None]:
csv_KNN_lda = cross_val_score(KNN_dim,X_val_lda, Y_val_lda, cv=5)

print('Cross validation score: ' , csv_KNN_lda)
print('Mean csv score: ' , np.mean(csv_KNN_lda))
print('Standard deviation of csv score: ' , np.std(csv_KNN_lda))

Cross validation score:  [0.85067873 0.85972851 0.85909091 0.88636364 0.83181818]
Mean csv score:  0.8575359934183464
Standard deviation of csv score:  0.01758798299359688


In [None]:
Y_pred_case_knn_lda = KNN_dim.predict(X_test_lda_case)
print('Predicted cases for K-Nearest Neighbors on dimensionaly reduced Data: ' , Y_pred_case_knn_lda)

Predicted cases for K-Nearest Neighbors on dimensionaly reduced Data:  [1, 0, 1]


# SVM

SVM on Normal data

In [None]:
SVM_n = SVC(kernel='rbf').fit(X_train, Y_train)
Y_pred_n_svm = SVM_n.predict(X_test)

In [None]:
print('For Support Vector Machine (SVM) on normal Data -')
print("The Precision :", precision_score(Y_test, Y_pred_n_svm))
print("The Recall :", recall_score(Y_test, Y_pred_n_svm))
print("The F1 score :", f1_score(Y_test, Y_pred_n_svm))
print("The Accuracy :", accuracy_score(Y_test, Y_pred_n_svm))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test,Y_pred_n_svm))
print('\nClassification report : ')
print(classification_report(Y_test, Y_pred_n_svm))

For Support Vector Machine (SVM) on normal Data -
The Precision : 0.9210526315789473
The Recall : 0.8957952468007313
The F1 score : 0.9082483781278963
The Accuracy : 0.9100817438692098

Confusion matrix :
[[512  42]
 [ 57 490]]

Classification report : 
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       554
           1       0.92      0.90      0.91       547

    accuracy                           0.91      1101
   macro avg       0.91      0.91      0.91      1101
weighted avg       0.91      0.91      0.91      1101



In [None]:
csv_SVM_n = cross_val_score(SVM_n,X_val, Y_val, cv=5)

print('Cross validation score: ' , csv_SVM_n)
print('Mean csv score: ' , np.mean(csv_SVM_n))
print('Standard deviation of csv score: ' , np.std(csv_SVM_n))

Cross validation score:  [0.91855204 0.89140271 0.88181818 0.88636364 0.92272727]
Mean csv score:  0.9001727684080624
Standard deviation of csv score:  0.01703525734643509


In [None]:
Y_pred_case_svm_n = SVM_n.predict(X_test_sn_case)
print('Predicted cases for Support Vector Machine (SVM) on normal Data: ' , Y_pred_case_svm_n)

Predicted cases for Support Vector Machine (SVM) on normal Data:  [1, 0, 1]


SVM on Standardized data

In [None]:
SVM_og = SVC(kernel='rbf').fit(X_train_std, Y_train_std)
Y_pred_og_svm = SVM_og.predict(X_test_std)

In [None]:
print('For Support Vector Machine (SVM) on standard Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_og_svm))
print("The Recall :", recall_score(Y_test_std, Y_pred_og_svm))
print("The F1 score :", f1_score(Y_test_std, Y_pred_og_svm))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_og_svm))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_og_svm))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_og_svm))

For Support Vector Machine (SVM) on standard Data -
The Precision : 0.9212007504690432
The Recall : 0.8976234003656307
The F1 score : 0.9092592592592592
The Accuracy : 0.9109900090826522

Confusion matrix :
[[512  42]
 [ 56 491]]

Classification report : 
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       554
           1       0.92      0.90      0.91       547

    accuracy                           0.91      1101
   macro avg       0.91      0.91      0.91      1101
weighted avg       0.91      0.91      0.91      1101



In [None]:
csv_SVM_s = cross_val_score(SVM_og,X_val_std, Y_val_std, cv=5)

print('Cross validation score: ' , csv_SVM_s)
print('Mean csv score: ' , np.mean(csv_SVM_s))
print('Standard deviation of csv score: ' , np.std(csv_SVM_s))

Cross validation score:  [0.91855204 0.89140271 0.88181818 0.88181818 0.91818182]
Mean csv score:  0.8983545865898807
Standard deviation of csv score:  0.016711014142833387


In [None]:
Y_pred_case_svm_std = SVM_og.predict(X_test_std_case)
print('Predicted cases for Support Vector Machine (SVM) on standard Data: ' , Y_pred_case_svm_std)

Predicted cases for Support Vector Machine (SVM) on standard Data:  [1, 0, 1]


SVM By dimensionality reduction


In [None]:
SVM_dim = SVC(kernel='rbf').fit(X_train_lda , Y_train_lda)
Y_pred_dim_svm = SVM_dim.predict(X_test_lda)

In [None]:
print('For Support Vector Machine (SVM) on dimensionaly reduced Data -')
print("The Precision :", precision_score(Y_test_std, Y_pred_dim_svm))
print("The Recall :", recall_score(Y_test_std, Y_pred_dim_svm))
print("The F1 score :", f1_score(Y_test_std, Y_pred_dim_svm))
print("The Accuracy :", accuracy_score(Y_test_std, Y_pred_dim_svm))
print("\nConfusion matrix :")
print( confusion_matrix(Y_test_std,Y_pred_dim_svm))
print('\nClassification report : ')
print(classification_report(Y_test_std, Y_pred_dim_svm))

For Support Vector Machine (SVM) on dimensionaly reduced Data -
The Precision : 0.8467023172905526
The Recall : 0.8683729433272395
The F1 score : 0.8574007220216606
The Accuracy : 0.8564940962761126

Confusion matrix :
[[468  86]
 [ 72 475]]

Classification report : 
              precision    recall  f1-score   support

           0       0.87      0.84      0.86       554
           1       0.85      0.87      0.86       547

    accuracy                           0.86      1101
   macro avg       0.86      0.86      0.86      1101
weighted avg       0.86      0.86      0.86      1101



In [None]:
csv_SVM_lda = cross_val_score(SVM_dim,X_val_lda, Y_val_lda, cv=5)

print('Cross validation score: ' , csv_SVM_lda)
print('Mean csv score: ' , np.mean(csv_SVM_lda))
print('Standard deviation of csv score: ' , np.std(csv_SVM_lda))

Cross validation score:  [0.84615385 0.87782805 0.85454545 0.91363636 0.85454545]
Mean csv score:  0.8693418346359522
Standard deviation of csv score:  0.024533934706617945


In [None]:
Y_pred_case_svm_lda = SVM_dim.predict(X_test_lda_case)
print('Predicted cases for Support Vector Machine (SVM) on dimensionaly reduced Data: ' , Y_pred_case_svm_lda)

Predicted cases for Support Vector Machine (SVM) on dimensionaly reduced Data:  [1, 0, 1]


# Data Loading & Processing CNN

In [None]:
import os
import pickle
import PIL.Image
import numpy as np
import pandas as pd

import glob
from google.colab import drive
# import dnnlib
# import dnnlib.tflib as tflib
# import config
# from encoder.generator_model import Generator

import matplotlib.pyplot as plt
%matplotlib inline
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import cv2
path = "/content/drive/MyDrive/mask/"
images = glob.glob(path + '/*.png')

In [None]:
img_list = []
for image in images:
  img = PIL.Image.open(image)
  img_list.append(np.asarray(img))

In [None]:
df = pd.DataFrame(img_list)
df.head()

  values = np.array([convert(v) for v in values])


Unnamed: 0,0
0,"[[[38, 51, 59], [37, 50, 58], [37, 50, 58], [3..."
1,"[[[35, 47, 57], [35, 47, 57], [35, 47, 57], [3..."
2,"[[[250, 250, 250], [250, 250, 250], [250, 250,..."
3,"[[[172, 171, 225], [164, 163, 215], [156, 156,..."
4,"[[[55, 54, 56], [55, 54, 57], [56, 55, 57], [5..."


In [None]:
import os
path = 'drive/MyDrive/mask'
mask_image_path = os.listdir(path)[:400]
len(mask_image_path)

In [None]:
import os
path = 'drive/MyDrive/no_mask'
no_mask_image_path = os.listdir(path)[:400]
len(no_mask_image_path)

400

#Applying Cnn

In [None]:
import glob
import shutil
import os

src_dir_mask = "/content/drive/MyDrive/mask"
src_dis_no_mask = "/content/drive/MyDrive/no_mask"
dst_dir_train_m = "/content/drive/MyDrive/minor_proj/train/mask"
dst_dir_valid_m = "/content/drive/MyDrive/minor_proj/validation/mask"
dst_dir_train_nm = "/content/drive/MyDrive/minor_proj/train/no_mask"
dst_dir_valid_ng = "/content/drive/MyDrive/minor_proj/validation/no_mask"
dst_dir_test = "/content/drive/MyDrive/minor_proj/test"
i = 0
train = 1
valid = 0
test = 0
for pngfile in glob.iglob(os.path.join(src_dir_mask, "*.jpg")):
    if train <= 200:
        dst_dir = dst_dir_train_m
    elif 200 < train <= 350 :
        dst_dir = dst_dir_valid_m
    else:
        dst_dir = dst_dir_test
    train += 1
    shutil.copy(pngfile, dst_dir)
print(train+valid)

330


In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)
training_set = train_datagen.flow_from_directory('/content/drive/MyDrive/minor_proj/train',
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'binary')

Found 200 images belonging to 2 classes.


In [None]:
test_datagen = ImageDataGenerator(rescale = 1./255)
test_set = test_datagen.flow_from_directory('/content/drive/MyDrive/minor_proj/validation',
                                            target_size = (64, 64),
                                            batch_size = 32,
                                            class_mode = 'binary')

Found 129 images belonging to 2 classes.


In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

In [None]:
cnn = tf.keras.models.Sequential()
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[64, 64, 3]))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

cnn.add(tf.keras.layers.Flatten())

cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))

cnn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))



In [None]:
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 29, 29, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 32)        9248      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 32)          0         
_________________________________________________________________
flatten (Flatten)            (None, 1152)              0

In [None]:
cnn.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

cnn.fit(x = training_set, validation_data = test_set, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7ffa2a619750>

In [None]:
label_map = (training_set.class_indices)
label_map

{'mask': 0, 'no_mask': 1}

In [None]:
cnn.save('/content/drive/MyDrive/minor_proj/cnn_model.h5')