In [2]:
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

In [3]:
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import cross_val_score

Preprocessing

Two lists are created, and nested for loops then append each image (converted into an array of numbers) to one list and its label to the other.

The function `os.path.relpath` is used to obtain the relative path of each of the two class folders from the main (working) folder.

In [5]:
# Class folder names; each folder name is also used as the label of its images.
lst = [
    "with_mask",
    "without_mask",
]

In [6]:
# Accumulators filled by the loading loop: one image array and one label per sample.
data, labels = [], []

In [7]:
# Start from empty lists so that re-running this cell never appends duplicate
# samples (or fails because later cells rebind these names to NumPy arrays).
data = []
labels = []

for clas in lst:
    # Path of the class folder relative to the current working directory.
    path = os.path.relpath(clas)

    # os.listdir returns entries in arbitrary order; sort them so the sample
    # order (and therefore the seeded train/test split) is reproducible.
    for img in sorted(os.listdir(path)):
        image_path = os.path.join(path, img)

        # Load at 224x224, convert to an array, then apply the MobileNetV2
        # preprocess_input transform.
        image = load_img(image_path, target_size=(224, 224))
        image = img_to_array(image)
        image = preprocess_input(image)

        data.append(image)
        labels.append(clas)  # the folder name is the label




Encoding the string labels as integers using a label encoder.

In [9]:
# Learn the set of class names, then map every label to its integer code.
lb = LabelEncoder()
lb.fit(labels)
labels = lb.transform(labels)
print(labels)

[0 0 0 ... 1 1 1]


Converting these into NumPy arrays, since the machine learning models expect array-like inputs.

In [12]:
# execution time: around 10 - 20 seconds

# Stack the list of per-image arrays into a single float32 array.
data = np.array(data, dtype="float32")
# Keep the encoded labels as a NumPy array under the name used by the later cells.
labels = np.array(labels)

Reshaping/flattening the array of shape (3833, 224, 224, 3) into an array of shape (3833, 150528), since 224 × 224 × 3 = 150528.

In [13]:
# Flatten every image into one row. Passing -1 lets NumPy infer the per-image
# length, so the cell works for any number of images and any image size.
arr1 = data.reshape(len(data), -1)
print(arr1)

[[-0.45098037 -0.49019605 -0.44313723 ... -0.34117645 -0.54509807
  -0.64705884]
 [ 0.32549024  0.30196083  0.24705887 ...  0.11372554  0.07450986
   0.12156868]
 [-0.41176468 -0.40392154 -0.38823527 ... -0.7490196  -0.7490196
  -0.6862745 ]
 ...
 [-1.         -1.         -1.         ... -1.         -1.
  -1.        ]
 [ 0.67058825  0.6784314   0.70980394 ...  0.8666667   0.8352941
   0.96862745]
 [ 0.90588236  0.8980392   0.88235295 ...  0.58431375  0.81960785
   0.8039216 ]]


Making a dataframe for doing further process easily.

In [16]:
# One row per image, one column per flattened pixel value.
dataframe = pd.DataFrame(data=arr1)

In [17]:
# Preview the first five rows.
dataframe.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,150518,150519,150520,150521,150522,150523,150524,150525,150526,150527
0,-0.45098,-0.490196,-0.443137,-0.45098,-0.490196,-0.443137,-0.45098,-0.490196,-0.443137,-0.396078,...,-0.654902,-0.341176,-0.545098,-0.647059,-0.341176,-0.545098,-0.647059,-0.341176,-0.545098,-0.647059
1,0.32549,0.301961,0.247059,0.32549,0.301961,0.247059,0.34902,0.32549,0.270588,0.34902,...,0.105882,0.098039,0.058824,0.105882,0.113726,0.07451,0.121569,0.113726,0.07451,0.121569
2,-0.411765,-0.403922,-0.388235,-0.411765,-0.403922,-0.388235,-0.521569,-0.513726,-0.498039,-0.521569,...,-0.662745,-0.72549,-0.72549,-0.662745,-0.74902,-0.74902,-0.686275,-0.74902,-0.74902,-0.686275
3,-0.929412,-0.945098,-0.921569,-0.929412,-0.945098,-0.921569,-0.921569,-0.937255,-0.913725,-0.945098,...,-0.913725,-0.976471,-0.976471,-0.913725,-0.976471,-0.976471,-0.898039,-0.976471,-0.976471,-0.898039
4,-0.654902,-0.615686,-0.584314,-0.654902,-0.615686,-0.584314,-0.662745,-0.623529,-0.592157,-0.662745,...,0.952941,0.913725,0.945098,0.937255,0.913725,0.945098,0.937255,0.913725,0.945098,0.937255


In [20]:
# Attach the encoded class of each image as an extra column, then display the frame.
dataframe["labels"] = labels
dataframe

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,150519,150520,150521,150522,150523,150524,150525,150526,150527,labels
0,-0.450980,-0.490196,-0.443137,-0.450980,-0.490196,-0.443137,-0.450980,-0.490196,-0.443137,-0.396078,...,-0.341176,-0.545098,-0.647059,-0.341176,-0.545098,-0.647059,-0.341176,-0.545098,-0.647059,0
1,0.325490,0.301961,0.247059,0.325490,0.301961,0.247059,0.349020,0.325490,0.270588,0.349020,...,0.098039,0.058824,0.105882,0.113726,0.074510,0.121569,0.113726,0.074510,0.121569,0
2,-0.411765,-0.403922,-0.388235,-0.411765,-0.403922,-0.388235,-0.521569,-0.513726,-0.498039,-0.521569,...,-0.725490,-0.725490,-0.662745,-0.749020,-0.749020,-0.686275,-0.749020,-0.749020,-0.686275,0
3,-0.929412,-0.945098,-0.921569,-0.929412,-0.945098,-0.921569,-0.921569,-0.937255,-0.913725,-0.945098,...,-0.976471,-0.976471,-0.913725,-0.976471,-0.976471,-0.898039,-0.976471,-0.976471,-0.898039,0
4,-0.654902,-0.615686,-0.584314,-0.654902,-0.615686,-0.584314,-0.662745,-0.623529,-0.592157,-0.662745,...,0.913725,0.945098,0.937255,0.913725,0.945098,0.937255,0.913725,0.945098,0.937255,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3828,0.858824,0.882353,0.796078,1.000000,1.000000,0.945098,0.819608,0.827451,0.764706,0.960784,...,0.105882,0.129412,-0.113725,0.239216,0.301961,0.011765,0.262745,0.364706,0.019608,1
3829,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,...,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,1
3830,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,...,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,-1.000000,1
3831,0.670588,0.678431,0.709804,0.835294,0.843137,0.874510,0.654902,0.654902,0.670588,0.968627,...,-0.058824,-0.050980,-0.011765,0.607843,0.600000,0.741176,0.866667,0.835294,0.968627,1


In [21]:
# Features: every pixel column. Target: the label column, kept as a one-column frame.
x = dataframe.drop(columns=["labels"])
y = dataframe[["labels"]]

Train test split

In [22]:
# Hold out half of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.5,
    random_state=1,
)

In [23]:
# Sanity-check the sizes of the resulting splits.
(X_train.shape, Y_train.shape, X_test.shape)

((1916, 150528), (1916, 1), (1917, 150528))

Model fitting

Models used - 
                MLPClassifier
                RandomForestClassifier
                GaussianNB

Their accuracy scores and classification reports are then calculated.

Multi-layer perceptron - high accuracy (about 0.93 in the run shown below, on par with the random forest) but a long fitting time

In [24]:
# Multi-layer perceptron with default hyper-parameters. A fixed random_state makes
# the weight initialisation (and hence the reported scores) repeatable, in line
# with the seeded train/test split.
mlp = MLPClassifier(random_state=1)

In [26]:
%%time

mlp.fit(X_train,np.ravel(Y_train))

Wall time: 5min 26s


MLPClassifier()

In [27]:
# Predicted classes of the fitted MLP for the held-out samples.
pred = mlp.predict(X=X_test)

In [28]:
from sklearn.metrics import accuracy_score

In [29]:
# Fraction of held-out samples the MLP classified correctly.
print(accuracy_score(y_true=Y_test, y_pred=pred))

0.9269692227438706


In [30]:
# Per-class precision, recall and F1 of the MLP on the held-out samples.
print(classification_report(y_true=Y_test, y_pred=pred))

              precision    recall  f1-score   support

           0       0.94      0.91      0.93       973
           1       0.91      0.94      0.93       944

    accuracy                           0.93      1917
   macro avg       0.93      0.93      0.93      1917
weighted avg       0.93      0.93      0.93      1917



Random forest - accuracy on par with the MLP (about 0.93 in the run shown below) with a much shorter fitting time

In [31]:
%%time
rf = RandomForestClassifier(n_estimators=100,n_jobs=-1)
rf.fit(X_train,np.ravel(Y_train))

Wall time: 7.53 s


RandomForestClassifier(n_jobs=-1)

In [32]:
# Predicted classes of the fitted random forest for the held-out samples.
rf_predict = rf.predict(X=X_test)

In [33]:
# Fraction of held-out samples the random forest classified correctly.
print(accuracy_score(y_true=Y_test, y_pred=rf_predict))

0.9290558163797601


In [34]:
# Per-class precision, recall and F1 of the random forest on the held-out samples.
print(classification_report(y_true=Y_test, y_pred=rf_predict))

              precision    recall  f1-score   support

           0       0.92      0.94      0.93       973
           1       0.94      0.92      0.93       944

    accuracy                           0.93      1917
   macro avg       0.93      0.93      0.93      1917
weighted avg       0.93      0.93      0.93      1917



Gaussian Naive Bayes - worst out of the three

In [35]:
# Gaussian naive Bayes with class priors estimated from the training data (the default).
gauss = GaussianNB(priors=None)

In [37]:
%%time
gauss.fit(X_train,np.ravel(Y_train))

Wall time: 4.86 s


GaussianNB()

In [38]:
# 10-fold cross-validated accuracy of the naive Bayes model on the training half.
accuracy_gauss = cross_val_score(
    gauss,
    X_train,
    np.ravel(Y_train),
    scoring="accuracy",
    cv=10,
)

In [39]:
# Average accuracy across the cross-validation folds.
print(np.mean(accuracy_gauss))

0.810544829842932


In [40]:
# Per-fold accuracies returned by the cross-validation run.
print(accuracy_gauss)

[0.765625   0.859375   0.78645833 0.81770833 0.828125   0.80208333
 0.85340314 0.76963351 0.79581152 0.82722513]


In [41]:
# Predicted classes of the fitted naive Bayes model for the held-out samples.
pred_gauss = gauss.predict(X=X_test)

In [42]:
# Fraction of held-out samples the naive Bayes model classified correctly.
print(accuracy_score(y_true=Y_test, y_pred=pred_gauss))

0.810641627543036


In [43]:
# Per-class precision, recall and F1 of the naive Bayes model on the held-out samples.
print(classification_report(y_true=Y_test, y_pred=pred_gauss))

              precision    recall  f1-score   support

           0       0.81      0.82      0.82       973
           1       0.81      0.80      0.81       944

    accuracy                           0.81      1917
   macro avg       0.81      0.81      0.81      1917
weighted avg       0.81      0.81      0.81      1917



#DONE