# Preprocessing the dataset

In [1]:
#importing all the dependencies

import cv2
import os
import numpy as np

In [2]:
data_path = 'Face Mask Dataset'                      # root folder of the dataset
train_data_path = os.path.join(data_path, 'Train')   # training split
test_data_path = os.path.join(data_path, 'Test')     # test split


def _list_categories(split_path):
    """Return the class sub-folder names found in ``split_path``, sorted.

    ``os.listdir`` gives entries in arbitrary order and also returns stray
    files (e.g. ``.DS_Store``), so the result is restricted to non-hidden
    directories and sorted. This keeps the category -> label mapping
    identical on every run and on every machine.
    """
    return sorted(
        entry
        for entry in os.listdir(split_path)
        if not entry.startswith('.')
        and os.path.isdir(os.path.join(split_path, entry))
    )


train_categories = _list_categories(train_data_path)
test_categories = _list_categories(test_data_path)

# Labels are derived from the Train categories only, so every Test category
# must also exist in Train; fail early with a clear message otherwise.
_unlabelled = sorted(set(test_categories) - set(train_categories))
if _unlabelled:
    raise ValueError(
        'Test categories without a matching Train category: %s' % _unlabelled
    )

# One integer label per category, in sorted category order.
labels = list(range(len(train_categories)))

# Maps category (folder) name -> integer label.
label_dict = dict(zip(train_categories, labels))

print(label_dict)
print(train_categories,test_categories)
print(labels)


{'WithMask': 0, 'WithoutMask': 1}
['WithMask', 'WithoutMask'] ['WithMask', 'WithoutMask']
[0, 1]


## Converting all images to grayscale and resizing them to the average image size, i.e. 150 x 150 px

In [7]:
# Side length in pixels of the square images produced below; 150 is the
# average size of the images in the dataset.
img_size = 150


def _load_split(split_path, categories, label_dict, img_size):
    """Load one dataset split as grayscale images with one-hot labels.

    Parameters
    ----------
    split_path : str
        Folder containing one sub-folder per category.
    categories : list of str
        Names of the category sub-folders to read.
    label_dict : dict
        Maps category name -> integer label. A category that is missing
        from this mapping raises ``KeyError`` instead of being skipped.
    img_size : int
        Every image is resized to ``img_size`` x ``img_size`` pixels.

    Returns
    -------
    (images, one_hot_labels) : tuple of two lists
        ``images`` holds the resized grayscale arrays. ``one_hot_labels``
        holds ``[0, 1]`` for label 0 and ``[1, 0]`` for any other label,
        in the same order as ``images``.

    Files that cannot be read or converted are reported and skipped.
    """
    images = []
    one_hot_labels = []

    for category in categories:
        folder_path = os.path.join(split_path, category)
        # Looked up once per category, outside the per-image error handling,
        # so a bad mapping is not silently swallowed image by image.
        one_hot = [0, 1] if label_dict[category] == 0 else [1, 0]

        # Sorted so the sample order is reproducible between runs.
        for img_name in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_name)
            img = cv2.imread(img_path)

            # cv2.imread returns None for missing, corrupt or unsupported files.
            if img is None:
                print('Exception: could not read image', img_path)
                continue

            try:
                # Convert to grayscale, then resize to the common fixed size.
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                resized = cv2.resize(gray, (img_size, img_size))
            except cv2.error as e:
                # Report the failure and move on to the next image.
                print('Exception:', e)
                continue

            images.append(resized)
            one_hot_labels.append(list(one_hot))

    return images, one_hot_labels


# Training dataset
train_X, train_Y = _load_split(train_data_path, train_categories, label_dict, img_size)

# Testing dataset
test_X, test_Y = _load_split(test_data_path, test_categories, label_dict, img_size)


### Normalizing the dataset


In [8]:
# Stack the per-image arrays, scale the pixel values by 1/255 and add a
# trailing channel axis: (n_images, img_size, img_size, 1).
# NOTE: this cell rebinds train_X / test_X, so it must be run exactly once
# after the loading cell; running it again would divide by 255 a second time.
train_X = np.array(train_X) / 255.0
train_X = np.reshape(train_X, (train_X.shape[0], img_size, img_size, 1))
train_Y = np.array(train_Y)

test_X = np.array(test_X) / 255.0
test_X = np.reshape(test_X, (test_X.shape[0], img_size, img_size, 1))
test_Y = np.array(test_Y)

# The labels are already one-hot encoded ([0, 1] / [1, 0]) when they are
# collected, so no further categorical conversion is needed. The previous
# `np_utils.to_categorical(train_Y)` call re-encoded each one-hot entry,
# produced an (n_images, 2, 2) array that was never used or saved, and
# required a mid-notebook import of `keras.utils.np_utils`.

### Save the processed dataset into the project folder

In [9]:
# Write each processed array to the current working directory under its own
# name; np.save appends the ".npy" extension to the given file name.
for _fname, _arr in (
    ('train_X', train_X),
    ('train_Y', train_Y),
    ('test_X', test_X),
    ('test_Y', test_Y),
):
    np.save(_fname, _arr)

In [10]:
# Sanity check: (number of training images, img_size, img_size, 1)
train_X.shape

(10000, 150, 150, 1)

In [13]:
# Sanity check: one two-element one-hot label per training image
train_Y.shape

(10000, 2)

In [14]:
# Sanity check: (number of test images, img_size, img_size, 1)
test_X.shape

(992, 150, 150, 1)

In [16]:
# Sanity check: one two-element one-hot label per test image
test_Y.shape

(992, 2)

<h1><center> -----Thank-you-----</center></h1>