In [9]:
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import os
from tqdm import tqdm
import random
import pickle

# Providing paths of the dataset

In [10]:
# Resolve the notebook's file name against the current working directory and
# use its parent folder as the root under which the 'Dataset' folder is expected.
notebook_path = os.path.abspath("face_mask_data.ipynb")
notebook_dir = os.path.dirname(notebook_path)
Dataset_path = os.path.join(notebook_dir, 'Dataset')

# One sub-folder per split, all living directly under the dataset root.
train_path, test_path, valid_path = [
    os.path.join(Dataset_path, split_name)
    for split_name in ('Train', 'Test', 'Validation')
]

# Collecting the class names in an array

In [11]:
# Side length (in pixels) that createData resizes every image to.
IMG_SIZE = 227

# Class names are the sub-folder names of the training directory.
# - os.listdir returns entries in arbitrary order, so the list is sorted to keep
#   the name -> label-index mapping identical on every run and machine.
# - Non-directory entries (e.g. '.DS_Store') are dropped: they are not classes and
#   would make the later os.listdir(<class folder>) call fail.
categories = sorted(
    entry
    for entry in os.listdir(train_path)
    if os.path.isdir(os.path.join(train_path, entry))
)

print(categories)

['Mask', 'Non Mask']


# Function to read image from path specified and convert it into an array

In [12]:
def createData(path):
    """Load every image of every class found under ``path``.

    For each name in the module-level ``categories`` list, the folder
    ``path/<name>`` is scanned, each file is read with OpenCV and resized to
    ``IMG_SIZE`` x ``IMG_SIZE``.

    Parameters
    ----------
    path : str
        Directory that contains one sub-folder per entry of ``categories``.

    Returns
    -------
    list
        A list of ``[resized_image, class_num]`` pairs, where ``class_num`` is a
        one-element list holding the index of the class in ``categories``.

    Notes
    -----
    Files that cannot be read or resized are skipped (best effort), but they are
    no longer dropped silently: a short per-class summary is printed so missing
    samples are visible.
    """
    array = []

    for c in categories:
        # The label is wrapped in a list, exactly as before; every sample of this
        # class shares the same list object.
        class_num = [categories.index(c)]
        path_name = os.path.join(path, c)
        skipped = []  # (file name, reason) for each file that could not be used

        for img in tqdm(os.listdir(path_name)):
            try:
                img_array = cv.imread(os.path.join(path_name, img))
                # cv.imread signals an unreadable / non-image file by returning
                # None instead of raising, so turn that into an explicit error.
                if img_array is None:
                    raise ValueError("cv.imread returned None")
                img_resized = cv.resize(img_array, (IMG_SIZE, IMG_SIZE))
            except Exception as e:
                skipped.append((img, e))
                continue
            array.append([img_resized, class_num])

        if skipped:
            print("Skipped {} file(s) in {}:".format(len(skipped), path_name))
            for name, reason in skipped:
                print("  {}: {}".format(name, reason))

    return array

# Converting the train, test and validation data into arrays using the function defined above

In [13]:
# Load the three splits in the order train -> test -> validation.
train, test, valid = [
    createData(split_path)
    for split_path in (train_path, test_path, valid_path)
]

100%|██████████| 300/300 [00:03<00:00, 81.91it/s] 
100%|██████████| 300/300 [00:00<00:00, 432.53it/s]
100%|██████████| 50/50 [00:00<00:00, 84.08it/s] 
100%|██████████| 50/50 [00:00<00:00, 82.19it/s]
100%|██████████| 153/153 [00:02<00:00, 58.88it/s] 
100%|██████████| 153/153 [00:01<00:00, 81.56it/s]


# Function to separate class names from the image

- The previous step stored each image together with its class label as an `[image, classname]` pair, and every pair was appended to another, initially empty, list.
- This produced a nested (3-level) structure. Keeping the innermost entries in `[image, classname]` format preserves the association between each image and the class it belongs to.
- If the images and labels were kept in separate lists, that association would be lost when shuffling. The function below shuffles the pairs first and then separates the images from their respective class labels.

In [15]:
def dataLabelSeparation(data, seed=None):
    """Shuffle ``[image, label]`` pairs and split them into two arrays.

    Parameters
    ----------
    data : list
        Sequence of ``[image, label]`` pairs. It is NOT modified: the shuffle is
        applied to a shallow copy, so re-running the cell or reusing ``data``
        elsewhere sees the original order.
    seed : int, optional
        When given, a dedicated ``random.Random(seed)`` generator is used so the
        shuffle is reproducible. When omitted, the module-level ``random`` state
        is used, as before.

    Returns
    -------
    tuple
        ``(X_data, y_label)`` where ``X_data`` is the stacked images converted to
        ``float32`` and ``y_label`` is the array of labels, both in the same
        shuffled order.
    """
    # Copy first: random.shuffle works in place and would otherwise reorder the
    # caller's list as a hidden side effect.
    shuffled = list(data)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)  # Shuffling the data before separating the labels/classnames

    X_data = []
    y_label = []
    for img, label in shuffled:
        X_data.append(img)
        y_label.append(label)

    X_data = np.array(X_data).astype('float32')
    y_label = np.array(y_label)

    return X_data, y_label

# Separating the image and classname for train,test and validation data

In [16]:
# Shuffle and split each dataset (train -> test -> validation) into an image
# array and a matching label array.
(X_train, y_train), (X_test, y_test), (X_valid, y_valid) = [
    dataLabelSeparation(split) for split in (train, test, valid)
]

# Storing the train, test and validation data and labels in a pickle file

In [17]:
# Folder that receives one pickle file per array.
PICKLE_DIR = "./resources/data/face_mask"

# open(..., "wb") does not create missing parent folders, so make sure it exists.
os.makedirs(PICKLE_DIR, exist_ok=True)

# (file stem, object) pairs, written in the same order as before; the stems
# reproduce the original file names (e.g. X_train.pickle).
pickle_targets = [
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_valid", X_valid),
    ("y_valid", y_valid),
    ("X_test", X_test),
    ("y_test", y_test),
]

for stem, payload in pickle_targets:
    # The context manager closes the file even if pickle.dump raises.
    with open(PICKLE_DIR + "/" + stem + ".pickle", "wb") as pickle_out:
        pickle.dump(payload, pickle_out)