In [5]:
import pickle
from tqdm import tqdm
import cv2
import os
from keras.preprocessing import image
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from sklearn.utils import shuffle as sk_shuffle
from sklearn.model_selection import train_test_split

In [None]:
#GITHUB
def get_image_value(path, dim, bw):
    '''Load one image from disk, resize it and scale its pixel values by 1/255.

    Parameters
    ----------
    path : str
        Location of the image file.
    dim : tuple
        Target size handed to ``image.load_img(target_size=...)``.
    bw : bool
        When truthy, the image is converted to a single grayscale channel.

    Returns
    -------
    numpy.ndarray
        The image array divided by 255. In grayscale mode the array is
        reshaped to (rows, cols, 1); otherwise it keeps the channel layout
        produced by ``image.img_to_array``.
    '''
    img = image.load_img(path, target_size = dim)
    img = image.img_to_array(img)
    if bw:
        # load_img decodes through PIL, so channels are in RGB order. Using
        # COLOR_BGR2GRAY here would apply the red weight to the blue channel
        # and vice versa, producing the wrong luminance.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # cvtColor drops the channel axis; restore it so downstream code
        # always receives a 3-D array.
        img = img.reshape(img.shape[0], img.shape[1], 1)
    return img / 255


def get_splits(dim, pick_name, bw = False, data_dir = '../GithubData', pickle_dir = '../Pickles'):
    '''Build, pickle and return a stratified 80/20 train/test split of the mask images.

    Images are read from ``<data_dir>/with_mask`` (label 1) and
    ``<data_dir>/without_mask`` (label 0), converted with ``get_image_value``
    and split with ``train_test_split`` (``random_state = 10``). The labels are
    one-hot encoded after the class counts of each split have been printed.

    Parameters
    ----------
    dim : tuple
        Target size forwarded to ``get_image_value``.
    pick_name : str
        Suffix of the pickle file, written as ``<pickle_dir>/TTS_<pick_name>.p``.
    bw : bool, default False
        Forwarded to ``get_image_value``; truthy means grayscale images.
    data_dir : str, default '../GithubData'
        Folder that contains the ``with_mask`` and ``without_mask`` sub-folders.
    pickle_dir : str, default '../Pickles'
        Existing folder the pickle file is written to.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.
    '''
    mask_dir = os.path.join(data_dir, 'with_mask')
    nomask_dir = os.path.join(data_dir, 'without_mask')

    # os.listdir returns entries in arbitrary order; sorting keeps the input
    # order stable so the fixed random_state below yields the same split on
    # every machine and run.
    mask_paths = [os.path.join(mask_dir, name) for name in sorted(os.listdir(mask_dir))]
    mask_labels = [1] * len(mask_paths)
    mask_images = [get_image_value(p, dim, bw) for p in tqdm(mask_paths, desc = 'Getting Images With Mask')]

    nomask_paths = [os.path.join(nomask_dir, name) for name in sorted(os.listdir(nomask_dir))]
    nomask_labels = [0] * len(nomask_paths)
    nomask_images = [get_image_value(p, dim, bw) for p in tqdm(nomask_paths, desc = 'Getting Images With No Mask')]

    image_list = np.array(nomask_images + mask_images)
    labels = np.array(nomask_labels + mask_labels)

    x_train, x_test, y_train, y_test = train_test_split(
        image_list, labels, stratify = labels, random_state = 10, train_size = .8
    )

    print('Train Value Counts')
    print(pd.Series(y_train).value_counts())
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Test Value Counts')
    print(pd.Series(y_test).value_counts())

    # Pin the number of classes so both label arrays always have two columns.
    y_train = to_categorical(y_train, num_classes = 2)
    y_test = to_categorical(y_test, num_classes = 2)

    tts = (x_train, x_test, y_train, y_test)

    # Context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open(os.path.join(pickle_dir, f'TTS_{pick_name}.p'), 'wb') as pickle_file:
        pickle.dump(tts, pickle_file, protocol = 4)

    return tts
    

# Build and pickle the colour (bw = False) split of the GithubData images,
# resized to 250x250.
# NOTE: despite its name, `return_dict` holds the
# (x_train, x_test, y_train, y_test) tuple returned by the get_splits
# defined in this cell, not a dict.
dim = (250,250)
return_dict = get_splits(dim, pick_name = 'GithubNormal', bw = False )

Getting Images With Mask:  80%|█████████████████████████████████████████████████████████████████████████████████████████████▊                        | 1523/1915 [00:05<00:03, 122.22it/s]

In [4]:
def get_image_value(path, dim, bw):
    '''Load one image from disk, resize it and scale its pixel values by 1/255.

    Parameters
    ----------
    path : str
        Location of the image file.
    dim : tuple
        Target size handed to ``image.load_img(target_size=...)``.
    bw : bool
        When truthy, the image is converted to a single grayscale channel.

    Returns
    -------
    numpy.ndarray
        The image array divided by 255. In grayscale mode the array is
        reshaped to (rows, cols, 1); otherwise it keeps the channel layout
        produced by ``image.img_to_array``.
    '''
    img = image.load_img(path, target_size = dim)
    img = image.img_to_array(img)
    if bw:
        # load_img decodes through PIL, so channels are in RGB order. Using
        # COLOR_BGR2GRAY here would apply the red weight to the blue channel
        # and vice versa, producing the wrong luminance.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # cvtColor drops the channel axis; restore it so downstream code
        # always receives a 3-D array.
        img = img.reshape(img.shape[0], img.shape[1], 1)
    return img / 255


def _load_split(split_dir, split_name, dim, bw, random_state = None):
    '''Load, label, one-hot encode and shuffle the images of one split folder.

    ``split_dir`` must contain ``WithMask`` (label 1) and ``WithoutMask``
    (label 0) sub-folders. ``split_name`` is only used in the printed header
    and the progress-bar description. Returns ``dict(images=..., labels=...)``.
    '''
    mask_dir = os.path.join(split_dir, 'WithMask')
    nomask_dir = os.path.join(split_dir, 'WithoutMask')

    # os.listdir order is arbitrary; sort so the pre-shuffle order is stable.
    mask_paths = [os.path.join(mask_dir, name) for name in sorted(os.listdir(mask_dir))]
    nomask_paths = [os.path.join(nomask_dir, name) for name in sorted(os.listdir(nomask_dir))]

    paths = mask_paths + nomask_paths
    labels = np.array([1] * len(mask_paths) + [0] * len(nomask_paths))

    # Report the class balance before the labels are one-hot encoded.
    print(f'{split_name} Value Counts')
    print(pd.Series(labels).value_counts())
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

    # Pin the number of classes so every split gets two label columns even
    # if one of its class folders happens to be empty.
    labels = to_categorical(labels, num_classes = 2)
    images = np.array([
        get_image_value(p, dim, bw)
        for p in tqdm(paths, desc = f'Getting {split_name} Images')
    ])
    # Images and labels are shuffled together so they stay aligned.
    images, labels = sk_shuffle(images, labels, random_state = random_state)
    return dict(images = images, labels = labels)


def get_splits(dim, pick_name, bw = False, data_dir = '../FaceMaskDataset', pickle_dir = '../Pickles', random_state = None):
    '''Load the pre-split face-mask dataset, pickle it and return it.

    Reads the ``Train``, ``Test`` and ``Validation`` folders under
    ``data_dir``; each must contain ``WithMask`` and ``WithoutMask``
    sub-folders. Every split is converted with ``get_image_value``, one-hot
    labelled and shuffled.

    Parameters
    ----------
    dim : tuple
        Target size forwarded to ``get_image_value``.
    pick_name : str
        Suffix of the pickle file, written as
        ``<pickle_dir>/TTSDict_<pick_name>.p``.
    bw : bool, default False
        Forwarded to ``get_image_value``; truthy means grayscale images.
    data_dir : str, default '../FaceMaskDataset'
        Root folder of the dataset.
    pickle_dir : str, default '../Pickles'
        Existing folder the pickle file is written to.
    random_state : int or None, default None
        Seed forwarded to the shuffle of every split. ``None`` keeps the
        shuffle non-deterministic.

    Returns
    -------
    dict
        ``{'train': ..., 'test': ..., 'validation': ...}`` where each value is
        ``dict(images=ndarray, labels=ndarray)``.
    '''
    # (result key, folder name) pairs, processed in the order they are stored.
    split_folders = (('train', 'Train'), ('test', 'Test'), ('validation', 'Validation'))

    return_dict = {
        key: _load_split(os.path.join(data_dir, folder), folder, dim, bw, random_state)
        for key, folder in split_folders
    }

    print('Pickling The Data')
    # Context manager guarantees the file is flushed and closed even if
    # pickling fails part-way through.
    with open(os.path.join(pickle_dir, f'TTSDict_{pick_name}.p'), 'wb') as pickle_file:
        pickle.dump(return_dict, pickle_file, protocol = 4)

    return return_dict
    

# Build and pickle the grayscale (bw = True) train/test/validation dict of
# the FaceMaskDataset images, resized to 250x250.
dim = (250,250)
return_dict = get_splits(dim, pick_name = 'BW', bw = True )

Getting Train Images:   1%|█▌                                                                                                                       | 124/10000 [00:00<00:07, 1239.72it/s]

Train Value Counts
1    5000
0    5000
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Train Images: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:12<00:00, 792.69it/s]
Getting Test Images:   9%|███████████▎                                                                                                                  | 89/992 [00:00<00:01, 889.79it/s]

Test Value Counts
0    509
1    483
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Test Images: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 992/992 [00:01<00:00, 866.18it/s]
Getting Validation Images:  12%|██████████████▌                                                                                                         | 97/800 [00:00<00:00, 969.78it/s]

Validation Value Counts
1    400
0    400
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Validation Images: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 800/800 [00:00<00:00, 876.03it/s]


Pickling The Data


In [6]:
# Visual sanity check of one loaded image.
# NOTE: despite its name, `test_img` is the first image of the *train* split.
# This requires `return_dict` to be the dict produced by the FaceMaskDataset
# get_splits (keys 'train' -> 'images'), not the tuple from the GithubData cell.
test_img = return_dict['train']['images'][0]
print(test_img.shape)
# cv2.imshow opens a native OS window (not inline notebook output) and
# cv2.waitKey(0) blocks the kernel until a key is pressed in that window.
cv2.imshow('Test', test_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

(250, 250, 1)
