In [None]:
import pickle
from tqdm import tqdm
import cv2
import os
from keras.preprocessing import image
import numpy as np
import pandas as pd

In [None]:
def get_image_value(path, dim):
    """Load one image from disk, resize it, and rescale its pixel values.

    Args:
        path: Location of the image file; passed straight to
            ``image.load_img``.
        dim: Target size handed to ``image.load_img`` as ``target_size``
            (the caller in this file passes a 2-tuple such as ``(250, 250)``).

    Returns:
        The array produced by ``image.img_to_array`` divided by 255.
        Assuming 8-bit source images, the values land in the range [0, 1].
    """
    loaded = image.load_img(path, target_size=dim)
    pixels = image.img_to_array(loaded)
    # Divide by the maximum 8-bit intensity to normalise the pixel values.
    return pixels / 255


def _load_split(split, dim):
    """Load every image of one dataset split together with its labels.

    Reads ``../FaceMaskDataset/<split>/WithMask`` (label 1) followed by
    ``../FaceMaskDataset/<split>/WithoutMask`` (label 0), prints the label
    distribution, and returns ``dict(images=..., labels=...)``.
    """
    split_dir = f'../FaceMaskDataset/{split}'
    mask_paths = [
        f'{split_dir}/WithMask/{name}'
        for name in os.listdir(f'{split_dir}/WithMask')
    ]
    nomask_paths = [
        f'{split_dir}/WithoutMask/{name}'
        for name in os.listdir(f'{split_dir}/WithoutMask')
    ]

    # Masked images come first, so the labels are a run of 1s then a run of 0s.
    paths = mask_paths + nomask_paths
    labels = np.array([1] * len(mask_paths) + [0] * len(nomask_paths))

    images = np.array([
        get_image_value(path, dim)
        for path in tqdm(paths, desc=f'Getting {split} Images')
    ])

    print(f'{split} Value Counts')
    print(pd.Series(labels).value_counts())
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    return dict(images=images, labels=labels)


def get_splits(dim):
    """Build the train/test/validation image arrays and pickle them.

    For each of the ``Train``, ``Test`` and ``Validation`` folders under
    ``../FaceMaskDataset`` the images are loaded via ``get_image_value``
    and paired with labels (1 = ``WithMask``, 0 = ``WithoutMask``).  The
    combined dictionary is written to ``../Pickles/TTSDict.p`` with pickle
    protocol 4 and also returned.

    Args:
        dim: Target size forwarded to ``get_image_value`` for every image.

    Returns:
        A dict with keys ``'train'``, ``'test'`` and ``'validation'``; each
        value is a dict holding an ``'images'`` array and a ``'labels'`` array.

    Raises:
        FileNotFoundError: If a dataset folder or the ``../Pickles``
            directory does not exist (raised by ``os.listdir`` / ``open``).
    """
    # Same processing order as before: train, then test, then validation.
    train_dict = _load_split('Train', dim)
    test_dict = _load_split('Test', dim)
    val_dict = _load_split('Validation', dim)

    return_dict = dict(train=train_dict, test=test_dict, validation=val_dict)

    print('Pickling The Data')
    # Use a context manager so the file is flushed and closed deterministically
    # instead of leaking the handle until garbage collection.
    with open('../Pickles/TTSDict.p', 'wb') as pickle_file:
        pickle.dump(return_dict, pickle_file, protocol=4)

    return return_dict
    

# Size every image is resized to when loaded (forwarded as ``target_size``).
dim = (250, 250)

# Load all three splits; get_splits also pickles the result to disk.
return_dict = get_splits(dim=dim)