In [1]:
import pickle
from tqdm import tqdm
import cv2
import os
import keras.utils as image
import numpy as np
import pandas as pd
from keras.utils import to_categorical
from sklearn.utils import shuffle as sk_shuffle
from sklearn.model_selection import train_test_split
from keras.applications.mobilenet_v2 import preprocess_input

def get_image_value(path, dim, bw, model_type):
    '''Read an image from disk, resize it and scale it for the requested model.

    Parameters
    ----------
    path : str
        Location of the image file.
    dim : tuple
        Size handed to ``load_img`` as ``target_size``.
    bw : bool
        When truthy the image is reduced to one grayscale channel and the
        result has shape ``(height, width, 1)``.
    model_type : str
        ``'mobilenet'`` (matched case-insensitively) applies MobileNetV2's
        ``preprocess_input``; any other value divides the pixels by 255.

    Returns
    -------
    numpy.ndarray
        The preprocessed image array.
    '''
    img = image.load_img(path, target_size = dim)
    img = image.img_to_array(img)
    if bw:
        # load_img produces RGB channel order, so the RGB->gray conversion is
        # the one that applies the luminance weights to the right channels.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        img = img.reshape(img.shape[0], img.shape[1], 1)
    # The split helpers in this file default to the spelling 'Mobilenet';
    # comparing case-insensitively keeps that spelling from silently falling
    # through to the plain /255 scaling.
    if str(model_type).lower() == 'mobilenet':
        return preprocess_input(img)
    return img / 255

def get_emotion_classes(class_type):
    '''Collect the image paths and one-hot labels of one emotion split.

    Files are listed from ``EmotionDataset/<class_type>/<emotion>`` for each
    of the seven emotions. The per-class counts are printed, then paths and
    labels are shuffled together.

    Parameters
    ----------
    class_type : str
        Name of the split folder inside ``EmotionDataset`` (this file uses
        ``'train'`` and ``'test'``).

    Returns
    -------
    tuple
        ``(paths, labels)``: an array of file paths and the matching one-hot
        label matrix with seven columns, in the same shuffled order.
    '''
    # (folder name, integer label), listed in the order the samples are
    # concatenated before shuffling.
    emotion_labels = (
        ('angry', 0),
        ('disgust', 4),
        ('fear', 5),
        ('happy', 1),
        ('neutral', 2),
        ('sad', 3),
        ('surprise', 6),
    )

    paths, labels = [], []
    for emotion, label in emotion_labels:
        folder = f'EmotionDataset/{class_type}/{emotion}'
        file_names = os.listdir(folder)
        paths.extend(f'{folder}/{file_name}' for file_name in file_names)
        labels.extend([label] * len(file_names))

    labels = np.array(labels)

    print(f'{class_type.upper()} Value Count')
    print(pd.Series(labels).value_counts())
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    # Pin the width: to_categorical otherwise sizes the matrix from the largest
    # label present, so an empty class folder could change the column count.
    labels = to_categorical(labels, num_classes = len(emotion_labels))
    paths = np.array(paths)
    paths, labels = sk_shuffle(paths, labels)
    return paths, labels
# 0: angry || 1: happy || 2: neutral || 3: sad || 4: disgust || 5: fear || 6: surprise

def get_emotion_splits(dim, pick_name, model_type = 'Mobilenet', bw = False):
    '''Load the emotion train/test images, pickle them and return them.

    Parameters
    ----------
    dim : tuple
        Image size forwarded to ``get_image_value``.
    pick_name : str
        Suffix of the pickle file, written to
        ``Pickles/TTSEmotion_<pick_name>.p``.
    model_type : str
        Forwarded to ``get_image_value``. When it is ``'mobilenet'`` (any
        casing) and ``bw`` is set, the gray channel is repeated three times.
    bw : bool
        Forwarded to ``get_image_value`` to request grayscale images.

    Returns
    -------
    tuple
        ``(train_images, test_images, train_labels, test_labels)``, the same
        object that is pickled.
    '''
    train_paths, train_labels = get_emotion_classes('train')
    test_paths, test_labels = get_emotion_classes('test')

    train_images = np.array([get_image_value(i, dim, bw, model_type) for i in tqdm(train_paths, desc = 'Getting Emotion Train Images')])
    test_images = np.array([get_image_value(i, dim, bw, model_type) for i in tqdm(test_paths, desc = 'Getting Emotion Test Images')])

    if str(model_type).lower() == 'mobilenet' and bw:
        # get_image_value reshapes grayscale images to (height, width, 1), so
        # the batch already has a channel axis. Repeating along it gives three
        # channels; np.stack would append a fifth axis instead.
        train_images = np.repeat(train_images, 3, axis = -1)
        test_images = np.repeat(test_images, 3, axis = -1)

    tts = (train_images, test_images, train_labels, test_labels)

    # Create the output folder if needed and close the file deterministically.
    os.makedirs('Pickles', exist_ok = True)
    with open(f'Pickles/TTSEmotion_{pick_name}.p', 'wb') as pickle_file:
        pickle.dump(tts, pickle_file, protocol = 4)
    print('Finished Pickling Emotions')
    return tts

# Run the emotion pipeline: images are loaded at 48x48, the pickle is named
# after 'Mobilenet', and the function's return value is kept in `tts`.
dim = (48, 48)
tts = get_emotion_splits(
    dim,
    pick_name='Mobilenet',
    bw=False,
)

2023-02-19 13:25:33.313402: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


TRAIN Value Count
1    7215
2    4965
3    4830
5    4097
0    3995
6    3171
4     436
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
TEST Value Count
1    1774
3    1247
2    1233
5    1024
0     958
6     831
4     111
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Emotion Train Images: 100%|██████████| 28709/28709 [00:04<00:00, 6093.67it/s]
Getting Emotion Test Images: 100%|██████████| 7178/7178 [00:01<00:00, 4108.43it/s]


Finished Pickling Emotions


In [2]:
def get_mask_classes(class_type):
    '''Collect the image paths and one-hot labels of one face-mask split.

    Files are listed from ``FaceMaskDataset/<class_type>/WithMask`` (label 1)
    and ``FaceMaskDataset/<class_type>/WithoutMask`` (label 0). The per-class
    counts are printed, then paths and labels are shuffled together.

    Parameters
    ----------
    class_type : str
        Name of the split folder inside ``FaceMaskDataset`` (this file uses
        ``'Train'``, ``'Test'`` and ``'Validation'``).

    Returns
    -------
    tuple
        ``(paths, labels)``: an array of file paths and the matching one-hot
        label matrix with two columns, in the same shuffled order.
    '''
    # (folder name, integer label), listed in the order the samples are
    # concatenated before shuffling.
    mask_folders = (
        ('WithMask', 1),
        ('WithoutMask', 0),
    )

    paths, labels = [], []
    for folder_name, label in mask_folders:
        folder = f'FaceMaskDataset/{class_type}/{folder_name}'
        file_names = os.listdir(folder)
        paths.extend(f'{folder}/{file_name}' for file_name in file_names)
        labels.extend([label] * len(file_names))

    labels = np.array(labels)
    print(f'{class_type.upper()} Value Counts')
    print(pd.Series(labels).value_counts())
    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    paths = np.array(paths)
    # Pin the width: to_categorical otherwise sizes the matrix from the largest
    # label present, so an empty WithMask folder would yield a single column.
    labels = to_categorical(labels, num_classes = len(mask_folders))
    paths, labels = sk_shuffle(paths, labels)
    return paths, labels

def get_mask_splits(dim, pick_name = None, model_type = 'Mobilenet', bw = False):
    '''Load the face-mask train/test/validation images, pickle and return them.

    Parameters
    ----------
    dim : tuple
        Image size forwarded to ``get_image_value``.
    pick_name : str, optional
        Suffix of the pickle file, written to
        ``Pickles/TTSMask_<pick_name>.p``.
    model_type : str
        Forwarded to ``get_image_value`` to choose the pixel scaling.
    bw : bool
        Forwarded to ``get_image_value`` to request grayscale images.

    Returns
    -------
    tuple
        ``(train_images, test_images, train_labels, test_labels, val_images,
        val_labels)``, the same object that is pickled.
    '''
    # The three splits are loaded the same way, in Train, Test, Validation order.
    loaded = {}
    for split_name in ('Train', 'Test', 'Validation'):
        split_paths, split_labels = get_mask_classes(split_name)
        split_images = np.array([
            get_image_value(i, dim, bw, model_type)
            for i in tqdm(split_paths, desc = f'Getting {split_name} Images')
        ])
        loaded[split_name] = (split_images, split_labels)

    train_images, train_labels = loaded['Train']
    test_images, test_labels = loaded['Test']
    val_images, val_labels = loaded['Validation']

    tts = train_images, test_images, train_labels, test_labels, val_images, val_labels

    print('Pickling The Data')
    # Create the output folder if needed and close the file deterministically.
    os.makedirs('Pickles', exist_ok = True)
    with open(f'Pickles/TTSMask_{pick_name}.p', 'wb') as pickle_file:
        pickle.dump(tts, pickle_file, protocol = 4)
    print('Finished Pickling')
    return tts

# Run the face-mask pipeline: images are loaded at 150x150 with the 'Normal'
# model type, and the returned tuple is kept in `return_dict`.
dim = (150, 150)
return_dict = get_mask_splits(
    dim,
    pick_name='Normal1',
    model_type='Normal',
    bw=False,
)

TRAIN Value Counts
1    5000
0    5000
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Train Images: 100%|██████████| 10000/10000 [00:12<00:00, 810.91it/s]


TEST Value Counts
0    509
1    483
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Test Images: 100%|██████████| 992/992 [00:01<00:00, 826.66it/s]


VALIDATION Value Counts
1    400
0    400
dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Getting Validation Images: 100%|██████████| 800/800 [00:00<00:00, 880.04it/s]


Pickling The Data
Finished Pickling
