In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import shutil
import random

In [2]:
import os
import sys

# Running inside Google Colab? The runtime has already loaded its own package
# by the time user code executes, so its presence in sys.modules is the signal.
# Alternative check with the same intent:
# isColab = "COLAB_GPU" in os.environ
isColab = "google.colab" in sys.modules

if isColab:
    # Select SDL's "dummy" video driver for this process.
    os.environ["SDL_VIDEODRIVER"] = "dummy"

    # Make Google Drive available under /content/drive (remounting if needed).
    from google.colab import drive
    drive.mount("/content/drive", force_remount=True)

    # Root of the user's Drive; only defined when running on Colab.
    colab_path = "/content/drive/My Drive"

Mounted at /content/drive


In [3]:
# Source folders, one per category. All of them live under the same data root,
# so the root is written once and each category path is derived from it.
_data_root = "/content/drive/My Drive/DL2470 Final Project Group/data"
angry, drowsy, drunk, pain, neutral = (
    f"{_data_root}/{label}"
    for label in ("angry", "drowsy", "drunk", "pain", "neutral")
)

In [4]:
# --- Split configuration -----------------------------------------------------
# The "precentage" spelling is kept on purpose: these names live in the
# notebook namespace and other cells may read them.
train_precentage = 0.8   # fraction of all images that go to the training split
test_precentage = 0.2    # fraction of all images that go to the test split

class1_precentage = 0.6  # fraction of each split made up of class-1 images
class0_precentage = 0.4  # fraction of each split made up of class-0 images

nums_images = 10000      # total number of images across both splits

# The class-1 share of each split is divided evenly across this many categories.
nums_class1_categories = 4

# The fractions must partition the data, otherwise the counts below will not
# add up to nums_images.
assert abs(train_precentage + test_precentage - 1.0) < 1e-9, \
    "train and test fractions must sum to 1"
assert abs(class1_precentage + class0_precentage - 1.0) < 1e-9, \
    "class fractions must sum to 1"

# Image counts are whole numbers. round() is used rather than int() because
# int() truncates: a product that lands just below the intended value in binary
# floating point (e.g. 0.29 * 100 == 28.999999999999996) would lose an image.
nums_images_train = round(nums_images * train_precentage)
nums_images_test = round(nums_images * test_precentage)

nums_images_train_1 = round(nums_images * train_precentage * class1_precentage)
nums_images_train_0 = round(nums_images * train_precentage * class0_precentage)

nums_images_test_1 = round(nums_images * test_precentage * class1_precentage)
nums_images_test_0 = round(nums_images * test_precentage * class0_precentage)

# Per-category quota for class 1; floor division drops any remainder so that
# no category is asked for more than its even share.
nums_images_per_cat_train = nums_images_train_1 // nums_class1_categories
nums_images_per_cat_test = nums_images_test_1 // nums_class1_categories

In [5]:
# Print the planned split sizes in three groups (overall totals, per-category
# quota, neutral quota), with a dashed rule between consecutive groups.
section_break = "-------------------------------------------------------------"
split_report = (
    (
        ("Numbers of images in Train: ", nums_images_train),
        ("Numbers of images in Test: ", nums_images_test),
    ),
    (
        ("Numbers of images for each category in Train: ", nums_images_per_cat_train),
        ("Numbers of images for each category in Test: ", nums_images_per_cat_test),
    ),
    (
        ("Numbers of images for neutral category in Train: ", nums_images_train_0),
        ("Numbers of images for neutral category in Test: ", nums_images_test_0),
    ),
)

for position, section in enumerate(split_report):
    if position > 0:
        print(section_break)
    for caption, count in section:
        print(caption, count)

Numbers of images in Train:  8000.0
Numbers of images in Test:  2000.0
-------------------------------------------------------------
Numbers of images for each category in Train:  1200
Numbers of images for each category in Test:  300
-------------------------------------------------------------
Numbers of images for neutral category in Train:  3200
Numbers of images for neutral category in Test:  800


In [6]:
# Sanity check: show what is inside the dataset folder, or report that the
# folder cannot be found (for example when Drive is not mounted).
base_path = "/content/drive/My Drive/DL2470 Final Project Group/data"
try:
    directories = os.listdir(base_path)
except FileNotFoundError as err:
    print(f"Failed to list directories in the base path due to: {err}")
else:
    print("Directories in the base path:", directories)


Directories in the base path: ['neutral', 'drunk', 'angry', 'pain', 'drowsy', 'train', 'test']


In [None]:
# Dataset root and the source categories that feed the "abnormal" class
base_path = "/content/drive/My Drive/DL2470 Final Project Group/data"
categories = ["angry", "drowsy", "drunk", "pain"]
neutral_path = os.path.join(base_path, "neutral")

# Destination layout inside the data directory: <split>/<class>
train_abnormal_path = os.path.join(base_path, "train/abnormal")
test_abnormal_path = os.path.join(base_path, "test/abnormal")
train_normal_path = os.path.join(base_path, "train/normal")
test_normal_path = os.path.join(base_path, "test/normal")

# Create every destination folder; folders that already exist are left alone.
for split_dir in (
    train_abnormal_path,
    test_abnormal_path,
    train_normal_path,
    test_normal_path,
):
    os.makedirs(split_dir, exist_ok=True)

# Function to randomly select and copy images
def select_and_copy_images(source_folder, num_select_train, num_select_test,
                           train_dest_folder, test_dest_folder, seed=None):
    """Copy two disjoint random samples of files out of ``source_folder``.

    The regular files in ``source_folder`` are shuffled; the first
    ``num_select_train`` are copied to ``train_dest_folder`` and the next
    ``num_select_test`` to ``test_dest_folder``. Files keep their names, so a
    file with the same name already present in a destination is overwritten.

    Args:
        source_folder: Folder to sample from.
        num_select_train: Number of files to copy into the training folder.
        num_select_test: Number of files to copy into the test folder.
        train_dest_folder: Existing folder that receives the training sample.
        test_dest_folder: Existing folder that receives the test sample.
        seed: Optional seed for a private random generator, making the
            selection repeatable. When ``None`` (the default) the global
            ``random`` state is used.

    Returns:
        None. A warning is printed when the source holds fewer files than
        requested; in that case fewer files are copied.
    """
    # Keep regular files only: os.listdir() also returns sub-folders, which
    # shutil.copy() cannot copy. Sorting gives the shuffle a stable starting
    # order, because os.listdir() returns entries in arbitrary order.
    candidates = sorted(
        name for name in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, name))
    )

    if seed is None:
        random.shuffle(candidates)
    else:
        random.Random(seed).shuffle(candidates)

    requested = num_select_train + num_select_test
    if len(candidates) < requested:
        print(
            f"Warning: {source_folder} has only {len(candidates)} files but "
            f"{requested} were requested; the split will be smaller than planned."
        )

    # Consecutive, non-overlapping slices guarantee train and test are disjoint.
    selected_train = candidates[:num_select_train]
    selected_test = candidates[num_select_train:num_select_train + num_select_test]

    for dest_folder, selection in (
        (train_dest_folder, selected_train),
        (test_dest_folder, selected_test),
    ):
        for name in selection:
            shutil.copy(os.path.join(source_folder, name),
                        os.path.join(dest_folder, name))


# Seed the global RNG so the random selection below is repeatable.
random.seed(42)

# Re-running this cell on folders that already hold a split would layer a
# second random draw on top of the first: the folders would grow beyond the
# planned sizes and the same image could land in both train and test. Only
# copy when every destination folder is still empty.
split_destinations = [train_abnormal_path, test_abnormal_path,
                      train_normal_path, test_normal_path]
populated_destinations = [path for path in split_destinations if os.listdir(path)]

if populated_destinations:
    print("Skipping copy: these split folders already contain files:")
    for path in populated_destinations:
        print("   ", path)
    print("Empty them first if the split should be regenerated.")
else:
    # Processing 'abnormal' categories: each one contributes the same quota
    # to the shared abnormal folders.
    for category in categories:
        category_path = os.path.join(base_path, category)
        select_and_copy_images(category_path, nums_images_per_cat_train, nums_images_per_cat_test,
                               train_abnormal_path, test_abnormal_path)

    # 'neutral' alone fills the normal class, so it uses the class-0 totals
    # rather than the per-category quota.
    select_and_copy_images(neutral_path, nums_images_train_0, nums_images_test_0,
                           train_normal_path, test_normal_path)


In [None]:
def count_files_in_directory(directory):
    """Print how many entries ``directory`` contains.

    Every entry returned by ``os.listdir`` is counted (sub-folders included).
    If the directory does not exist, a "not found" message is printed instead.
    Nothing is returned.
    """
    try:
        entry_total = len(os.listdir(directory))
    except FileNotFoundError:
        print(f"Directory not found: {directory}")
    else:
        print(f"Total files in {directory}: {entry_total}")

# Locations of the four split folders
base_path = "/content/drive/My Drive/DL2470 Final Project Group/data"
train_abnormal_path = os.path.join(base_path, "train/abnormal")
train_normal_path = os.path.join(base_path, "train/normal")
test_abnormal_path = os.path.join(base_path, "test/abnormal")
test_normal_path = os.path.join(base_path, "test/normal")

# Report the size of each folder, train first and then test.
for split_path in (
    train_abnormal_path,
    train_normal_path,
    test_abnormal_path,
    test_normal_path,
):
    count_files_in_directory(split_path)


Total files in /content/drive/My Drive/DL2470 Final Project Group/data/train/abnormal: 4800
Total files in /content/drive/My Drive/DL2470 Final Project Group/data/train/normal: 3200
Total files in /content/drive/My Drive/DL2470 Final Project Group/data/test/abnormal: 1200
Total files in /content/drive/My Drive/DL2470 Final Project Group/data/test/normal: 800


In [None]:
# Root folders of the two splits
base_path = "/content/drive/My Drive/DL2470 Final Project Group/data"
train_dir = os.path.join(base_path, "train")
test_dir = os.path.join(base_path, "test")

# Both generators apply the same pixel normalisation factor.
pixel_scale = 1./255

# Random augmentations applied to training images only.
augmentation_options = {
    "rotation_range": 40,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "shear_range": 0.2,
    "zoom_range": 0.2,
    "horizontal_flip": True,
    "fill_mode": "nearest",
}

# Training generator: normalisation plus augmentation
train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)

# Test generator: normalisation only, so evaluation sees unaltered images
test_datagen = ImageDataGenerator(rescale=pixel_scale)


In [None]:
# Image input parameters
batch_size = 64
image_size = (40, 40)  # Adjust depending on your model requirements

# Options shared by the train and test iterators. class_mode is 'binary'
# because there are two class folders; use 'categorical' for more classes.
flow_options = {
    "target_size": image_size,
    "batch_size": batch_size,
    "class_mode": "binary",
}

# NOTE(review): per the Keras docs, label indices follow the alphanumeric order
# of the sub-folder names, so presumably abnormal -> 0 and normal -> 1, which is
# the reverse of the class1/class0 naming used when sizing the split. Confirm
# with train_generator.class_indices.
# NOTE(review): both iterators keep the default shuffling; if test predictions
# are later compared against test_generator.classes, the test iterator likely
# needs shuffle=False -- verify against the evaluation code.

# Training data iterator
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Test data iterator
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)

Found 8000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
