In [None]:
import os
import glob

import cv2
import numpy as np
import pandas as pd

In [2]:
!pwd

/data/cyan/AIDA3/cogmd/video_models/cv_models_code/ViViT


In [3]:
def custom_pad_image(image, target_size):
    """
    Pads an image to a specified size.

    Args:
        image: The image to be padded (numpy array).
        target_size: The desired size of the padded image (tuple of integers (height, width)).

    Returns:
        The padded image (numpy array).
    """

    # Get the original image shape
    h, w = image.shape[:2]
    old_image_height, old_image_width, channels = image.shape

    # Calculate the amount of padding needed for each dimension
    new_image_width = target_size[0]
    new_image_height = target_size[1]
    color = (0,0,0)
    result = np.full((new_image_height,new_image_width, channels), color, dtype=np.uint8)

    # compute center offset
    x_center = (new_image_width - old_image_width) // 2
    y_center = (new_image_height - old_image_height) // 2

    # copy img image into center of result image
    result[y_center:y_center+old_image_height,
        x_center:x_center+old_image_width] = image

    return result

In [4]:
def split_given_size(a, size, axis):
    return np.split(a, np.arange(size,len(a),size))

COGNITIVE LOAD

In [5]:
df = pd.read_csv("../label_generation/cognitive_load_labels.csv")
# template = np.zeros((2, 3, 224, 224))

In [6]:
def save_dataset(num_frames, folder, pad):
    for user in os.listdir("../../user_data/"):
        for task_name in os.listdir("../../user_data/" + user + "/Video/Task 1/Facial Features/"):
            if task_name == ".DS_Store":
                continue
            to_save = task_name if task_name[-1] != "y" else task_name[:-10]
            c1 = df["User ID"] == int(user)
            c2 = df["Task ID"] == task_name
            filtered_df = df[c1 & c2]
            if len(filtered_df) == 0:
                print(f"User {user} on task {task_name} is not available in the csv file")
            else:
                print(f"Creating data file for user {user}, task {task_name}")
                label = filtered_df['Labels'].iloc[0]
                task_data = []
                video_path = "../../simple_datasets/Task 1/" + folder + "/" + user + "_" + to_save
                for frame in sorted(os.listdir("../../user_data/" + user + "/Video/Task 1/Facial Features/" + task_name + "/" + task_name + "_aligned/")):
                    image_path = "../../user_data/" + user + "/Video/Task 1/Facial Features/" + task_name + "/" + task_name + "_aligned/" + frame
                    image = cv2.imread(image_path)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    print(image_path)
                    if pad:
                        image = custom_pad_image(image, target_size=(224, 224))
                    numpy_data = np.asarray(image).astype(np.float32)
                    numpy_data = numpy_data.transpose(2, 1, 0)
                    numpy_data = numpy_data / 255
                    task_data.append(numpy_data)
                task_data = np.array(task_data)
                # print(task_data.shape)
                chunks = split_given_size(a=task_data, size=32, axis=0)
                for counter, chunk in enumerate(chunks):
                    # chunk = np.concatenate((chunk, template), axis=0)
                    if chunk.shape[0] != num_frames:
                        print(f"Skipping chunk with {chunk.shape[0]} frames")
                        continue
                    np.savez(video_path + "_chunk_" + str(counter), video=chunk, label=label)

In [None]:
save_dataset(32, "video_dataset_CL_112_32frames", False)

SITUATION AWARENESS

In [5]:
df = pd.read_csv("../label_generation/situation_awareness_labels.csv")
# template = np.zeros((2, 3, 224, 224))

In [6]:
def save_dataset_SA(num_frames, folder, pad):
    for user in os.listdir("../../user_data/"):
        for task_name in os.listdir("../../user_data/" + user + "/Video/Task 1/Facial Features/"):
            if task_name == ".DS_Store":
                continue
            to_save = task_name if task_name[-1] != "y" else task_name[:-10]
            file = f"./{user}/{task_name}.json"
            c = df["name"] == file
            filtered_df = df[c]
            if len(filtered_df) == 0:
                print(f"User {user} on task {task_name} is not available in the csv file")
                continue
            else:
                print(f"Creating data file for user {user}, task {task_name}")
                label = filtered_df['labels'].iloc[0]
                task_data = []
                video_path = "../../simple_datasets/Task 1/" + folder + "/" + user + "_" + to_save
                for frame in sorted(os.listdir("../../user_data/" + user + "/Video/Task 1/Facial Features/" + task_name + "/" + task_name + "_aligned/")):
                    image_path = "../../user_data/" + user + "/Video/Task 1/Facial Features/" + task_name + "/" + task_name + "_aligned/" + frame
                    image = cv2.imread(image_path)
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    print(image_path)
                    if pad:
                        image = custom_pad_image(image, target_size=(224, 224))
                    numpy_data = np.asarray(image).astype(np.float32)
                    numpy_data = numpy_data.transpose(2, 1, 0)
                    numpy_data = numpy_data / 255
                    task_data.append(numpy_data)
                task_data = np.array(task_data)
                print(task_data.shape)
                chunks = split_given_size(a=task_data, size=32, axis=0)
                for counter, chunk in enumerate(chunks):
                    # chunk = np.concatenate((chunk, template), axis=0)
                    if chunk.shape[0] != num_frames:
                        print(f"Skipping chunk with {chunk.shape[0]} frames")
                        continue
                    np.savez(video_path + "_chunk_" + str(counter), video=chunk, label=label)

In [None]:
save_dataset_SA(32, "video_dataset_SA_112_32frames", False)

CODE TO CREATE SEPARATE FOLDS FOR TRAINING, TESTING, AND STORE PATHS IN TXT FILES

In [None]:
paths = glob.glob("../../../video_dataset_CL_112_32frames/*.npz")            # path to the newly created simple dataset folder

In [4]:
%cd ../../../video_dataset_224_32frames/                                              # cd to that folder
%mkdir training
%mv * training
%mkdir testing
%mkdir paths

/data/cyan/AIDA3/cogmd/video_dataset_224_32frames
mv: cannot move 'training' to a subdirectory of itself, 'training/training'


In [5]:
%cd ./training
%mkdir fold_0
%mkdir fold_1
%mkdir fold_2
%mkdir fold_3
%mkdir fold_4
%mkdir fold_5
%mkdir fold_6

/data/cyan/AIDA3/cogmd/video_dataset_224_32frames/training


In [6]:
%mv *8708506757* ./fold_0 
%mv *3485568572* ./fold_0
%mv *4795940856* ./fold_0

In [7]:
%mv *2058939492* ./fold_1
%mv *4217429651* ./fold_1
%mv *2999828357* ./fold_1

In [8]:
%mv *9793764153* ./fold_2
%mv *2651031016* ./fold_2
%mv *1008719828* ./fold_2

In [9]:
%mv *5843924292* ./fold_3
%mv *2325724317* ./fold_3
%mv *3955372865* ./fold_3

In [10]:
%mv *4741106167* ./fold_4
%mv *1566954358* ./fold_4
%mv *8346835623* ./fold_4

In [11]:
%mv *7786511601* ./fold_5
%mv *9115601756* ./fold_5
%mv *7538467423* ./fold_5

In [12]:
%mv *5012461204* ./fold_6
%mv *3437429070* ./fold_6
%mv *7799738638* ./fold_6

In [13]:
%mv *4830336371* ../testing
%mv *1691200944* ../testing

In [2]:
for fold in range(7):
    fold_paths = glob.glob(f"../../../video_dataset_SA_224_32frames/training/fold_{fold}/*.npz")     # path to the dataset files
    with open(f"./fold_{fold}_paths.txt", "w") as f:
        for train_path in fold_paths:
            f.write(train_path + "\n")

In [3]:
test_paths = glob.glob(f"../../../video_dataset_SA_224_32frames/testing/*.npz")                      # path to the dataset files
with open("./test_paths.txt", "w") as f:
    for test_path in test_paths:
        f.write(test_path + "\n")