## Preliminaries
This notebook splits the image dataset into training and testing sets.

In [6]:
from shutil import copyfile
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split

## Data Processing

Choose the ten action classes with the most images (out of 40 in total) and split the images into training and testing sets.

In [7]:
# Load the per-image metadata table; later cells read its
# 'Filename', 'Action' and 'Action ID' columns
images_info = pd.read_csv(filepath_or_buffer="../Resources/image_info.csv")

In [8]:
# Rank the action classes by how many images each one has (largest first)
# and keep the names of the top ten
actions_df = (
    images_info
    .groupby(['Action'])
    .count()
    .sort_values(by='Filename', ascending=False)
)
actions_list = actions_df.index.values[:10]

In [9]:
# Hold out 20% of the images for testing; the fixed seed keeps the
# split reproducible across runs
X, y = images_info['Filename'].values, images_info['Action ID'].values

X_train_n, X_test_n, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

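The following function copies every image that belongs to one of the selected action classes from the main image directory into per-class training and testing directories, and writes a CSV record of the files in each split.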

In [None]:
def copy_images_to_dir(i_df, X_tr, X_te, a_list, num_actions):
    """Copy images of the selected action classes into train/test folders.

    Every row of ``i_df`` whose ``'Action'`` is in ``a_list`` is copied from
    ``../Resources/new/<Filename>`` to
    ``../Output/train_test_split/<num_actions>/images/<split>/<Action>/<Filename>``,
    where ``<split>`` is ``train`` and/or ``test`` depending on whether the
    filename appears in ``X_tr`` / ``X_te``. Images that appear in neither
    split are skipped. The filenames placed in each split are then written to
    ``training_set.csv`` and ``testing_set.csv`` under
    ``../Output/train_test_split/<num_actions>/``.

    Parameters
    ----------
    i_df : pandas.DataFrame
        Image table with at least the columns ``'Filename'`` and ``'Action'``.
    X_tr, X_te : array-like of str
        Filenames assigned to the training and testing splits.
    a_list : array-like of str
        Action classes to include.
    num_actions : str
        Label used as the name of the output sub-directory (e.g. ``"ten"``).

    Raises
    ------
    OSError
        If a source image is missing or an output directory/file cannot be
        created or written.
    """
    def _as_set(values):
        # NumPy arrays / pandas objects expose .tolist(), which yields plain
        # Python scalars; any other iterable is used as-is.
        return set(values.tolist()) if hasattr(values, "tolist") else set(values)

    # Build the lookup sets once so each membership test is O(1) instead of
    # converting and scanning the full arrays for every row.
    train_names = _as_set(X_tr)
    test_names = _as_set(X_te)
    wanted_actions = _as_set(a_list)

    # Directories to save images and split records
    out_root = f"../Output/train_test_split/{num_actions}"
    base_path = f"{out_root}/images"
    # Create the output root up front so the CSV records can be written even
    # when no image ends up being copied.
    os.makedirs(out_root, exist_ok=True)

    X_train = []
    X_test = []
    splits = (
        ("train", train_names, X_train),
        ("test", test_names, X_test),
    )

    for img_name, action in zip(i_df['Filename'], i_df['Action']):
        if action not in wanted_actions:
            continue

        img_path = f"../Resources/new/{img_name}"

        for split_name, members, record in splits:
            if img_name not in members:
                continue

            des_path = f"{base_path}/{split_name}/{action}"
            # makedirs creates missing parent folders and tolerates an
            # already-existing directory; real errors are no longer hidden.
            os.makedirs(des_path, exist_ok=True)
            copyfile(img_path, f"{des_path}/{img_name}")
            record.append(img_name)

    # Keep a record of training and testing images
    training_df = pd.DataFrame({"Filename": X_train})
    testing_df = pd.DataFrame({"Filename": X_test})

    training_df.to_csv(f"{out_root}/training_set.csv")
    testing_df.to_csv(f"{out_root}/testing_set.csv")

## Function call

In [None]:
# Materialise the train/test image folders for the ten selected classes
copy_images_to_dir(
    i_df=images_info,
    X_tr=X_train_n,
    X_te=X_test_n,
    a_list=actions_list,
    num_actions="ten",
)