In [20]:
import os
import json
import random
from google.colab import drive

### Dividing data into train test and val

In [21]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Testing**

In [22]:
%cd /content/drive/Othercomputers/PFE/PFE 2023/data/Video_Dataset
%ls

/content/drive/Othercomputers/PFE/PFE 2023/data/Video_Dataset
[0m[01;34mbad_back_round[0m/  [01;34mbad_head[0m/         [01;34mbad_innner_thigh[0m/  [01;34mbad_toe[0m/  Readme.md
[01;34mbad_back_warp[0m/   [01;34mbad_inner_thigh[0m/  [01;34mbad_shallow[0m/       [01;34mgood[0m/


**Setup PATHS** 

## Data loading

In [23]:
# Define Path to the Dataset folder
BASE_PATH = '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1'
SUB_CLASSES = os.listdir(BASE_PATH)
print(SUB_CLASSES)

['bad_head', 'good', 'bad_inner_thigh', 'bad_shallow', 'bad_back_round', 'bad_back_warp', 'bad_toe']


In [16]:
data = {}
for label, folder_name in enumerate(SUB_CLASSES):
    folder = os.path.join(BASE_PATH, folder_name)
    files = os.listdir(folder)
    file_paths = [os.path.join(folder, file) for file in files if file.endswith(".json")]
    data[label] = {"file_paths":file_paths}
print(data)

{0: {'file_paths': ['/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000023.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000024.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000025.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000026.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000027.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000028.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0922_squat_000032.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0922_squat_000033.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints

In [19]:
print(data.keys())
print(data.values())

dict_keys([0, 1, 2, 3, 4, 5, 6])
dict_values([{'file_paths': ['/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000023.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000024.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000025.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000026.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000027.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0918_squat_000028.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0922_squat_000032.json', '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/0922_squat_000033.json', '/content/drive/Othercompu

In [30]:
def create_train_val_test_data(folders_dict, train_pct, val_pct):
    assert train_pct +val_pct <= 1.0, "Sum of train and val percentages should not exceed 1.0"

    train_data = []
    val_data = []
    test_data = []

    for label, folder in enumerate(SUB_CLASSES):
        print(folder)
        file_paths = folders_dict[label]["file_paths"]
        random.shuffle(file_paths)
        num_files = len(file_paths)

        train_end = int(num_files * train_pct)
        val_end = int(num_files * (train_pct + val_pct))

        train_files = file_paths[:train_end]
        val_files = file_paths[train_end:val_end]
        test_files = file_paths[val_end:]

        for file_path in train_files:
            train_data.append({"label": label, "file_path": file_path})

        for file_path in val_files:
            val_data.append({"label": label, "file_path": file_path})

        for file_path in test_files:
            test_data.append({"label": label, "file_path": file_path})

    random.shuffle(train_data)
    random.shuffle(val_data)
    random.shuffle(test_data)

    return  train_data,  val_data,  test_data


In [31]:
train_frac = 0.7
val_frac = 0.15
test_frac = 0.15

train_data, val_data, test_data = create_train_val_test_data(data, train_frac, val_frac)
print(len(train_data))
print(train_data)

print(len(val_data))
print(val_data)
print(len(test_data))
print(test_data)

bad_head
good
bad_inner_thigh
bad_shallow
bad_back_round
bad_back_warp
bad_toe
1343
[{'label': 0, 'file_path': '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_head/1003_squat_000115.json'}, {'label': 6, 'file_path': '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_toe/0918_squat_000041.json'}, {'label': 1, 'file_path': '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/good/1022_squat_000200.json'}, {'label': 3, 'file_path': '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_shallow/1025_squat_000019.json'}, {'label': 2, 'file_path': '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_inner_thigh/1022_squat_000145.json'}, {'label': 2, 'file_path': '/content/drive/Othercomputers/PFE/PFE 2023/data/Processed/Keypoints_Dataset_V1/bad_inner_thigh/1022_squat_000048.json'}, {'label': 2, 'file_path': '/content/drive/Othercomputers/

In [32]:
# Save train data
train_file_path = os.path.join(BASE_PATH, "train.json")
with open(train_file_path, 'w') as f:
    json.dump(train_data, f)

# Save test data
test_file_path = os.path.join(BASE_PATH, "test.json")
with open(test_file_path, 'w') as f:
    json.dump(test_data, f)

# Save validation data
val_file_path = os.path.join(BASE_PATH, "val.json")
with open(val_file_path, 'w') as f:
    json.dump(val_data, f)
