In [2]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import torch
from torch.nn import MSELoss

from model_builder import VanillaRNN
from engine import train
from data_setup import create_dataloaders

In [3]:
# Directory layout: raw inputs live in `data_path`, converted outputs in its
# `processed` subdirectory
data_path = '../data/'
processed_data_path = os.path.join(data_path, 'processed')

# Full paths of the two input files
personal_data_file = os.path.join(data_path, 'PersonalData.xlsx')
squats_file = os.path.join(processed_data_path, 'AllSquats.csv')

# Personal data comes from the Excel sheet
personal_data = pd.read_excel(personal_data_file)

# Load the data produced by converting the videos and attach the personal
# data to every row through the shared 'Id' column (default inner join)
data = pd.read_csv(squats_file).merge(personal_data, on='Id')

In [4]:
# Columns that aren't needed in the first run
to_drop = [
    'Id', 'Age', 'Height', 'Weight', 'PastInjuries', 'LastInjury', 'PainDuringTraining', 'SquatRecord',
    'BenchPressRecord', 'DeadliftRecord', 'PhysicalActivities', 'SetNumber', 'Load', 'Lifted', 'Timestamp']

# Categorical columns that get one hot encoded
to_one_hot = [
    'ProficiencyLevel', 'EquipmentAvailability', 'TrainingProgram', 'TrainingFrequency', 'CameraPosition']

# Highest load per 'Id' among the rows marked as lifted
was_lifted = data['Lifted'] == 1
max_load = (
    data.loc[was_lifted, ['Id', 'Load']]
    .groupby('Id', as_index=False)
    .max()
    .rename(columns={'Load': 'MaxLoad'})
)

# Attach that maximum to every row, express the current load as a fraction
# of it, then discard the helper column again
data = data.merge(max_load, on='Id')
data = data.assign(PercentageMaxLoad=data['Load'].div(data['MaxLoad'])).drop(columns='MaxLoad')

# Keep only lifted approaches and remove the unused columns
data = data[data['Lifted'].eq(1)].drop(columns=to_drop)

# One hot encode the categorical columns as integer 0/1 indicators
dataframe = pd.get_dummies(data, columns=to_one_hot, dtype=int)

# get_dummies appends the indicator columns after the remaining ones, so
# re-insert PercentageMaxLoad to make it the last column of the dataframe
percentage = dataframe.pop('PercentageMaxLoad')
dataframe['PercentageMaxLoad'] = percentage

In [5]:
# Fixed seed for the split: without it train_test_split shuffles differently
# on every run, so a different set of files would end up in each subset and
# the reported losses could not be reproduced after a kernel restart.
split_seed = 42

# Get unique file IDs (the split is made over files, not over single rows)
unique_file_ids = dataframe['FileId'].unique()

# Split the files into three lists in an 8:1:1 ratio:
# first 80% train vs. 20% held out, ...
train_ids, ids_to_split = train_test_split(
    unique_file_ids, test_size=0.2, random_state=split_seed)

# ... then the held-out 20% is halved into validation and test
valid_ids, test_ids = train_test_split(
    ids_to_split, test_size=0.5, random_state=split_seed)

# Put ids into dictionary (kept under the name `file_ids`, which is what the
# create_dataloaders call receives)
file_ids = {
    "train": train_ids,
    "validation": valid_ids,
    "test": test_ids}

In [6]:
# Seed torch's global RNG before anything random is created, so that runs
# started from a fresh kernel are repeatable.
# NOTE(review): this covers weight initialisation and any shuffling that
# relies on torch's default generator — confirm against create_dataloaders.
torch_seed = 42
torch.manual_seed(torch_seed)

# dataloaders
batch_size = 16
num_workers = 0
pin_memory = False

train_dataloader, valid_dataloader, test_dataloader = create_dataloaders(
    data=dataframe,
    file_ids=file_ids,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory
)

# model
# NOTE(review): 78 is presumably the number of feature columns fed to the
# model per time step — TODO confirm against `dataframe` / create_dataloaders.
input_size = 78
hidden_size = 128
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dtype = torch.float64

# Build the model from the variables above instead of repeating the literals,
# so that changing input_size / hidden_size actually changes the network.
model = VanillaRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    device=device,
    dtype=dtype
).to(device)

# optimizer, loss function
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Mean squared error loss
loss_fn = MSELoss()

In [7]:
# Number of training epochs
n_epochs = 10

# Run the training loop from engine.py; it prints a progress bar and the
# train/validation loss after every epoch.
# NOTE(review): `results` presumably holds the per-epoch loss history —
# confirm against engine.train.
results = train(
    model,
    train_dataloader,
    valid_dataloader,
    optimizer,
    loss_fn,
    n_epochs,
    device,
)

 10%|█         | 1/10 [00:06<00:57,  6.39s/it]

Epoch: 1 | train loss: 0.0897 | validation loss: 0.0514


 20%|██        | 2/10 [00:10<00:42,  5.28s/it]

Epoch: 2 | train loss: 0.0571 | validation loss: 0.0480


 30%|███       | 3/10 [00:15<00:34,  4.92s/it]

Epoch: 3 | train loss: 0.0543 | validation loss: 0.0446


 40%|████      | 4/10 [00:19<00:28,  4.77s/it]

Epoch: 4 | train loss: 0.0528 | validation loss: 0.0421


 50%|█████     | 5/10 [00:24<00:23,  4.66s/it]

Epoch: 5 | train loss: 0.0476 | validation loss: 0.0404


 60%|██████    | 6/10 [00:28<00:18,  4.64s/it]

Epoch: 6 | train loss: 0.0536 | validation loss: 0.0466


 70%|███████   | 7/10 [00:34<00:14,  4.80s/it]

Epoch: 7 | train loss: 0.0482 | validation loss: 0.0478


 80%|████████  | 8/10 [00:38<00:09,  4.81s/it]

Epoch: 8 | train loss: 0.0476 | validation loss: 0.0394


 90%|█████████ | 9/10 [00:44<00:04,  4.93s/it]

Epoch: 9 | train loss: 0.0446 | validation loss: 0.0381


100%|██████████| 10/10 [00:49<00:00,  4.91s/it]

Epoch: 10 | train loss: 0.0479 | validation loss: 0.0378



