In [1]:
import os
import torch
import numpy as np
import pandas as pd

from mediapipe import solutions
from sklearn.model_selection import train_test_split

from video_converter import Video2DataFrame
from custom_pose_landmarks import CustomPoseLandmark

In [2]:
# Selected values of pose landmarks corresponding to PoseLandmark class from MediaPipe library
# (handed to CustomPoseLandmark below as the subset of landmarks to work with)
values = [0, 11, 12, 13, 14, 15, 16, 19, 20, 23, 24, 25, 26, 27, 28, 31, 32]

# Custom pose landmark names and their connections
# NOTE(review): presumably each key is an additional landmark and the list names the
# MediaPipe landmarks it is connected to - confirm against CustomPoseLandmark
landmarks = {
    'THORAX': ['NOSE'],
    'PELVIS': ['LEFT_HIP', 'RIGHT_HIP'],}

# MediaPipe solutions
mp_drawing = solutions.drawing_utils
mp_pose = solutions.pose

# Build the custom landmark definition and the video-to-dataframe converter that receives it
custom_pose = CustomPoseLandmark(mp_pose, values, landmarks)
conv = Video2DataFrame(mp_pose, mp_drawing, custom_pose)

In [3]:
# Prepare paths
# All raw inputs are resolved relative to '../data/raw/' (relative to the working directory)
main_path = '../data/raw/'
squat_path = os.path.join(main_path, 'squat')
personal_data_path = os.path.join(main_path, 'PersonalData.xlsx')

# Read personal data from excel
# (merged with the video dataframe on the 'Id' column later in the notebook)
personal_data = pd.read_excel(personal_data_path)

In [9]:
# Convert videos to dataframe
# Processes the files under squat_path; one "Converting ... file to dataframe..." line is
# printed per file.
# NOTE(review): detection / tracking are presumably MediaPipe's minimum detection and
# tracking confidence thresholds - confirm against Video2DataFrame.get_dataframe
df = conv.get_dataframe(
    squat_path,
    detection=0.9,
    tracking=0.9)

Converting 001_01_05_01_040_1_C file to dataframe...
Converting 001_01_05_01_040_1_L file to dataframe...
Converting 001_01_05_02_040_1_C file to dataframe...
Converting 001_01_05_02_040_1_L file to dataframe...
Converting 001_01_05_03_040_1_C file to dataframe...
Converting 001_01_05_03_040_1_L file to dataframe...
Converting 001_01_05_03_040_1_R file to dataframe...
Converting 001_01_05_04_040_1_C file to dataframe...
Converting 001_01_05_04_040_1_L file to dataframe...
Converting 001_01_05_04_040_1_R file to dataframe...
Converting 001_01_05_05_040_1_C file to dataframe...
Converting 001_01_05_05_040_1_L file to dataframe...
Converting 001_01_05_05_040_1_R file to dataframe...
Converting 001_02_03_01_070_1_C file to dataframe...
Converting 001_02_03_01_070_1_L file to dataframe...
Converting 001_02_03_01_070_1_R file to dataframe...
Converting 001_02_03_02_070_1_C file to dataframe...
Converting 001_02_03_02_070_1_L file to dataframe...
Converting 001_02_03_02_070_1_R file to datafr

In [None]:
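# Optional: uncomment to save the converted dataframe so the video conversion does not have to be re-run
# df.to_csv('allsquats.csv')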

In [4]:
# Load the previously saved video dataframe; the first CSV column is used as the index
df = pd.read_csv('allsquats.csv', index_col=[0])

In [5]:
# Join the video records with the personal data of the matching 'Id'
data = df.merge(personal_data, on='Id')

# Per 'Id', the largest 'Load' among the rows where 'Lifted' equals 1
max_load = (
    data.loc[data['Lifted'] == 1, ['Id', 'Load']]
    .groupby(by='Id', as_index=False)
    .max()
    .rename(columns={'Load': 'MaxLoad'})
)

# Express the current load as a fraction of that maximum; the helper column
# 'MaxLoad' is only needed for the division and is removed afterwards
data = data.merge(max_load, on='Id')
data['PercentageMaxLoad'] = data['Load'] / data['MaxLoad']
data = data.drop(columns='MaxLoad')

In [6]:
# Variables that aren't needed in the first run
to_drop = [
    'Id', 'Age', 'Height', 'Weight', 'PastInjuries', 'LastInjury', 'PainDuringTraining', 'SquatRecord',
    'BenchPressRecord', 'DeadliftRecord', 'PhysicalActivities', 'SetNumber', 'Load', 'Lifted']

# Categorical variables that need to be one hot encoded
to_one_hot = [
    'ProficiencyLevel', 'EquipmentAvailability', 'TrainingProgram', 'TrainingFrequency', 'CameraPosition']

# Build the model table in one chain. `data` is deliberately NOT rebound here: the filter
# needs the 'Lifted' column, so overwriting `data` with the reduced frame would make this
# cell fail with a KeyError as soon as it is run a second time.
dataframe = (
    data.loc[data['Lifted'] == 1]                          # keep only lifted approaches
    .drop(columns=to_drop)                                 # remove the unused variables
    .pipe(pd.get_dummies, columns=to_one_hot, dtype=int)   # one hot encode as 0/1 integers
)

# Move the PercentageMaxLoad column to the end of the dataframe
percentage = dataframe.pop('PercentageMaxLoad')
dataframe['PercentageMaxLoad'] = percentage

In [22]:
# Optional checkpoint: uncomment to save the preprocessed dataframe, or to reload it from disk
# dataframe.to_csv('test_dataframe.csv', index=False)

# dataframe = pd.read_csv('test_dataframe.csv')

In [7]:
# Get unique file IDs
FileIds = dataframe['FileId'].unique()

# Split the files into three lists in an 8:1:1 ratio
# (20% is held out first and then halved into validation and test;
# the fixed random_state keeps both splits reproducible)
train, to_split = train_test_split(FileIds, test_size=0.2, random_state=42)

valid, test = train_test_split(to_split, test_size=0.5, random_state=42)

# Select the records of each split by FileId, so all records of a file end up in the same split
train_data = dataframe.loc[dataframe['FileId'].isin(train)]
valid_data = dataframe.loc[dataframe['FileId'].isin(valid)]
test_data = dataframe.loc[dataframe['FileId'].isin(test)]

In [8]:
def sort_and_assign(data, batch_size, n_groups, ascending=True):
    """
    Sort files by their record count and cut them into consecutive groups.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per file with a numeric 'Frequency' column. The caller's frame
        is not modified.
    batch_size : int
        Number of files per group; the last group may hold fewer.
    n_groups : int
        Number of group labels to use. It has to equal
        ceil(len(data) / batch_size), otherwise ``pd.cut`` raises a ValueError
        because labels and bins do not match.
    ascending : bool, default True
        Sort direction applied to 'Frequency' before the groups are cut.

    Returns
    -------
    tuple
        ``(frame, mean)`` where ``frame`` is the sorted input with the added
        columns 'GroupNumber' (0 .. n_groups - 1) and 'MaxFrequency' (largest
        'Frequency' within the group), and ``mean`` is the average of
        ``MaxFrequency - Frequency`` over all files.
    """
    # A stable sort keeps files with equal 'Frequency' in their input order, so the
    # group a file ends up in is reproducible between runs
    data = data.sort_values(by='Frequency', ascending=ascending, kind='mergesort').reset_index(drop=True)

    # Positions 1..len(data) are cut into windows of batch_size: (0, b], (b, 2b], ...
    data['GroupNumber'] = pd.cut(
        data.index + 1,
        bins=range(0, len(data) + batch_size, batch_size),
        labels=range(n_groups),
    )

    # observed=True states explicitly that only groups containing files are aggregated
    # (the implicit default for categorical keys is deprecated in recent pandas)
    group_max = (
        data.groupby(by='GroupNumber', as_index=False, observed=True)['Frequency']
        .max()
        .rename(columns={'Frequency': 'MaxFrequency'})
    )

    data = pd.merge(data, group_max, on='GroupNumber')

    # Calculate how many frames should be added to each file on average
    mean = (data['MaxFrequency'] - data['Frequency']).mean()

    return data, mean

In [9]:
def assign_groups(data, batch_size):
    """
    Attach a record-count based group to every row of ``data``.

    The number of rows per 'FileId' is counted, the files are grouped once in
    ascending and once in descending order of that count (``sort_and_assign``),
    and the ordering with the smaller mean gap to the group maximum is kept.
    The ascending ordering is used when both means are equal.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'FileId' column.
    batch_size : int
        Number of files per group.

    Returns
    -------
    pandas.DataFrame
        ``data`` merged on 'FileId' with the per-file columns produced by
        ``sort_and_assign`` ('Frequency', 'GroupNumber', 'MaxFrequency').
    """
    # Number of groups required to place every distinct file
    n_files = len(data['FileId'].unique())
    n_groups = int(np.ceil(n_files / batch_size))

    # Number of rows recorded for each file
    freq_data = (
        data.groupby(by='FileId', as_index=False)
        .size()
        .rename(columns={'size': 'Frequency'})
    )

    ascending_df, ascending_mean = sort_and_assign(freq_data, batch_size, n_groups)
    descending_df, descending_mean = sort_and_assign(freq_data, batch_size, n_groups, ascending=False)

    # Keep whichever ordering leaves the smaller average gap
    chosen = descending_df if ascending_mean > descending_mean else ascending_df

    return pd.merge(data, chosen, on='FileId')

In [10]:
def floor_ceil(x):
    """
    Return ``x`` rounded down and rounded up, both as Python ints.

    Example: ``floor_ceil(2.5)`` gives ``(2, 3)``; an integral ``x`` gives the
    same value twice.
    """
    lower = int(np.floor(x))
    upper = int(np.ceil(x))
    return lower, upper

In [11]:
def adjust_dataframe(data):
    """
    Pad the rows of one file so their number grows by MaxFrequency - Frequency.

    The missing rows are created by repeating the first row at the beginning
    and the last row at the end. For an odd number of missing rows the extra
    row goes to the end (a single missing row repeats only the last row).

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single file. The columns 'Frequency' and 'MaxFrequency' are
        read from the first row.

    Returns
    -------
    pandas.DataFrame
        Frame with a fresh 0..n-1 index. Rows are repeated by position, so the
        column dtypes of ``data`` are preserved. The frame is returned without
        padding when it is empty or when fewer than one row is missing.
    """
    # Reset index
    data = data.reset_index(drop=True)

    # Nothing to read the frequencies from
    if data.empty:
        return data

    # Calculate how many records are missing to the maximum frequency
    difference = data.loc[0, 'MaxFrequency'] - data.loc[0, 'Frequency']

    # Written as "not >=" so that NaN is treated like "nothing missing"
    if not difference >= 1:
        return data

    # Calculate how many records should be added to the beginning and to the end
    difference = int(difference)
    front = difference // 2
    back = difference - front

    # Repeat the first / last position instead of rebuilding rows from Series objects:
    # a row taken as a Series is cast to one common dtype, which would upcast the columns
    last = len(data) - 1
    positions = [0] * front + list(range(len(data))) + [last] * back

    return data.iloc[positions].reset_index(drop=True)

In [12]:
batch_size = 64
data = assign_groups(train_data, batch_size)

# Helper columns that must not end up in the tensors (the same for every file)
to_drop = ['Timestamp', 'Frequency', 'MaxFrequency']

# observed=True: iterate only over groups that actually contain files
for _, group_data in data.groupby(by='GroupNumber', observed=True):
    # Drop the GroupNumber column
    group_data = group_data.drop(columns='GroupNumber')

    # One 2-D tensor (records x features) per file of the group
    file_tensors = []

    for _, file_data in group_data.groupby(by='FileId'):
        # Drop the FileId column
        file_data = file_data.drop(columns='FileId')

        # Adjust dataframe to MaxFrequency in group
        adjusted = adjust_dataframe(file_data)
        # Drop unnecessary columns and convert the dataframe to a numpy array
        array = adjusted.drop(columns=to_drop).to_numpy()

        # Convert numpy array to pytorch tensor
        file_tensors.append(torch.from_numpy(array))

    # Stack once per group instead of growing a tensor with torch.cat on every file,
    # which copies all previously collected data again in each iteration.
    # After the adjustment all files of a group have the same number of records,
    # so the result has the shape (files, records, features).
    group_tensors = torch.stack(file_tensors, dim=0)

    print(group_tensors.shape)

torch.Size([64, 64, 79])
torch.Size([64, 75, 79])
torch.Size([64, 84, 79])
torch.Size([64, 90, 79])
torch.Size([64, 96, 79])
torch.Size([64, 105, 79])
torch.Size([64, 111, 79])
torch.Size([64, 120, 79])
torch.Size([64, 124, 79])
torch.Size([64, 132, 79])
torch.Size([64, 141, 79])
torch.Size([64, 150, 79])
torch.Size([64, 162, 79])
torch.Size([64, 180, 79])
torch.Size([64, 210, 79])
torch.Size([40, 238, 79])


In [None]:
# Debugging aid: print the rows of the first group (rows sharing one GroupNumber) and stop
grouped_data = data.groupby(by='GroupNumber', as_index=False)

for _, group in grouped_data:
    
    print(group)
    break

In [None]:
# Zero-pad every 'Id' group of df to the length of the longest group and stack the results.
# NOTE(review): the assignment into the np.zeros array presumably requires all remaining
# columns of df to be numeric - confirm before running this on the full dataframe
grouped = df.groupby('Id')

# Initialise the list that collects the per-group arrays
tensory = []

# Find the length of the longest group
max_length = grouped.size().max()

# Iterate over the groups and convert each of them to a padded array
for _, group in grouped:
    # Remove the 'Id' column
    group = group.drop(columns=['Id'])
    
    # Convert the DataFrame to a numpy array
    array = group.to_numpy()
    
    # Copy the rows into a zero-filled array of shape (1, max_length, n_columns);
    # rows beyond len(group) stay zero
    tensor = np.zeros((1, max_length, len(group.columns)))
    tensor[:, :len(group), :] = array.reshape(1, len(group), len(group.columns))
    
    # Add the padded array to the list
    tensory.append(tensor)

# Concatenate all padded arrays along the first axis
final_tensor = np.concatenate(tensory, axis=0)

# Display the final array
print(final_tensor)



In [48]:
from torch.utils.data import Dataset

In [None]:
class ImageFolderCustom(Dataset):
    """
    Dataset that serves one file (all rows sharing a file ID) per index.

    The data are read from a CSV file. Item ``i`` consists of the rows of the
    i-th distinct file ID, in order of first appearance in the CSV.

    NOTE(review): despite the class name no images are involved; the name is
    kept so existing references keep working.

    Parameters
    ----------
    target_dir : str or path-like
        Path of the CSV file to read (passed to ``pd.read_csv``).
    transform : callable, optional
        Applied to the feature tensor of an item before it is returned.
    id_column : str, default 'FileId'
        Column that identifies the file a row belongs to.
    target_column : str, default 'PercentageMaxLoad'
        Column holding the value to predict.
        NOTE(review): assumed to be constant within a file, only the value of
        the first row is returned - TODO confirm.
    """

    def __init__(self, target_dir, transform=None, id_column='FileId', target_column='PercentageMaxLoad'):
        self.data = pd.read_csv(target_dir)
        self.transform = transform
        self.id_column = id_column
        self.target_column = target_column

        # Fixed order of the distinct file IDs, so an integer index always maps to the
        # same file. Rows without an ID are ignored, as they would be by a groupby.
        self.file_ids = self.data[id_column].dropna().unique()

    def __len__(self):
        """Return the number of distinct files."""
        return len(self.file_ids)

    def __getitem__(self, index):
        """
        Return ``(features, target)`` for the file at position ``index``.

        ``features`` is a tensor built from all columns except the ID and the
        target column (rows x columns), passed through ``transform`` when one
        was given. ``target`` is a zero-dimensional tensor.

        Raises
        ------
        IndexError
            If ``index`` is outside the range of available files.
        """
        # Translate the position into the actual file ID before filtering the rows
        file_id = self.file_ids[index]
        video = self.data.loc[self.data[self.id_column] == file_id]

        target = torch.tensor(video[self.target_column].iloc[0])

        features = video.drop(columns=[self.id_column, self.target_column])
        # copy=True hands torch a writable array of its own
        sample = torch.from_numpy(features.to_numpy(copy=True))

        if self.transform:
            sample = self.transform(sample)

        return sample, target

In [56]:
import pandas as pd
import numpy as np

# Build a small example DataFrame with an 'Id' column
data = {
    'Id': [1, 1, 2, 2, 2],
    'Timestamp': [1, 2, 3, 4, 5],
    'RightFootIndexX': [0.1, 0.2, 0.3, 0.4, 0.5],
    'RightFootIndexY': [0.5, 0.6, 0.7, 0.8, 0.9],
    'RightFootIndexZ': [0.9, 1.0, 1.1, 1.2, 1.3]
}
df = pd.DataFrame(data)

# Group the rows by the value of the 'Id' column
grouped = df.groupby('Id')

# Length of the longest group; shorter groups are zero-padded up to it
max_length = grouped.size().max()

# Collect one padded array of shape (1, max_length, n_columns) per group
tensory = []
for _, group in grouped:
    # The 'Id' column only identifies the group and is not part of the values
    group = group.drop(columns=['Id'])
    array = group.to_numpy()
    n_rows, n_columns = array.shape

    # Start from zeros and overwrite the leading rows with the group's values
    tensor = np.zeros((1, max_length, n_columns))
    tensor[0, :n_rows, :] = array

    tensory.append(tensor)

# Join the padded arrays along the first axis: (n_groups, max_length, n_columns)
final_tensor = np.concatenate(tensory, axis=0)

# Show the final array
print(final_tensor)



[[[1.  0.1 0.5 0.9]
  [2.  0.2 0.6 1. ]
  [0.  0.  0.  0. ]]

 [[3.  0.3 0.7 1.1]
  [4.  0.4 0.8 1.2]
  [5.  0.5 0.9 1.3]]]


In [57]:
# Show the dimensions of the stacked array: (groups, padded rows, columns)
final_tensor.shape

(2, 3, 4)