# Action recognition

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pickle
import os

## Dataloader 

In [125]:
DATA_PATH = "data/"

In [132]:
def get_data( name, data_path = DATA_PATH):
# label = 0: roll_droite, 1: roll_gauche, 2: salut_droite, 3: salut_gauche (determiné apar l'ordre des pkl dans le dossier data)
    master_df = pd.DataFrame(columns = ["frame","left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "label"])
    label = 0
    for pickle_file in os.listdir(DATA_PATH):
        if pickle_file.endswith(name+".pkl"):
            temp_df = pd.read_pickle(DATA_PATH + pickle_file)
            temp_df["label"] = label
            label += 1
            master_df = pd.concat([master_df, temp_df], axis = 0)
    return master_df

In [133]:
master_df = pd.concat([get_data("gui"), get_data("val")], axis = 0)

In [134]:
temp_df = master_df[["left_shoulder", "right_shoulder", "left_elbow", "right_elbow"]]

min max normalization per angle

In [135]:
master_df[["left_shoulder", "right_shoulder", "left_elbow", "right_elbow"]] = (temp_df-temp_df.min())/(temp_df.max()-temp_df.min())

drop nan to avoid instability

In [136]:
master_df = master_df.dropna()

In [137]:
class TimeSeriesDataset(Dataset):
    def __init__(self, df, window_size):
        self.df = df
        self.window_size = window_size

    def __len__(self):
        n_vids = 8 #number of videos - we cannot take frames that are not in the same video
        return len(self.df) - self.window_size*n_vids + 2*n_vids

    def __getitem__(self, idx): #for now we take window_size consecutive frames
        labels = self.df.iloc[idx:idx+self.window_size, 5].values
        same = True
        for label in labels:
            if label != labels[0]:
                same = False
        if same == False:
            return self.__getitem__(idx+1) #if the labels are not the same, we skip this window
        else:
            label = labels[0]
        
        data = self.df.iloc[idx:idx+self.window_size, 1:5].values
        # data = (data-data.mean(axis=0))/data.std(axis=0)
        return torch.tensor(data).float(), torch.tensor(label)

In [138]:
batch_size = 4
window_size = 5
df = master_df
dataset = TimeSeriesDataset(df, window_size)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

## Model

In [170]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.batch_norm = nn.BatchNorm1d(input_size)

    def forward(self, x):
        # h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # = self.batch_norm(x)
        out, _ = self.lstm(x)
        out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))
        return out

In [171]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


In [172]:
dataloader.__len__()

1831

In [173]:
input_size = 4 # Size of each time step in the input window
hidden_size = 64 # Number of features in the hidden state of the LSTM
num_layers = 2 # Number of LSTM layers
num_classes = 4 # Number of output classes (i.e. number of possible labels)
learning_rate = 0.0001
num_epochs = 20
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Record the number of correct predictions and total loss


debug = False

for epoch in range(num_epochs):
    num_correct = 0
    running_epoch_loss = 0.0
    for i, (inputs, labels) in enumerate(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # print(inputs)
        # print(i)
        # print(outputs)
        # print(labels)

        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #print(outputs)

        y_pred = outputs.argmax(dim = -1)

        if debug:
            # print(f"predicted labesl = {y_pred}")
            # print(f"true labels = {labels}")
            print(f"correct = {(y_pred == labels).sum().item()}")

        num_correct += (y_pred == labels).sum().item()
        running_epoch_loss += loss.item()

    epoch_loss = running_epoch_loss / len(dataloader)
    epoch_acc = num_correct / len(dataloader.dataset)
    print(f'Training => Loss: {epoch_loss:.4f} | Accuracy: {epoch_acc:.4f}')


  out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))


Training => Loss: 1.0888 | Accuracy: 0.6683
Training => Loss: 0.9306 | Accuracy: 0.8203
Training => Loss: 0.9194 | Accuracy: 0.8253
Training => Loss: 0.9149 | Accuracy: 0.8285
Training => Loss: 0.9124 | Accuracy: 0.8318
Training => Loss: 0.9116 | Accuracy: 0.8315
Training => Loss: 0.9106 | Accuracy: 0.8324
Training => Loss: 0.9083 | Accuracy: 0.8335
Training => Loss: 0.9084 | Accuracy: 0.8327
Training => Loss: 0.9074 | Accuracy: 0.8341
Training => Loss: 0.9083 | Accuracy: 0.8318
Training => Loss: 0.9063 | Accuracy: 0.8363
Training => Loss: 0.9060 | Accuracy: 0.8361
Training => Loss: 0.9038 | Accuracy: 0.8380
Training => Loss: 0.9032 | Accuracy: 0.8382
Training => Loss: 0.9042 | Accuracy: 0.8354
Training => Loss: 0.9016 | Accuracy: 0.8395
Training => Loss: 0.9013 | Accuracy: 0.8390
Training => Loss: 0.9000 | Accuracy: 0.8412
Training => Loss: 0.8991 | Accuracy: 0.8412


In [182]:
for x,y in dataloader:
    print(x.shape)
    print(y)
    break

torch.Size([4, 5, 4])
tensor([2, 0, 2, 1])


In [183]:
x[0,:,:]

tensor([[0.1460, 0.2625, 0.9405, 0.2407],
        [0.1467, 0.2676, 0.9486, 0.1800],
        [0.1398, 0.2688, 0.9595, 0.1429],
        [0.1324, 0.2731, 0.9608, 0.1297],
        [0.1346, 0.2773, 0.9588, 0.1440]])

In [187]:
model(x[0,:,:].unsqueeze(0).to(device)).argmax(dim = -1)

  out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))


tensor([2], device='cuda:0')

In [185]:
y

tensor([2, 0, 2, 1])