# Action recognition

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.utils.data as data
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import pickle
import os

## Dataloader 

In [2]:
DATA_TRAIN_PATH = "data/train/"
DATA_TEST_PATH = "data/test/"

In [3]:
def get_data( name, data_path):
# label = 0: idle 1: roll_droite, 2: roll_gauche, 3: salut_droite, 4: salut_gauche (determiné apar l'ordre des pkl dans le dossier data)
    master_df = pd.DataFrame(columns = ["frame","left_shoulder", "right_shoulder", "left_elbow", "right_elbow", "label"])
    label = 0
    for pickle_file in os.listdir(data_path):
        if pickle_file.endswith(name+".pkl"):
            temp_df = pd.read_pickle(data_path + pickle_file)
            temp_df["label"] = label
            label += 1
            master_df = pd.concat([master_df, temp_df], axis = 0)
    return master_df

In [4]:
def create_dataset(data_path):
    master_df = pd.concat([get_data("gui", data_path), get_data("val", data_path)], axis = 0)
    temp_df = master_df[["left_shoulder", "right_shoulder", "left_elbow", "right_elbow"]]
    # min max normalization per angle
    # master_df[["left_shoulder", "right_shoulder", "left_elbow", "right_elbow"]] = (temp_df-temp_df.min())/(temp_df.max()-temp_df.min()
    master_df[["left_shoulder", "right_shoulder", "left_elbow", "right_elbow"]] = ((temp_df-90)/180)
    # drop nan to avoid instability
    master_df = master_df.dropna()

    return master_df

In [5]:
class TimeSeriesDataset(Dataset):
    def __init__(self, df, window_size):
        self.df = df
        self.window_size = window_size

    def __len__(self):
        n_vids = 8 #number of videos - we cannot take frames that are not in the same video
        return len(self.df) - self.window_size*n_vids + 2*n_vids

    def __getitem__(self, idx): #for now we take window_size consecutive frames
        labels = self.df.iloc[idx:idx+self.window_size, 5].values
        same = True
        for label in labels:
            if label != labels[0]:
                same = False
        if same == False:
            return self.__getitem__(idx+1) #if the labels are not the same, we skip this window
        else:
            label = labels[0]
        
        data = self.df.iloc[idx:idx+self.window_size, 1:5].values
        # data = (data-data.mean(axis=0))/data.std(axis=0)
        return torch.tensor(data).float(), torch.tensor(label)

In [6]:
batch_size = 4
window_size = 5

df_train  = create_dataset(DATA_TRAIN_PATH)
df_test = create_dataset(DATA_TEST_PATH)

dataset_train = TimeSeriesDataset(df_train, window_size)
dataset_test = TimeSeriesDataset(df_test, window_size)

train_dataloader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)

In [9]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.5)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.batch_norm = nn.BatchNorm1d(input_size)

    def forward(self, x):
        # h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # = self.batch_norm(x)
        out, _ = self.lstm(x)
        out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))
        return out

In [10]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print(device)

cuda


In [11]:
def test(model, test_loader):
    print("Testing model")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            predicted = outputs.argmax(dim = -1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    test_accuracy = 100 * correct / total
    return test_accuracy

In [12]:
input_size = 4 # Size of each time step in the input window
hidden_size = 64 # Number of features in the hidden state of the LSTM
num_layers = 2 # Number of LSTM layers
num_classes = 5 # Number of output classes (i.e. number of possible labels)
learning_rate = 0.0001
num_epochs = 25
model = LSTMModel(input_size, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    print(f'Epoch {epoch+1}/{num_epochs}')
    num_correct = 0
    running_epoch_loss = 0.0
    for i, (inputs, labels) in enumerate(train_dataloader):
        model.train()

        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y_pred = outputs.argmax(dim = -1)

        num_correct += (y_pred == labels).sum().item()
        running_epoch_loss += loss.item()
    
    epoch_loss = running_epoch_loss / len(train_dataloader)
    epoch_acc = num_correct / len(train_dataloader.dataset)
    test_accuracy = test(model, test_dataloader)
    print(f'Training => Loss: {epoch_loss:.4f} | Train Accuracy: {epoch_acc:.4f}| Test Accuracy: {test_accuracy:.4f}')


Epoch 1/25


  out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))


Testing model
Training => Loss: 1.3727 | Train Accuracy: 0.5506| Test Accuracy: 57.9315
Epoch 2/25
Testing model
Training => Loss: 1.2132 | Train Accuracy: 0.7090| Test Accuracy: 57.1631
Epoch 3/25
Testing model
Training => Loss: 1.1847 | Train Accuracy: 0.7273| Test Accuracy: 56.4655
Epoch 4/25
Testing model
Training => Loss: 1.1710 | Train Accuracy: 0.7384| Test Accuracy: 59.5491
Epoch 5/25
Testing model
Training => Loss: 1.1592 | Train Accuracy: 0.7475| Test Accuracy: 58.2853
Epoch 6/25
Testing model
Training => Loss: 1.1550 | Train Accuracy: 0.7520| Test Accuracy: 57.6686
Epoch 7/25
Testing model
Training => Loss: 1.1506 | Train Accuracy: 0.7539| Test Accuracy: 58.8212
Epoch 8/25
Testing model
Training => Loss: 1.1461 | Train Accuracy: 0.7594| Test Accuracy: 55.8488
Epoch 9/25
Testing model
Training => Loss: 1.1466 | Train Accuracy: 0.7564| Test Accuracy: 54.8681
Epoch 10/25
Testing model
Training => Loss: 1.1433 | Train Accuracy: 0.7591| Test Accuracy: 54.7063
Epoch 11/25
Testing 

In [23]:
WEIGHTS_PATH = "pytorch_weights/LSTM/"
torch.save(model.state_dict(), WEIGHTS_PATH + "lstm_model_v1.pt")

In [24]:
for x,y in train_dataloader:
    print(x.shape)
    print(y)
    break

torch.Size([4, 5, 4])
tensor([4, 2, 1, 1])


In [25]:
x[0,:,:]

tensor([[0.3975, 0.1500, 0.5010, 0.9925],
        [0.3988, 0.1477, 0.4307, 0.9963],
        [0.3920, 0.1464, 0.3441, 0.9975],
        [0.3980, 0.1483, 0.2525, 0.9982],
        [0.3989, 0.1478, 0.2173, 0.9945]])

In [26]:
model(x[0,:,:].unsqueeze(0).to(device)).argmax(dim = -1)

  out = torch.nn.functional.softmax(self.fc(out[:, -1, :]))


tensor([4], device='cuda:0')

In [27]:
y

tensor([4, 2, 1, 1])