In [1]:
import torch
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [51]:
from torch.nn import Conv3d, LSTM, MaxPool3d, BatchNorm1d, BatchNorm3d, ZeroPad3d, Dropout, Linear, Flatten, Module, Sequential
from torch.nn import functional as F

class Conv3D_LSTM(Module):
    def __init__(self, out_classes):
        super().__init__()

        # conv input: (None,3,20,128,128)
        # lstm input: (None,20,132)

        self.conv = Sequential(

            Conv3d(in_channels=3,
                   out_channels=16,
                   kernel_size=(3,3,3),
                   stride=(1,1,1),
                   padding=(1,1,1)
                   ),
            MaxPool3d(kernel_size=(1,2,2),
                      stride=(1,2,2)),
            BatchNorm3d(16),

            Conv3d(in_channels=16,
                   out_channels=32,
                   kernel_size=(3,3,3),
                   stride=(1,1,1),
                   padding=(1,1,1)
                   ),
            MaxPool3d(kernel_size=(2,2,2),
                      stride=(2,2,2)),
            BatchNorm3d(32),

            Conv3d(in_channels=32,
                   out_channels=64,
                   kernel_size=(3,3,3),
                   stride=(1,1,1),
                   padding=(1,1,1)
                   ),
            MaxPool3d(kernel_size=(2,2,2),
                      stride=(2,2,2)),
            BatchNorm3d(64),

            Conv3d(in_channels=64,
                   out_channels=128,
                   kernel_size=(3,3,3),
                   stride=(1,1,1),
                   padding=(1,1,1)
                   ),
            MaxPool3d(kernel_size=(1,2,2),
                      stride=(1,2,2)),
            BatchNorm3d(128),

            ZeroPad3d((0,0,0,0,1,2)), 
            MaxPool3d(kernel_size=(2,2,2), # 128,4,4,4
                      stride=(2,2,2)),
            Flatten() # 2**13 = 8192 features
        )
        # input is a tensor of shape (sequence_length (L), input_size (Hin))
        # LSTM(input_size = 10 (Hin), hidden_size = 20 (H_out), num_layers = 3)
        # input = tensor(5,3,10)
        # h0 = tensor(2,3,20)
        # c0 = tensor(2,3,20)
        # out =>

        self.lstm1 = LSTM(input_size=132,hidden_size=66,num_layers=1)
            # this require h_0 and c_0 of [1,20,66]
            # return None,20,66
        self.bn1 = BatchNorm1d(20)
        self.drop = Dropout(.2)
        self.lstm2 = LSTM(input_size=66,hidden_size=22,num_layers=1)
            # return None, 20, 22
        self.bn2 = BatchNorm1d(20)
        self.flat = Flatten()
        
        self.final = Sequential(
            Linear(in_features=8192,out_features=out_classes)
            # Linear(in_features=100,out_features=out_classes)
        )

    def forward(self, frames, marks):
        frames = frames.permute(0,4,1,2,3)

        # h0 = torch.zeros((1,20,66)).cuda()
        # c0 = torch.zeros((1,20,66)).cuda()

        # h1 = torch.zeros((1,20,22)).cuda()
        # c1 = torch.zeros((1,20,22)).cuda()

        branch1 = self.conv(frames)

        # branch2, (h0,c0) = self.lstm1(marks,(h0,c0))
        # branch2 = self.bn1(branch2)
        # branch2 = self.drop(branch2)

        # branch2, (h1,c1) = self.lstm2(branch2,(h1,c1))
        # branch2 = self.flat(branch2)
        # del h0, c0, h1, c1
        # out1 = torch.concat([branch1,branch2],dim=1)

        out1 = F.relu_(branch1)
        out1 = self.final(out1)
        return F.softmax(out1)

In [52]:
from torchinfo import summary
model = Conv3D_LSTM(2).to(device)
input_frame = torch.randn(16,20,128,128,3).to(device)
input_mark = torch.randn(16,20,132).to(device)
output = model(input_frame,input_mark)
summary(model,input_data=(input_frame,input_mark))

  return F.softmax(out1)


Layer (type:depth-idx)                   Output Shape              Param #
Conv3D_LSTM                              [16, 2]                   60,800
├─Sequential: 1-1                        [16, 8192]                --
│    └─Conv3d: 2-1                       [16, 16, 20, 128, 128]    1,312
│    └─MaxPool3d: 2-2                    [16, 16, 20, 64, 64]      --
│    └─BatchNorm3d: 2-3                  [16, 16, 20, 64, 64]      32
│    └─Conv3d: 2-4                       [16, 32, 20, 64, 64]      13,856
│    └─MaxPool3d: 2-5                    [16, 32, 10, 32, 32]      --
│    └─BatchNorm3d: 2-6                  [16, 32, 10, 32, 32]      64
│    └─Conv3d: 2-7                       [16, 64, 10, 32, 32]      55,360
│    └─MaxPool3d: 2-8                    [16, 64, 5, 16, 16]       --
│    └─BatchNorm3d: 2-9                  [16, 64, 5, 16, 16]       128
│    └─Conv3d: 2-10                      [16, 128, 5, 16, 16]      221,312
│    └─MaxPool3d: 2-11                   [16, 128, 5, 8, 8]     

In [54]:
# load data
import os
import pandas as pd
import cv2
import numpy as np

num_frames = 20 # changing requires model refactoring

def load_video(directory):
    cap = cv2.VideoCapture(directory)
    frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))
        else:
            break
    cap.release()
    return np.array(frames)/255

def load_txt(directory):
    df = pd.read_csv(directory)
    dataset = df.iloc[1:,1:]
    return np.array(dataset)

def load_data(directory):
    label_count = 0
    X_train = []
    y_train = []
    list_label = os.listdir(directory)
    for label in list_label:
        count = 0
        subpath = os.path.join(directory,label)
        files = os.listdir(subpath)

        files_txt = [txt for txt in files if txt.endswith(".txt")]
        files_mp4 = [mp4 for mp4 in files if mp4.endswith(".mp4")]

        if len(files_txt)!=len(files_mp4):
            raise RuntimeError("The amount of .txt and .mp4 files are not equal. Found {} .mp4 but {} .txt".format(len(files_mp4),len(files_txt)))
        
        n = len(files_txt)
        
        for i in range(n):
            frames = load_video(os.path.join(subpath,files_mp4[i]))
            marks = load_txt(os.path.join(subpath,files_txt[i]))
            n_samples = marks.shape[0]
            if count>=1000:
                break
            for j in range(num_frames, n_samples, num_frames):
                count+=1
                X_train.append( [frames[j-num_frames:j,::], marks[j-num_frames:j,:]] )
                y_train.append(label_count)
        label_count+=1
    return X_train, y_train, list_label

In [55]:
directory = "Dual data processed/"
X_train, y_train, list_label = load_data(directory)
print(len(X_train),len(y_train))
print(list_label)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.4)

1216 1216
['barbell biceps curl', 'bench press']


In [56]:
from torch.utils.data import Dataset

class SelfDataset(Dataset):
    # self define a dataset
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)

    def __getitem__(self,index):
        return self.X[index], self.y[index]    

In [57]:
# training preparation
from torch import optim
optimizer = optim.Adamax(model.parameters(),lr=0.001,weight_decay=1e-4)
loss = torch.nn.CrossEntropyLoss()

from torch.utils.data import DataLoader
batch_size = 16
train_set = SelfDataset(X_train,y_train)
train_loader = DataLoader(train_set, batch_size = batch_size, shuffle=True)
test_set = SelfDataset(X_test,y_test)
test_loader = DataLoader(test_set, batch_size = batch_size, shuffle=True)

In [58]:
epoch_count = 0
best_val_loss = 1000.
save_checkpoint = "test_model_convlstm"

In [59]:
test_count = 0
for input,label in train_loader:
    test_count +=1
    print(input[0].shape)
    print(input[1].shape)
    # print(input.shape)
    if test_count == 1:
        break


torch.Size([16, 20, 128, 128, 3])
torch.Size([16, 20, 132])


In [36]:
device = torch.device("cpu")

In [60]:
# training loop:
from torchmetrics import Accuracy

acc = Accuracy(task="multiclass",num_classes = 2).cuda()
epoch_train = 10
model.to(device)

for epoch in range(epoch_train):
    print("Epoch " + str(epoch_count+1))
    
    model.train(True)
    # train section
    train_loss = 0.
    for input, label in train_loader:

        label = label.to(device)
        inp_frame = torch.tensor(input[0],dtype=torch.float32).to(device)
        inp_mark = torch.tensor(input[1],dtype=torch.float32).to(device)
        optimizer.zero_grad()
        # input (frames, mark)
        output = model(inp_frame,inp_mark)
        
        los = loss(output,label)
        los.backward()
        optimizer.step()
        acc.update(output,label)
        train_loss += los

    print("Train accuracy: {}, train loss: {}".format(acc.compute(), train_loss))
    
    model.eval()
    acc.reset()
    val_loss = 0.
    with torch.no_grad():
        for vinput, vlabel in test_loader:
            
            vlabel = vlabel.to(device)
            vinp_frame = torch.tensor(vinput[0],dtype=torch.float32).to(device)
            vinp_mark = torch.tensor(vinput[1],dtype=torch.float32).to(device)
            
            voutput = model(vinp_frame,vinp_mark)
            vloss = loss(output,label)
            acc.update(output,label)
            val_loss += vloss

    print("Validation accuracy: {}, validation loss: {}".format(acc.compute(), val_loss))
    acc.reset()
    if val_loss<best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), save_checkpoint+"_"+str(epoch_count+1))
    
    epoch_count+=1

Epoch 1


  inp_frame = torch.tensor(input[0],dtype=torch.float32).to(device)
  inp_mark = torch.tensor(input[1],dtype=torch.float32).to(device)
  return F.softmax(out1)


Train accuracy: 0.7805212736129761, train loss: 23.688514709472656


  vinp_frame = torch.tensor(vinput[0],dtype=torch.float32).to(device)
  vinp_mark = torch.tensor(vinput[1],dtype=torch.float32).to(device)


Validation accuracy: 0.8888888955116272, validation loss: 13.252412796020508
Epoch 2
Train accuracy: 0.9382715821266174, train loss: 17.55124855041504
Validation accuracy: 0.8888888955116272, validation loss: 11.855677604675293
Epoch 3
Train accuracy: 0.957476019859314, train loss: 16.481412887573242
Validation accuracy: 1.0, validation loss: 9.837172508239746
Epoch 4
Train accuracy: 0.9670782089233398, train loss: 16.112485885620117
Validation accuracy: 1.0, validation loss: 10.404129981994629
Epoch 5
Train accuracy: 0.991769552230835, train loss: 14.974200248718262
Validation accuracy: 1.0, validation loss: 9.787976264953613
Epoch 6
Train accuracy: 0.9986282587051392, train loss: 14.592756271362305
Validation accuracy: 1.0, validation loss: 9.930070877075195
Epoch 7
Train accuracy: 0.9986282587051392, train loss: 14.542840957641602
Validation accuracy: 1.0, validation loss: 9.726542472839355
Epoch 8
Train accuracy: 1.0, train loss: 14.50676155090332
Validation accuracy: 1.0, validati

In [62]:
# model.load_state_dict(torch.load("test_model_convlstm_9"))
count = 5
for input, label in test_loader:
    print("Test no: " + str(6-count))
    print(label)
    input_fr = torch.tensor(input[0],dtype=torch.float32).to(device)
    input_mk = torch.tensor(input[1],dtype=torch.float32).to(device)
    output = model(input_fr,input_mk)
    output = output.cpu().detach().numpy()
    print(np.argmax(output,axis=1))
    count-=1
    if count == 0:
        break
     

Test no: 1
tensor([0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0])


  input_fr = torch.tensor(input[0],dtype=torch.float32).to(device)
  input_mk = torch.tensor(input[1],dtype=torch.float32).to(device)
  return F.softmax(out1)


[0 0 1 0 0 0 0 1 1 1 0 0 1 1 1 0]
Test no: 2
tensor([1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1])
[1 1 1 0 0 1 1 1 0 1 0 0 0 0 1 1]
Test no: 3
tensor([0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0])
[0 0 1 1 0 0 0 1 0 1 1 0 0 0 0 0]
Test no: 4
tensor([0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1])
[0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 1]
Test no: 5
tensor([0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0])
[0 0 0 0 1 1 0 0 0 1 0 1 1 0 1 0]
