In [None]:
import pandas as pd
import numpy as np
import os

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


In [None]:
!wget https://raw.githubusercontent.com/shahroudy/NTURGB-D/master/Matlab/NTU_RGBD_samples_with_missing_skeletons.txt

--2020-11-16 19:02:57--  https://raw.githubusercontent.com/shahroudy/NTURGB-D/master/Matlab/NTU_RGBD_samples_with_missing_skeletons.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6554 (6.4K) [text/plain]
Saving to: ‘NTU_RGBD_samples_with_missing_skeletons.txt’


2020-11-16 19:02:57 (63.7 MB/s) - ‘NTU_RGBD_samples_with_missing_skeletons.txt’ saved [6554/6554]



In [None]:
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [None]:
!unzip -q /content/gdrive/My\ Drive/nturgb+d_skeletons.zip

In [None]:
data_path = "nturgb+d_skeletons/"
broken_files_path = "NTU_RGBD_samples_with_missing_skeletons.txt"

In [None]:
class Skeleton_Dataset(Dataset):
    joints_framework = ['neck', 'nose', 'mid_hip',
                         'l_sho', 'l_elb',
                         'l_wri', 'l_hip',
                         'l_knee', 'l_ank',
                         'r_sho', 'r_elb',
                         'r_wri', 'r_hip',
                         'r_kne', 'r_ank',
                         'r_eye', 'l_eye',
                         'r_ear', 'l_ear']


        joints_framework_in_work = ['nose','l_sho', 'l_elb','l_wri','r_sho','r_elb', 'r_wri', 'l_hip','l_knee','l_ank','r_hip','r_kne','r_ank','neck']
        upper_joints_framework = ['nose','l_sho', 'l_elb','l_wri','r_sho','r_elb', 'r_wri', 'l_hip','l_knee','l_ank','r_hip','r_kne','r_ank','neck']



        SKELETON_EDGES = np.array([[11, 10], [10, 9], [9, 0], [0, 3], [3, 4], [4, 5], [0, 6], [6, 7], [7, 8], [0, 12],
                                       [12, 13], [13, 14], [1, 14], [1, 15], [15, 16], [1, 17], [17, 18]])
        
        
        
        bone_pairs = (
            (1, 2), (2, 21), (3, 21), (4, 3), (5, 21), (6, 5),
            (7, 6), (8, 7), (9, 21), (10, 9), (11, 10), (12, 11),
            (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), (18, 17),
            (19, 18), (20, 19), (22, 23), (21, 21), (23, 8), (24, 25),(25, 12)
        )

        bone_pairs_in_work = (
            (1, 14), 
            (14, 2), (2, 3), (3, 4),
            (14, 5), (5, 6), (6, 7), 
            (14, 8), (8, 9), (9, 10),
            (14, 11), (11, 12), (12, 13))


        joints_names = ['spinebase', 'spinemid', 'neck', 'head','l_sho', 'l_elb','l_wri','l_hand','r_sho','r_elb', 'r_wri', 'r_hand','l_hip','l_knee','l_ank','l_fool','r_hip','r_knee','r_ank','r_foot','spineshoulder','l_tip','l_thumb','r_tip','r_thunb']
        joints_in_work = [ 'head','l_sho', 'l_elb','l_wri','r_sho','r_elb', 'r_wri', 'l_hip','l_knee','l_ank','r_hip','r_knee','r_ank','spineshoulder']
        upper_joints = [ 'head','l_sho', 'l_elb','l_wri','r_sho','r_elb', 'r_wri', 'l_hip','l_knee','l_ank','r_hip','r_knee','r_ank','spineshoulder']
        
        
        ##### список файлов с лейблами на каждый файл 
        working_files_with_labels, action_classes = read_data(data_path, broken_files_path)
        
        LABELS = {v: k for k, v in action_classes.items()}
    
    
    def __init__(self, data_path, broken_files_path=None, training_classes=None,
                 num_joint = 25, max_frame = 300, transform=None):
        self.data_path = data_path
        self.broken_files_path = broken_files_path
        self.training_classes = training_classes
        self.training_subjects = training_subjects
        self.training_cameras = training_cameras
        self.transform = transform
        self.read_data(data_path, broken_files_path)
        self.build_dataframe()
        self.labels = self.data.iloc[:,-1]
        
        
    def read_data(self, data_path, broken_files_path):
        labels = []
        files = []
        action_classes = {}
        counter = 0
        files_counter = {}

        with open(broken_files_path, 'r') as f:
            broken_files = f.read().split("\n")

        raw_files = os.listdir(data_path)
        num_frames = 0

        for filename in raw_files:
            if filename not in broken_files:
                action_class = int(filename[filename.find('A') + 1:filename.find('A') + 4])
                subject_id = int(filename[filename.find('P') + 1:filename.find('P') + 4])
                camera_id = int(filename[filename.find('C') + 1:filename.find('C') + 4])
                if action_class in training_classes and camera_id in training_cameras: 
                    if action_class in action_classes:
                        if files_counter[action_class] < 120:
                            files.append([filename,action_classes[action_class]])
                            files_counter[action_class] = files_counter[action_class] + 1
                    else:
                        action_classes.update({action_class : counter})
                        files_counter.update({action_class : 1})
                        counter+=1
                        files.append([filename,action_classes[action_class]])
    #                     labels.append([action_class])
        print("action classes: ", action_classes)
        print("action files: ", files_counter)
        
        self.files = files
        self.action_classes = action_classes

#         return files, action_classes

    def get_nonzero_std(self, s): 
        index = s.sum(-1).sum(-1) != 0  
        s = s[index]
        if len(s) != 0:
            s = s[:, :, 0].std() + s[:, :, 1].std() + s[:, :, 2].std()  
        else:
            s = 0
        return s


    def read_skeleton_filter(self, file):
        with open(file, 'r') as f:
            skeleton_sequence = {}
            skeleton_sequence['numFrame'] = int(f.readline())
            skeleton_sequence['frameInfo'] = []
            for t in range(skeleton_sequence['numFrame']):
                frame_info = {}
                frame_info['numBody'] = int(f.readline())
                frame_info['bodyInfo'] = []

                for m in range(frame_info['numBody']):
                    body_info = {}
                    body_info_key = [
                        'bodyID', 'clipedEdges', 'handLeftConfidence',
                        'handLeftState', 'handRightConfidence', 'handRightState',
                        'isResticted', 'leanX', 'leanY', 'trackingState'
                    ]
                    body_info = {
                        k: float(v)
                        for k, v in zip(body_info_key, f.readline().split())
                    }
                    body_info['numJoint'] = int(f.readline())
                    body_info['jointInfo'] = []
                    for v in range(body_info['numJoint']):
                        joint_info_key = [
                            'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY',
                            'orientationW', 'orientationX', 'orientationY',
                            'orientationZ', 'trackingState'
                        ]
                        joint_info = {
                            k: float(v)
                            for k, v in zip(joint_info_key, f.readline().split())
                        }
                        body_info['jointInfo'].append(joint_info)
                    frame_info['bodyInfo'].append(body_info)
                skeleton_sequence['frameInfo'].append(frame_info)
        return skeleton_sequence


    def read_xyz(self, file, max_body=1, num_joint=25):
        seq_info = self.read_skeleton_filter(file)
        data = np.zeros((max_body, seq_info['numFrame'], num_joint, 3))
        for n, f in enumerate(seq_info['frameInfo']):
            for m, b in enumerate(f['bodyInfo']):
                for j, v in enumerate(b['jointInfo']):
                    if m < max_body and j < num_joint:
                        data[m, n, j, :] = [v['x'], v['y'], v['z']]

                    else:
                        pass
        return data


    def create_coords_blocks(self, test_file, chonk_len = 45):   
        frame_counter = 0
        new_labels = []
        new_frames = []
        blocks = []

        test_frames = self.read_xyz(data_path + test_file[0])[0]
        label = test_file[1]
        slice_len = chonk_len * int(len(test_frames)/chonk_len)


        for index in range(len(test_frames[:slice_len])):
            frame_counter += 1
            new_frames.append(test_frames[index].flatten())
            if frame_counter == chonk_len:
                frame_counter = 0
                blocks.append(np.array(new_frames))
                new_labels = new_labels + [label]
                new_frames = []
        return blocks, new_labels
        
    
    def build_dataframe(self):
        
        data = []
        labels = []
        numbers = {v: 0 for k, v in self.action_classes.items()}
        for file in self.files:
            frames_blocks, label = self.create_coords_blocks(file)
#             print(frames_blocks, label)
            if label != [] and numbers[label[0]] <= 150:
                numbers[label[0]] = numbers[label[0]] + len(label)
                data = data + frames_blocks
                labels = labels + label
        data_np = np.asarray(data)
        labels_np = np.asarray(labels)

        data_sq = data_np.reshape(len(data_np), -1)
        test_data = pd.DataFrame(data_sq)
        test_labels = pd.DataFrame(labels_np)
        test_data['labels'] = test_labels
        self.LABELS = {v: k for k, v in self.action_classes.items()}
        self.data = test_data
           
    def __len__(self):
         return len(self.data)
        
        
    def __getitem__(self, idx):
        item = np.asarray(self.data.iloc[idx,:-1]).reshape(45,75)
        label = self.labels[idx]
        if self.transform != None:
            item = transform(item)
        return (item, label)

IndentationError: ignored

In [None]:
class LSTM_net(nn.Module):
    def __init__(self,input_dim,hidden_dim,output_dim,layer_num):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lstm = torch.nn.LSTM(input_dim, hidden_dim,layer_num,batch_first=True)
        self.dr = torch.nn.Dropout2d(0.1)
        self.fc = torch.nn.Linear(hidden_dim,output_dim)
        
        
    def forward(self,inputs):
        x = inputs
        lstm_out,(hn,cn) = self.lstm(x)
        out = self.fc(lstm_out[:,-1,:])
        return out

In [None]:
training_subjects = list(range(0, 28)) #количество людей выполняющих действия
training_classes = [8, 10, 22, 23, 27, 21, 32, 5, 3, 16] #классы которые будем использовать для обучения, полный список прдставлен тут https://github.com/shahroudy/NTURGB-D
training_cameras = [1, 2, 3] 

max_body_true = 1
max_body_kinect = 1

num_joint = 25
max_frame = 300

In [None]:
dataset = Skeleton_Dataset(data_path, broken_files_path, training_classes, transform=None)

action classes:  {16: 0, 21: 1, 22: 2, 27: 3, 8: 4, 5: 5, 10: 6, 32: 7, 3: 8, 23: 9}
action files:  {16: 120, 21: 120, 22: 120, 27: 120, 8: 120, 5: 120, 10: 120, 32: 120, 3: 120, 23: 120}


In [None]:
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(0.75*len(dataset)), len(dataset) - int(0.75*len(dataset))])
train_loader = DataLoader(train_dataset, batch_size = 16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle=True)

In [None]:
n_hidden = 128
n_joints = 25*3
LABELS = {x: training_classes[x] for x in range(len(training_classes))}
n_categories = len(LABELS)
n_layer = 2
rnn = LSTM_net(n_joints,n_hidden,n_categories,n_layer)
rnn.to(device)

LSTM_net(
  (lstm): LSTM(75, 128, num_layers=2, batch_first=True)
  (dr): Dropout2d(p=0.1, inplace=False)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)

In [79]:
def categoryFromOutput(output):
    top_n, top_i = output.topk(1)
    category_i = top_i[0].item()
#     print(output.topk(5))
    return LABELS[category_i], category_i

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

In [80]:
from torch import optim
import time
import math

criterion = nn.CrossEntropyLoss()
learning_rate = 0.0007
optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

all_losses = []
start = time.time()
counter = 0
for epoch in range(60):  
    current_loss = 0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data[0].to(device), data[1].to(device).type(torch.LongTensor).to(device)
        optimizer.zero_grad()
    
        output = rnn(inputs.float())
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step() 


        current_loss += loss.item()
        category = {x: training_classes[x] for x in range(len(training_classes))}[int(labels[0])]

        if counter % 500 == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('epoch : %d iter : %d  %.4f  / %s %s' % (epoch, i, loss, guess, correct))

        
        counter = counter + 1
    if counter % 100 == 0:
        all_losses.append(current_loss / 25)
        current_loss = 0

epoch : 0 iter : 0  1.6038  / 8 ✓
epoch : 7 iter : 52  1.9564  / 27 ✓
epoch : 15 iter : 40  1.6127  / 8 ✗ (21)
epoch : 23 iter : 28  1.4999  / 3 ✗ (32)
epoch : 31 iter : 16  1.4899  / 22 ✓
epoch : 39 iter : 4  1.8485  / 22 ✓
epoch : 46 iter : 56  2.0498  / 27 ✓
epoch : 54 iter : 44  1.4646  / 8 ✓


In [81]:
class LSTM_net(nn.Module):
    def __init__(self,input_dim,hidden_dim,output_dim,layer_num):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.lstm1 = torch.nn.LSTM(input_dim, hidden_dim, layer_num,batch_first=True)
        self.dr1 = torch.nn.Dropout2d(0.5)
        self.lstm2 = torch.nn.LSTM(input_dim, hidden_dim)
        self.dr2 = torch.nn.Dropout2d(0.5)
        self.fc = torch.nn.Linear(hidden_dim,output_dim)
        
        
    def forward(self,inputs):
        x = inputs
        lstm_out1,(hn,cn) = self.lstm1(x)
        lstm_out2,(hn,cn) = self.lstm2(x)
        out = self.fc(lstm_out2[:,-1,:])
        return out

In [82]:
n_hidden = 324
n_joints = 25*3
n_categories = len({x: training_classes[x] for x in range(len(training_classes))})
n_layer = 2
rnn = LSTM_net(n_joints,n_hidden,n_categories,n_layer)
rnn.to(device)

LSTM_net(
  (lstm1): LSTM(75, 324, num_layers=2, batch_first=True)
  (dr1): Dropout2d(p=0.5, inplace=False)
  (lstm2): LSTM(75, 324)
  (dr2): Dropout2d(p=0.5, inplace=False)
  (fc): Linear(in_features=324, out_features=10, bias=True)
)

In [83]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = optim.SGD(rnn.parameters(),lr=learning_rate,momentum=0.9)

all_losses = []
start = time.time()
counter = 0
for epoch in range(60):  
    current_loss = 0
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        
        inputs, labels = data[0].to(device), data[1].to(device).type(torch.LongTensor).to(device)
        optimizer.zero_grad()
    
        output = rnn(inputs.float())
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step() 


        current_loss += loss.item()
        category = {x: training_classes[x] for x in range(len(training_classes))}[int(labels[0])]

        if counter % 500 == 0:
            guess, guess_i = categoryFromOutput(output)
            correct = '✓' if guess == category else '✗ (%s)' % category
            print('epoch : %d iter : %d (%s) %.4f  / %s %s' % (epoch, i, timeSince(start), loss, guess, correct))

        
        counter = counter + 1
    if counter % 100 == 0:
        all_losses.append(current_loss / 25)
        current_loss = 0

epoch : 0 iter : 0 (0m 0s) 2.3250  / 32 ✗ (27)
epoch : 7 iter : 52 (1m 19s) 2.2545  / 5 ✗ (3)
epoch : 15 iter : 40 (2m 37s) 2.2597  / 5 ✗ (23)
epoch : 23 iter : 28 (3m 56s) 2.2841  / 3 ✓
epoch : 31 iter : 16 (5m 14s) 2.0788  / 22 ✓
epoch : 39 iter : 4 (6m 32s) 2.1603  / 22 ✗ (32)
epoch : 46 iter : 56 (7m 52s) 1.9841  / 3 ✓
epoch : 54 iter : 44 (9m 11s) 1.6194  / 22 ✓
