## Imports / Globals

In [1]:
import pickle
import pandas as pd
from os import listdir
from os.path import isfile, join
from tqdm import tqdm
import numpy as np
from sklearn.model_selection import train_test_split

# Seed NumPy's legacy global RNG so any np.random draws are repeatable.
# NOTE(review): the visible cells never draw from np.random directly (the
# train_test_split calls pass their own random_state), and no PyTorch seed is
# set in the visible cells, so model initialisation is presumably not
# reproducible from this line alone -- confirm.
np.random.seed(42)

In [2]:
# Dataset location on the author's machine. It is not referenced by any of the
# visible cells (the samples are loaded from a pickle instead).
# path = '/Users/thomas/Downloads/nturgb+d_skeletons'
path = 'D:\\Datasets\\Motion Privacy\\NTU RGB+D 120\\Skeleton Data'

# Experiment switches.
ntu_120 = False     # False: samples whose action id exceeds 60 are dropped later on
is_privacy = False  # True: label is read from filename chars 9:12 instead of 17:20

# Width of the one-hot label vector for the selected dataset/task combination.
if is_privacy:
    num_classes = 106 if ntu_120 else 40
else:
    num_classes = 120 if ntu_120 else 60

## Data organization

In [3]:
# Load the pre-built sample dictionary. Later cells treat X as a mapping from a
# sample-name string (label digits are sliced out of it) to an array that is
# indexed as data[:, :, :3].
# NOTE(review): pickle.load can execute arbitrary code while unpickling -- only
# point this at a file you produced yourself or otherwise trust.
with open('../NTU/X.pkl', 'rb') as f:
    X = pickle.load(f)

In [4]:
# NTU-60 mode: remove every sample whose action id (filename chars 17:20) is above 60.
if not ntu_120:
    # Collect the keys first so the dict is never resized while it is being iterated.
    to_del = [name for name in X if int(name[17:20]) > 60]
    for key in to_del:
        del X[key]

In [5]:
def process_data_optimized(samples=None, n_classes=None, privacy=None):
    """Convert the sample dictionary into a padded feature array and one-hot labels.

    Every argument is optional; when omitted, the notebook-level globals ``X``,
    ``num_classes`` and ``is_privacy`` are used, so the zero-argument call
    behaves as before.

    Args:
        samples: mapping of sample name -> array that can be indexed as
            ``data[:, :, :3]`` and whose frames flatten to 75 values
            (presumably 25 joints x 3 coordinates -- confirm against the code
            that builds the pickle).
        n_classes: width of the one-hot label vector.
        privacy: if true, the 1-based label is read from name chars 9:12,
            otherwise from chars 17:20.

    Returns:
        (x, y): ``x`` is float32 of shape (n_samples, 300, 150); only the first
        75 features of the first ``min(frames, 300)`` timesteps are filled, the
        rest (including the second-skeleton half) stays zero.  ``y`` is float32
        of shape (n_samples, n_classes) with a single 1 per row.
    """
    samples = X if samples is None else samples
    n_classes = num_classes if n_classes is None else n_classes
    privacy = is_privacy if privacy is None else privacy

    max_frames = 300          # sequences are truncated / zero-padded to this length
    feats_per_skeleton = 75   # first-skeleton slot; the array holds 2 * 75 features

    # Preallocate so padding and the unused second skeleton are zero by construction.
    x = np.zeros((len(samples), max_frames, 2 * feats_per_skeleton), dtype=np.float32)
    y = np.zeros((len(samples), n_classes), dtype=np.float32)

    # Position of the 3-digit, 1-based label inside the sample name.
    label_chars = slice(9, 12) if privacy else slice(17, 20)

    for i, (file, data) in enumerate(samples.items()):
        y[i, int(file[label_chars]) - 1] = 1

        # Keep only the first three values of the last axis.
        data = data[:, :, :3]
        num_timesteps = min(data.shape[0], max_frames)

        # A sample with no frames cannot be reshaped with an inferred (-1)
        # dimension; leave its row as all-zero padding instead of crashing.
        if num_timesteps == 0:
            continue

        # Flatten each frame and write it into the first-skeleton slot.
        x[i, :num_timesteps, :feats_per_skeleton] = (
            data[:num_timesteps].reshape(num_timesteps, -1))

    return x, y

# Build the padded feature array (X_) and the one-hot label array (Y_) from the samples in X.
X_, Y_ = process_data_optimized()   

In [6]:
# Split into train and test
train_x, test_x, train_y, test_y = train_test_split(X_, Y_, test_size=0.3, random_state=42)
# Split into validation and test
val_x, test_x, val_y, test_y = train_test_split(test_x, test_y, test_size=0.5, random_state=42)

del X_, Y_

## SGN

All code in this section is adapted from Microsoft's SGN. [GitHub](https://github.com/microsoft/SGN)

In [7]:
import time
import shutil
import os
import os.path as osp
import csv
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import MultiStepLR
from model import SGN
from data import NTUDataLoaders, AverageMeter
from util import make_dir, get_num_classes

In [8]:
# Hyperparameters/Tuning Parameters
network='SGN'
dataset='NTU'
start_epoch=0
case= 0 if is_privacy else 1 # 0 = privacy, 1 = Action
batch_size=64
max_epochs=120
monitor='val_acc'
lr=0.001
weight_decay=0.0001
lr_factor=0.1
workers=16
print_freq = 20
do_train=1
seg=20

In [9]:
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(inputs, target)`` batches.
        model: network to optimise; it is switched to training mode.
        criterion: callable returning the loss for ``(output, target)``.
        optimizer: optimiser stepped once per batch.
        epoch: zero-based epoch index, used only in the progress print-out.

    Returns:
        ``(loss_avg, acc_avg)`` as reported by the two AverageMeter instances
        (presumably sample-weighted running means -- AverageMeter is defined
        in data.py, not in this notebook).
    """
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.train()

    for step, (inputs, target) in enumerate(train_loader, start=1):
        n_samples = inputs.size(0)

        # Forward pass; both tensors are moved to the GPU unconditionally.
        output = model(inputs.cuda())
        target = target.cuda()
        loss = criterion(output, target)

        # Record statistics for this batch before the weights are updated.
        batch_acc = accuracy(output.data, target)
        loss_meter.update(loss.item(), n_samples)
        acc_meter.update(batch_acc[0], n_samples)

        # Backward pass: reset stale gradients, back-propagate, update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Periodic progress line: latest value followed by the running average.
        if step % print_freq == 0:
            print('Epoch-{:<3d} {:3d} batches\t'
                  'loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'accu {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch + 1, step, loss=loss_meter, acc=acc_meter))

    return loss_meter.avg, acc_meter.avg


def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` without updating any weights.

    Args:
        val_loader: iterable yielding ``(inputs, target)`` batches.
        model: network to evaluate; it is switched to eval mode.
        criterion: callable returning the loss for ``(output, target)``.

    Returns:
        ``(loss_avg, acc_avg)`` as reported by the two AverageMeter instances.
    """
    loss_meter, acc_meter = AverageMeter(), AverageMeter()
    model.eval()

    # No gradients are needed anywhere in the evaluation loop.
    with torch.no_grad():
        for inputs, target in val_loader:
            output = model(inputs.cuda())
            target = target.cuda()
            loss = criterion(output, target)

            # Accumulate loss and accuracy, weighting each batch by its size.
            batch_acc = accuracy(output.data, target)
            n_samples = inputs.size(0)
            loss_meter.update(loss.item(), n_samples)
            acc_meter.update(batch_acc[0], n_samples)

    return loss_meter.avg, acc_meter.avg


def test(test_loader, model, checkpoint, lable_path, pred_path):
    """Evaluate a saved checkpoint on ``test_loader`` and dump labels and scores.

    Args:
        test_loader: iterable of batches; element 0 is the input tensor and
            element 1 the target tensor.
        model: network whose weights are replaced by the checkpoint's.
        checkpoint: path of a file written by ``torch.save`` containing a
            ``'state_dict'`` entry.
        lable_path: text file that receives the concatenated targets (``%d``).
        pred_path: text file that receives the concatenated scores (``%f``).

    Prints the averaged accuracy and the elapsed wall-clock time; returns None.
    """
    acc_meter = AverageMeter()
    # Restore the weights that scored best on the validation set.
    saved = torch.load(checkpoint)
    model.load_state_dict(saved['state_dict'])
    model.eval()

    all_labels, all_preds = [], []

    started = time.time()
    for batch in test_loader:
        inputs, target = batch[0], batch[1]
        with torch.no_grad():
            scores = model(inputs.cuda())
            # When the loader supplies several input rows per target, regroup
            # the score rows per target and average them.
            rows_per_target = inputs.size(0) // target.size(0)
            scores = scores.view((-1, rows_per_target, scores.size(1))).mean(1)

        all_labels.append(target.cpu().numpy())
        all_preds.append(scores.cpu().numpy())

        batch_acc = accuracy(scores.data, target.cuda())
        acc_meter.update(batch_acc[0], inputs.size(0))

    # Persist ground truth first, then the predictions.
    np.savetxt(lable_path, np.concatenate(all_labels, axis=0), fmt='%d')
    np.savetxt(pred_path, np.concatenate(all_preds, axis=0), fmt='%f')

    print('Test: accuracy {:.3f}, time: {:.2f}s'
          .format(acc_meter.avg, time.time() - started))


def accuracy(output, target):
    """Top-1 accuracy, in percent, of ``output`` scores against one-hot ``target``.

    Args:
        output: score tensor of shape (batch, classes).
        target: one-hot label tensor of shape (batch, classes).

    Returns:
        A 1-element float tensor holding ``100 * correct / batch``.
    """
    n_samples = target.size(0)

    # Index of the highest score per row, flattened to shape (batch,).
    predicted = output.topk(1, 1, True, True)[1].squeeze(1)
    # One-hot rows -> class indices.
    expected = torch.argmax(target, dim=1)

    n_correct = predicted.eq(expected).float().sum(0, keepdim=True)
    return n_correct.mul_(100.0 / n_samples)



def save_checkpoint(state, filename='checkpoint.pth.tar', is_best=False):
    """Serialise ``state`` to ``filename`` with ``torch.save``.

    When ``is_best`` is true the written file is additionally copied to
    ``model_best.pth.tar`` in the current working directory.
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')


def get_n_params(model):
    """Return the total number of scalar values held in ``model``'s parameters.

    Args:
        model: any object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).

    Returns:
        int: sum over all parameters of the product of their dimensions;
        0 for a model without parameters.
    """
    # numel() is the product of a tensor's dimensions (1 for a 0-d parameter).
    # Using it avoids a hand-written product loop whose accumulator would
    # shadow the module alias ``nn`` inside this function.
    return sum(p.numel() for p in model.parameters())


class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is spread evenly over the other ``classes - 1`` classes.
    Targets are supplied one-hot and converted to class indices internally.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        """Store the number of classes, the smoothing amount and the class axis."""
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the batch-mean smoothed cross-entropy.

        Args:
            pred: raw scores of shape (batch, classes).
            target: one-hot labels of shape (batch, classes).
        """
        log_probs = pred.log_softmax(dim=self.dim)

        # The smoothed target is a constant; keep it out of the autograd graph.
        with torch.no_grad():
            class_idx = torch.argmax(target, dim=1)  # one-hot rows -> class indices
            off_value = self.smoothing / (self.cls - 1)
            smoothed = torch.zeros_like(log_probs).fill_(off_value)
            smoothed.scatter_(1, class_idx.data.unsqueeze(1), self.confidence)

        per_sample = torch.sum(-smoothed * log_probs, dim=self.dim)
        return torch.mean(per_sample)



In [10]:
def main():
    """Train SGN (when ``do_train == 1``) and evaluate the best checkpoint on the test split.

    All configuration comes from notebook-level globals: ``num_classes``,
    ``dataset``, ``seg``, ``batch_size``, ``do_train``, ``network``, ``lr``,
    ``weight_decay``, ``monitor``, ``case``, ``workers``, ``start_epoch``,
    ``max_epochs`` and the ``train_x``/``val_x``/``test_x`` (and ``*_y``) arrays.

    Side effects: writes ``<case>_best.pth``, ``<case>_log.csv``,
    ``<case>_lable.txt`` and ``<case>_pred.txt`` under
    ``<make_dir(dataset)>/<network>`` and prints progress to stdout.

    Raises:
        ValueError: if ``monitor`` is neither ``'val_acc'`` nor ``'val_loss'``.
    """
    model = SGN(num_classes, dataset, seg, batch_size, do_train)

    total = get_n_params(model)
    # print(model)
    print('The number of parameters: ', total)
    print('The modes is:', network)

    if torch.cuda.is_available():
        print('It is using GPU!')
        model = model.cuda()

    criterion = LabelSmoothingLoss(num_classes, smoothing=0.1).cuda()
    optimizer = optim.Adam(model.parameters(), lr=lr,
                           weight_decay=weight_decay)

    # np.inf is used because the capitalised alias np.Inf no longer exists in NumPy 2.x.
    if monitor == 'val_acc':
        mode = 'max'
        monitor_op = np.greater
        best = -np.inf
        str_op = 'improve'
    elif monitor == 'val_loss':
        mode = 'min'
        monitor_op = np.less
        best = np.inf
        str_op = 'reduce'
    else:
        # Fail before any training happens instead of hitting a NameError
        # after the first full epoch.
        raise ValueError(
            "monitor must be 'val_acc' or 'val_loss', got %r" % (monitor,))

    # NOTE(review): the decay factor is hard-coded here; the notebook's
    # lr_factor setting is not consulted.
    scheduler = MultiStepLR(optimizer, milestones=[60, 90, 110], gamma=0.1)
    # Data loading
    ntu_loaders = NTUDataLoaders(dataset, case, seg=seg, train_X=train_x, train_Y=train_y, test_X=test_x, test_Y=test_y, val_X=val_x, val_Y=val_y, aug=0)
    train_loader = ntu_loaders.get_train_loader(batch_size, workers)
    val_loader = ntu_loaders.get_val_loader(batch_size, workers)
    train_size = ntu_loaders.get_train_size()
    val_size = ntu_loaders.get_val_size()

    test_loader = ntu_loaders.get_test_loader(32, workers)

    print('Train on %d samples, validate on %d samples' %
          (train_size, val_size))

    best_epoch = 0
    output_dir = make_dir(dataset)

    save_path = os.path.join(output_dir, network)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    checkpoint = osp.join(save_path, '%s_best.pth' % case)
    earlystop_cnt = 0  # consecutive epochs without improvement (tracked, never acted on)
    csv_file = osp.join(save_path, '%s_log.csv' % case)
    log_res = list()

    lable_path = osp.join(save_path, '%s_lable.txt' % case)
    pred_path = osp.join(save_path, '%s_pred.txt' % case)

    # Training
    if do_train == 1:
        for epoch in range(start_epoch, max_epochs):

            print(epoch, optimizer.param_groups[0]['lr'])

            t_start = time.time()
            train_loss, train_acc = train(
                train_loader, model, criterion, optimizer, epoch)
            val_loss, val_acc = validate(val_loader, model, criterion)
            log_res += [[train_loss, train_acc.cpu().numpy(),
                         val_loss, val_acc.cpu().numpy()]]

            print('Epoch-{:<3d} {:.1f}s\t'
                  'Train: loss {:.4f}\taccu {:.4f}\tValid: loss {:.4f}\taccu {:.4f}'
                  .format(epoch + 1, time.time() - t_start, train_loss, train_acc, val_loss, val_acc))

            # The monitored value may be a plain float (the loss is accumulated
            # from loss.item()) or a 0-d tensor (the accuracy); float() handles
            # both, whereas calling .cpu() would fail on a float.
            current = float(val_loss if mode == 'min' else val_acc)

            if monitor_op(current, best):
                print('Epoch %d: %s %sd from %.4f to %.4f, '
                      'saving model to %s'
                      % (epoch + 1, monitor, str_op, best, current, checkpoint))
                best = current
                best_epoch = epoch + 1
                save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best': best,
                    'monitor': monitor,
                    'optimizer': optimizer.state_dict(),
                }, checkpoint)
                earlystop_cnt = 0
            else:
                print('Epoch %d: %s did not %s' % (epoch + 1, monitor, str_op))
                earlystop_cnt += 1

            scheduler.step()

        print('Best %s: %.4f from epoch-%d' % (monitor, best, best_epoch))
        # newline='' lets the csv module control line endings itself; without
        # it every row is followed by a blank line on Windows.
        with open(csv_file, 'w', newline='') as fw:
            cw = csv.writer(fw)
            cw.writerow(['loss', 'acc', 'val_loss', 'val_acc'])
            cw.writerows(log_res)
        print('Save train and validation log into into %s' % csv_file)

    # Test: build a fresh model (last constructor argument 0 instead of
    # do_train) and load the best checkpoint into it.
    model = SGN(num_classes, dataset, seg, batch_size, 0)
    model = model.cuda()
    test(test_loader, model, checkpoint, lable_path, pred_path)

main()

The number of parameters:  691048
The modes is: SGN
It is using GPU!
Train on 31742 samples, validate on 6802 samples
0 0.001
Epoch-1    20 batches	loss 3.8490 (4.1781)	accu 7.812 (4.062)
Epoch-1    40 batches	loss 3.6485 (3.9858)	accu 10.938 (6.211)
Epoch-1    60 batches	loss 3.4706 (3.8575)	accu 15.625 (8.125)
Epoch-1    80 batches	loss 3.3913 (3.7393)	accu 14.062 (10.117)
Epoch-1   100 batches	loss 3.1283 (3.6242)	accu 26.562 (12.531)
Epoch-1   120 batches	loss 2.8137 (3.5167)	accu 23.438 (14.492)
Epoch-1   140 batches	loss 2.8730 (3.4351)	accu 34.375 (16.317)
Epoch-1   160 batches	loss 2.8446 (3.3557)	accu 29.688 (18.467)
Epoch-1   180 batches	loss 2.6352 (3.2836)	accu 31.250 (20.399)
Epoch-1   200 batches	loss 2.7443 (3.2234)	accu 28.125 (21.898)
Epoch-1   220 batches	loss 2.4158 (3.1614)	accu 37.500 (23.473)
Epoch-1   240 batches	loss 2.2927 (3.1091)	accu 48.438 (24.844)
Epoch-1   260 batches	loss 2.6028 (3.0638)	accu 42.188 (26.010)
Epoch-1   280 batches	loss 2.5930 (3.0187)	acc