In [1]:
import argparse
import time
import shutil
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = '1'
import os.path as osp
import csv
import numpy as np
import pickle
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
from model import SGN
from data import NTUDataLoaders, AverageMeter
from util import make_dir, get_num_classes
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import random

np.random.seed(42)

In [2]:
X = pickle.load(open('data/X.pkl', 'rb'))

In [3]:
connected_joints = [
    (1,2), # Base spine to mid spine
    (2,3), # Mid spine to neck
    (3,4), # Neck to head
    (5,9), # Left shoulder to Right Shoulder
    (13,17), # Left hip to Right hip
    (5,6), # Left shoulder to Left elbow
    (6,7), # Left elbow to Left wrist
    (7,8), # Left wrist to Left hand
    (9,10), # Right shoulder to Right elbow
    (10,11), # Right elbow to Right wrist
    (11,12), # Right wrist to Right hand
    (13,14), # Left hip to Left knee
    (14,15), # Left knee to Left ankle
    (15,16), # Left ankle to Left foot
    (17,18), # Right hip to Right knee
    (18,19), # Right knee to Right ankle
    (19,20), # Right ankle to Right foot
    (1,5), # Base spine to Left shoulder
    (1,9), # Base spine to Right shoulder
    (1,13), # Base spine to Left hip
    (1,17), # Base spine to Right hip
    (21,5), # Spine to Left shoulder
    (21,9), # Spine to Right shoulder
]

# Remove 1 from each joint to get 0-indexed
connected_joints = [(x-1, y-1) for x, y in connected_joints]

In [4]:
samples = 500
train_videos = {}
test_videos = {}

for video in X.keys():
    actor = int(video[9:12])
    action = int(video[17:20])
    
    if len(X[video]) == 0: continue
    if action > 60:
        if actor not in test_videos:
            test_videos[actor] = []
        test_videos[actor].append(X[video])    
    else:
        if actor not in train_videos:
            train_videos[actor] = []
        train_videos[actor].append(X[video])

In [23]:
def calculate_distances(video, frame_limit=50):
    dist = []
    for idx, frame in enumerate(video):
            if idx >= frame_limit: break
            dist.append([])
            for j in connected_joints:
                dist[idx].append(np.linalg.norm(np.array([frame[j[0] * 3], frame[j[0] * 3 + 1], frame[j[0] * 3 + 2]]) - np.array([frame[j[1] * 3], frame[j[1] * 3 + 1], frame[j[1] * 3 + 2]])))
    return np.array(dist, dtype=np.float32)

In [21]:
train_x = []
val_x = []
test_x = []

train_y = np.concatenate((np.ones(samples), np.zeros(samples)))
val_y = np.concatenate((np.ones(samples), np.zeros(samples)))
test_y = np.concatenate((np.ones(samples), np.zeros(samples)))

# convert y to one-hot
train_y = np.eye(2)[train_y.astype(int)]
val_y = np.eye(2)[val_y.astype(int)]
test_y = np.eye(2)[test_y.astype(int)]

# Same
for i in range(samples):
    actor = random.choice(list(train_videos.keys()))
    vid1 = calculate_distances(random.choice(train_videos[actor]))
    vid2 = calculate_distances(random.choice(train_videos[actor]))
    train_x.append(np.concatenate((vid1, vid2), axis=1))

    actor = random.choice(list(test_videos.keys()))
    vid1 = calculate_distances(random.choice(test_videos[actor]))
    vid2 = calculate_distances(random.choice(test_videos[actor]))
    val_x.append(np.concatenate((vid1, vid2), axis=1))

    actor = random.choice(list(test_videos.keys()))
    vid1 = calculate_distances(random.choice(test_videos[actor]))
    vid2 = calculate_distances(random.choice(test_videos[actor]))
    test_x.append(np.concatenate((vid1, vid2), axis=1))
    

# Diff
for i in range(samples):
    actor = random.choice(list(train_videos.keys()))
    vid1 = calculate_distances(random.choice(train_videos[actor]))
    actor2 = random.choice(list(train_videos.keys()))
    while actor != actor2:
        actor2 = random.choice(list(train_videos.keys()))
    vid2 = calculate_distances(random.choice(train_videos[actor2]))
    train_x.append(np.concatenate((vid1, vid2), axis=1))

    actor = random.choice(list(test_videos.keys()))
    vid1 = calculate_distances(random.choice(test_videos[actor]))
    actor2 = random.choice(list(test_videos.keys()))
    while actor != actor2:
        actor2 = random.choice(list(test_videos.keys()))
    vid2 = calculate_distances(random.choice(test_videos[actor2]))
    val_x.append(np.concatenate((vid1, vid2), axis=1))

    actor = random.choice(list(test_videos.keys()))
    vid1 = calculate_distances(random.choice(test_videos[actor]))
    actor2 = random.choice(list(test_videos.keys()))
    while actor != actor2:
        actor2 = random.choice(list(test_videos.keys()))
    vid2 = calculate_distances(random.choice(test_videos[actor2]))
    test_x.append(np.concatenate((vid1, vid2), axis=1))

train_x = np.array(train_x, dtype=np.float32)
val_x = np.array(val_x, dtype=np.float32)
test_x = np.array(test_x, dtype=np.float32)

In [24]:
train_x = train_x[:, :50, :]
val_x = val_x[:, :50, :]
test_x = test_x[:, :50, :]

In [25]:
import torch.nn as nn
import torch.nn.functional as F
torch.set_printoptions(precision=3, sci_mode=False, linewidth=400)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = nn.Linear(50*46, 2000)
        self.linear2 = nn.Linear(2000, 1000)
        self.linear3 = nn.Linear(1000, 500)
        self.linear4 = nn.Linear(500, 250)
        self.linear5 = nn.Linear(250, 100)
        self.linear6 = nn.Linear(100, 2)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        x = F.relu(self.linear4(x))
        x = F.relu(self.linear5(x))
        x = self.linear6(x)
        x = F.softmax(x, dim=1)
        return x

In [9]:
class dataset(torch.utils.data.Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [10]:
train = dataset(train_x, train_y)
val = dataset(val_x, val_y)
test = dataset(test_x, test_y)

train_loader = torch.utils.data.DataLoader(train, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test, batch_size=32, shuffle=True)

In [16]:
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)

In [17]:
model = Model().cuda()
model.apply(init_weights)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()

In [29]:
for epoch in range(100):
    model.train()
    for i, (x, y) in enumerate(train_loader):
        x = x.cuda()
        y = y.long().cuda()
        optimizer.zero_grad()
        output = model(x).squeeze()
        loss = criterion(output, y.float())
        loss.backward()
        optimizer.step()

    model.eval()
    correct = 0
    total = 0
    for i, (x, y) in enumerate(val_loader):
        x = x.cuda()
        y = y.long().cuda()
        output = model(x).squeeze()
        output = output.unsqueeze(1)  # unsqueeze along the class dimension
        _, pred = output.max(dim=1)
        correct += (pred == y).sum().item()
        total += y.size(0)
    accuracy = correct / total
    print('Epoch: {}, Accuracy: {:.2f}%'.format(epoch, 100 * accuracy))


tensor([0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1], device='cuda:0') tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0', grad_fn=<SqueezeBackward0>)
tensor([0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1], device='cuda:0') tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0', grad_fn=<SqueezeBackward0>)
tensor([0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1], device='cuda:0') tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], device='cuda:0', grad_fn=<SqueezeBackward0>)
tensor([1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0,

KeyboardInterrupt: 