In [1]:
import os
import torch 
import pickle as pkl
from torch.utils.data import Dataset, DataLoader 
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F
from tqdm import tqdm 
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [28]:
def collate_fn(batch):
    label_list, data_list = [], []
    for (data, label) in batch:

        label_list.append(label)
        data_list.append(data.squeeze())
    
    data_len = [d.shape[0] for d in data_list]
    data_list = pad_sequence(data_list, batch_first=True, padding_value=0)
    return (data_list, torch.Tensor(data_len)), torch.Tensor(label_list)
    


In [53]:
#TODO: Add N_Shot Feature
class PoseDataset(Dataset):
    def __init__(self, annotation_file, pose_dir, ids_file, subset=None, n_shot=np.inf):
            #[(name:side:start_frame:end_frame, action_label)]
            #TODO: Implemenet N Shot Format
            if subset is None:
                self.subset = {line.split(' ', 1)[1].replace('\n', '') for line in open(annotation_file, 'r').readlines()}
            else: 
                self.subset = subset
            #if subset is not None:
            self.ids = [line.replace('\n', '') for line in open(ids_file, 'r').readlines()]
            self.counter = {action:0 for action in self.subset}
            
            self.label_dict = {lab:i for lab, i in zip(self.subset, range(len(self.subset)))}
            self.pose_labels = []
            for line in open(annotation_file, 'r'):
                name, action = line.split(' ',1)
                action = action.replace('\n', '')
                if action in self.subset and name in self.ids:
                    if self.counter[action] < n_shot:
                        self.counter[action] += 1
                        self.pose_labels.append((name, action))

            print(f'Action Item Counts {self.counter}')
            self.pose_dir = pose_dir
            self.n_shot = n_shot

    def __len__(self):
        return len(self.pose_labels)


    def __getitem__(self, idx):
        basename, side, start_frame, end_frame = self.pose_labels[idx][0].split(':')
        pose_path = os.path.join(self.pose_dir,''.join([basename, '__', side, '.emb.pkl']))
        with open(pose_path, 'rb') as f:
            pose_emb =  pkl.load(f)
        f.close()
        poses = torch.Tensor([tup[1].reshape(-1, 128) for tup in pose_emb[int(start_frame) +1:int(end_frame) + 1]])
        label = self.label_dict[self.pose_labels[idx][1]]
        
        return poses, label

In [56]:
ann_file = 'C:\\Users\\liamc\\Desktop\\fencing_vision\\src\\vpd_fencing\\action_dataset\\fencing\\all.txt'
pose_path = 'C:\\Users\\liamc\\Desktop\\fencing_vision\\data\\embeddings\\vivpd_res'
val_ids = 'C:\\Users\\liamc\\Desktop\\fencing_vision\\src\\vpd_fencing\\action_dataset\\fencing\\val.ids.txt'
dataset = PoseDataset(ann_file, pose_path, val_ids, subset = ['advancing', 'retreating', 'fleche', 'lunge'])

Action Item Counts {'advancing': 8, 'retreating': 6, 'fleche': 2, 'lunge': 7}


In [57]:
dataset.pose_labels

[('GREEN_BIDA_RUS_vs_BERTA_HUN__22:right:24:32', 'advancing'),
 ('GREEN_BIDA_RUS_vs_BERTA_HUN__17:left:14:29', 'retreating'),
 ('GREEN_BIDA_RUS_vs_BERTA_HUN__14:left:24:34', 'retreating'),
 ('GREEN_BIDA_RUS_vs_BERTA_HUN__6:right:29:36', 'lunge'),
 ('GREEN_BIDA_RUS_vs_BERTA_HUN__2:left:31:39', 'retreating'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__15:left:26:35', 'lunge'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__12:right:22:40', 'advancing'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__10:left:39:46', 'fleche'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__8:right:13:20', 'lunge'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__5:right:0:25', 'advancing'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__4:right:0:14', 'retreating'),
 ('BLUE_ALEXANIN_KAZ_vs_BOREL_FRA__4:left:29:37', 'advancing'),
 ('BARDENET_FRA_vs_CANNONE_FRA__4:left:20:28', 'advancing'),
 ('BARDENET_FRA_vs_CANNONE_FRA__3:left:23:32', 'lunge'),
 ('BARDENET_FRA_vs_LIMARDO_GASCON_VEN__8:right:38:50', 'lunge'),
 ('BARDENET_FRA_vs_LIMARDO_GASCON_VEN__2:left:6:31', 'advancing')

(tensor([[[ 2.1810e-01, -1.8150e-01, -5.1929e-01,  4.4498e-01, -3.8933e-01,
            1.1003e-01,  6.6490e-01,  4.3513e-02,  1.4381e+00, -3.6151e-01,
           -1.0721e+00, -3.4841e-01, -7.3302e-03,  3.8814e-01,  3.5094e-01,
           -1.6550e-04,  4.4455e-01,  1.2414e+00,  1.2185e-02, -2.6826e-01,
            3.1255e-01,  3.0913e-01, -3.6451e-01, -8.4437e-02, -1.2006e-01,
            1.9031e-01,  2.4545e-01,  6.6353e-02, -6.3859e-01,  5.1915e-01,
           -4.6797e-01,  1.1918e+00, -3.1145e-01, -8.1848e-01,  8.8264e-01,
           -2.2600e-02,  5.8654e-03, -4.0354e-03,  6.8581e-02, -7.4934e-01,
            1.2965e-02,  1.3655e+00,  3.3225e-01, -3.1337e-01, -1.0402e-01,
           -1.3228e-01,  6.4482e-01,  1.2166e-01, -1.9875e-01, -6.2479e-02,
            1.3898e-01,  3.1026e-01,  5.9983e-01, -5.4693e-01, -5.2268e-01,
            4.7994e-01, -1.7369e-01,  6.1162e-02, -3.0826e-01,  1.3331e-02,
           -2.9080e-01,  3.1978e-01, -2.4767e-01,  6.7538e-01, -4.6452e-03,
            

In [6]:
train = DataLoader(dataset, collate_fn=collate_fn, batch_size=10)
data_pack, labels = next(iter(train))
packed_seq = torch.nn.utils.rnn.pack_padded_sequence(data_pack[0], data_pack[1], batch_first=True, enforce_sorted=False)
packed_seq.data.shape, packed_seq.batch_sizes

#90 is the sum of the true sequence lengths
#It aggregates the sequences and batches them along elements of the sequence while keeping track of how many elements are in each batch 

  poses = torch.Tensor([tup[1].reshape(-1, 128) for tup in pose_emb[int(start_frame) +1:int(end_frame) + 1]])


(torch.Size([90, 128]),
 tensor([10, 10, 10, 10, 10, 10,  9,  7,  4,  3,  2,  2,  1,  1,  1]))

In [7]:
class GRUNet(nn.Module):
    def __init__(self, emb_dim, hidden_dim, layers, num_classes, dropout):
        super().__init__()
        self.gru = nn.GRU(emb_dim, hidden_dim, num_layers=layers, batch_first=True,
        bidirectional=True)
        self.fc = nn.Sequential(
            nn.BatchNorm1d(hidden_dim*2),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim*2, hidden_dim*2),
            nn.ReLU(), 
            nn.BatchNorm1d(hidden_dim*2),
            nn.Dropout(p=dropout), 
            nn.Linear(hidden_dim*2, num_classes)


        )

    def forward(self, x_pack):
        x, x_len = x_pack
        packed_x = nn.utils.rnn.pack_padded_sequence(x, x_len, batch_first=True, enforce_sorted=False)
        encoded_x = self.gru(packed_x)[0]
        unpacked_encoded_x, lens = nn.utils.rnn.pad_packed_sequence(encoded_x, batch_first=True)
        out = F.max_pool1d(unpacked_encoded_x.permute(0, 2, 1), unpacked_encoded_x.shape[1]).squeeze(2)
        decoded_x = self.fc(out)
        
        return F.softmax(decoded_x)


In [8]:
#model parameters
hidden_size = 200
input_size = 128
num_layers = 50
num_classes = 4
dropout =.1
model = GRUNet(input_size, hidden_size, num_layers, num_classes, dropout)
#training parameters
train_dataloader = DataLoader(dataset, collate_fn=collate_fn, batch_size=10)
validation_dataloader = None
N_EPOCH = 40
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
device = "cuda:0"

In [9]:

def get_loss_and_correct(model, batch, criterion, device):
    data, target = batch
    target = target.long().to(device)
    data = (data[0].to(device), data[1])
    pred = model(data)
    classes = pred.max(dim = 1)[1]
    loss = criterion(pred, target)
    total = len(target) 
    correct = (classes == target).sum()
    size = total
    return loss, correct, size


def step(loss, optimizer):
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    pass

In [10]:
import wandb

In [11]:
train_losses = []
train_accuracies = []
validation_losses = []
validation_accuracies = []


model = model.to(device)
criterion = criterion.to(device)
pbar = tqdm(range(N_EPOCH))

def refresh_loss(l, a):
    pbar.set_description(f'Train Loss {l}, Train Acc {a}')
    pbar.refresh()

wandb.init(project="test-project", entity='carpfire')

wandb.config = {
    'learning_rate':.001,
    'epochs':N_EPOCH,
    'batch_size':10
}


for e in pbar:
    total_train_loss = 0.0
    train_correct = []
    total_validation_loss = 0.0
    validation_correct = []
    model.train()
    for batch in train_dataloader:
        loss, correct, size = get_loss_and_correct(model, batch, criterion, device)
        step(loss, optimizer)
        total_train_loss += loss.item()
        train_correct.append((correct/size).item())
    # with torch.no_grad():
    #     for batch in validation_dataloader:
    #         loss, correct, size = get_loss_and_correct(model, batch, criterion, device)
    #         total_validation_loss += loss.item()
    #         validation_correct.append((correct/size).item())

    train_len = len(train_correct)
    #val_len = len(validation_correct)    
    mean_train_loss = total_train_loss / (len(train_dataloader))
    train_accuracy = sum(train_correct)/train_len
    refresh_loss(mean_train_loss, train_accuracy)
    wandb.log({"loss":mean_train_loss})
    #mean_validation_loss = total_validation_loss / (len(test_ds))
    #validation_accuracy = sum(validation_correct) /val_len
    train_losses.append(mean_train_loss)
    #validation_losses.append(mean_validation_loss)

    train_accuracies.append(train_accuracy)
    # validation_accuracies.append(validation_accuracy)
    wandb.watch(model)
    

  0%|          | 0/40 [00:00<?, ?it/s]Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: carpfire. Use `wandb login --relogin` to force relogin


  return F.softmax(decoded_x)
Train Loss 1.30840060540608, Train Acc 0.42500001032437595: 100%|██████████| 40/40 [03:20<00:00,  5.02s/it]  


Minimal Example with the data loaders set. What do I want know that I didn't have before.

- Ease of tweaking, experimentation 
    - Modeling:
        - Easy to experiment with different models/hyperparameters
    - Data Loading
        - Easy Refinement and One Shot Design 
- Easy Documentation


