In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import time
import argparse
import networkx as nx
import os
import pickle

import warnings
warnings.filterwarnings("ignore")

from common_utils.stg_utils import Node, Edge, load_video_params, load_dataset_params
from common_utils.process import graph_to_feature_vector, adj_to_normalized_tensor
from anomaly_generation.graph_corruption import Corruptor
from embedding_training.transformer_classifier import MyNet

VIDEOS_FOLDER = "../videos/"
GRAPH_FOLDER = "../graphs/"

TRAINING_GRAPH_FOLDER = "../data/training_graphs/"
VIDEO_PARAMS_FOLDER = "../data/video_parameters/"

TRAINING_PICKLE = "../data/train_dataset.pt"

In [21]:
ANOMALY_LABEL = 1.
NORMAL_LABEL = 0.
NUM_NODE_FEATURES = 85 #TODO see if you can dynamically infer it
FRAMES_PER_VIDEOCLIP = 500 # about 15 seconds per clip
BATCH_SIZE = 1 #TODO batching problems, not working with > 1

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
class GraphDataset(Dataset):
    def __init__(self, training_folder: str, videoparams_folder: str, frames_per_videoclip: int = None):
        self.frames_per_videoclip = frames_per_videoclip #to cut the video in videoclips
        self.__load_data(training_folder, videoparams_folder)
    
    #TODO merge two methods for loading data (__load_data and __load_data_jasmine)
    def __load_data(self, training_folder: str, videoparams_folder: str): 
        # TODO code for reading the graphs saved by Jasmine (make this function more readable?)

        self.tensored_videoclips = list()
        self.labels = list()

        for dataset_path in [file for file in os.listdir(training_folder)]:
            print(dataset_path)
            with open(os.path.join(training_folder, dataset_path), 'rb') as f:
                try:
                    dataset = pickle.load(f)
                except:
                    print(f"File {dataset_path} skipped")
                    continue
            
            dataset_name = dataset_path.split("_")[0]
            
            videoparams_path = os.path.join(videoparams_folder, dataset_name + "_video_params.pickle")
            video2params = load_dataset_params(videoparams_path)
            
            for videoname, graphs in dataset.items():
                video_width, video_height = video2params[videoname]["width"], video2params[videoname]["height"]

                # divide the video in videoclips if specified (many videoclips per videoname), otherwise one video per videoname
                if self.frames_per_videoclip and len(graphs) > self.frames_per_videoclip:
                    graph_seqs = [graphs[x:x+self.frames_per_videoclip] for x in range(0, len(graphs), self.frames_per_videoclip)]
                else:
                    graph_seqs = [graphs]
                
                # each videoclip in pytorch
                for graph_seq in graph_seqs:                
                    self.__process_and_corrupt_video_clip(graph_seq, video_width, video_height)

    def __process_and_corrupt_video_clip(self, graph_seq, width, height):
        #TODO currently, a starting empty token is added to each sequence

        # NORMAL GRAPHS FOR CURRENT VIDEO
        features_and_adjs = [self.__get_empty_token()] + [self.__tensorize(graph) for graph in graph_seq]
        # padding
        while len(features_and_adjs) != self.frames_per_videoclip + 1: # +1 because of the starting token
            features_and_adjs.append(self.__get_empty_token())

        self.tensored_videoclips.append(features_and_adjs)
        self.labels.append(torch.tensor(NORMAL_LABEL))
        
        # CORRUPTED GRAPHS FOR CURRENT VIDEO
        #TODO so far, is_stg must be true because only the distance is implemented on the edge as 'weight'
        corruptor = Corruptor(frame_width=width, frame_height=height)
        corrupted_graph_seq = [corruptor.corrupt_graph(graph) for graph in graph_seq]

        corr_features_and_adjs = [self.__get_empty_token()] + [self.__tensorize(graph) for graph in corrupted_graph_seq]
        # padding
        while len(corr_features_and_adjs) != self.frames_per_videoclip + 1: # +1 because of the starting token
            corr_features_and_adjs.append(self.__get_empty_token())

        self.tensored_videoclips.append(corr_features_and_adjs)
        self.labels.append(torch.tensor(ANOMALY_LABEL))
                
    def __tensorize(self, graph):
        if len(graph.nodes)==0:
            return self.__get_empty_token()
        features = graph_to_feature_vector(graph)
        adj = adj_to_normalized_tensor(nx.adjacency_matrix(graph))
        return features, adj
    
    def __get_empty_token(self):
        num_nodes = 20 #TODO this number in normal/abnormal graphs change everytime, see if defining a constant one is fine
        empty_features = torch.zeros(num_nodes, NUM_NODE_FEATURES)
        empty_adj = torch.zeros(num_nodes, num_nodes)
        return empty_features, empty_adj

    def __len__(self):
        return len(self.tensored_videoclips)

    def __getitem__(self, idx):
        # returning features, adjacency for each frame in the videoclip and the videoclip label
        return self.tensored_videoclips[idx], self.labels[idx]



In [None]:
train_set = GraphDataset(TRAINING_GRAPH_FOLDER, VIDEO_PARAMS_FOLDER, frames_per_videoclip=FRAMES_PER_VIDEOCLIP)
torch.save(train_set, TRAINING_PICKLE)
#train_set = GraphDataset(?????)
print(f"Number of clips: {len(train_set)}")

train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=False)
#test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
train_set = torch.load(TRAINING_PICKLE)
print(f"Number of clips: {len(train_set)}")

Number of clips: 1202


In [28]:
def collate_fn_multi(batch):
    seq_features_and_adjs = [[(features.to(DEVICE), adj.to(DEVICE)) for features, adj in item[0]] for item in batch]
    label = [item[1].to(DEVICE) for item in batch]
    #seq_features_and_adjs = [item[0] for item in batch]
    #label = [item[1] for item in batch]
    return [seq_features_and_adjs, label]


train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)#, collate_fn=collate_fn) #TODO fix batching problem
# this collate function works but the problem is when I need to encode one frame at a time (How to do that?)

'''
for batch in iter(train_dataloader):
    seq, label = batch #len(seq) = FRAMES_PER_VIDEOCLIP + 1
    #print(label.shape)
    features, adj = seq[0]
    #print(features.shape)
    #print(adj.shape)
    #break

torch.Size([2])
torch.Size([2, 20, 85])
torch.Size([2, 20, 20])
'''

'\nfor batch in iter(train_dataloader):\n    seq, label = batch #len(seq) = FRAMES_PER_VIDEOCLIP + 1\n    #print(label.shape)\n    features, adj = seq[0]\n    #print(features.shape)\n    #print(adj.shape)\n    #break\n\ntorch.Size([2])\ntorch.Size([2, 20, 85])\ntorch.Size([2, 20, 20])\n'

In [53]:
''' TRAINING PART [WIP]'''
# source (TODO delete) https://n8henrie.com/2021/08/writing-a-transformer-classifier-in-pytorch/


epochs = 10
model = MyNet(
    features_size=NUM_NODE_FEATURES,
    embedding_size=8, #TODO decide a value for embedding vector (MUST BE EVEN and dividible by nhead) a low number is probably enough since most of the features are one-hot encoded ones
    max_length=FRAMES_PER_VIDEOCLIP + 1 #TODO because of the starting token
).to(DEVICE)

criterion = nn.BCELoss()
lr = 1e-4
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad), lr=lr
)
torch.manual_seed(0)
model.train()

print("starting training")
for epoch in range(epochs):
    epoch_loss = 0
    epoch_correct = 0
    epoch_count = 0
    for batch in iter(train_dataloader):
        seq_features_and_adjs, label = batch #len(seq) = FRAMES_PER_VIDEOCLIP + 1 (if no batches)
        seq_features_and_adjs = [(features.to(DEVICE), adj.to(DEVICE)) for features, adj in seq_features_and_adjs]
        label = label.to(DEVICE)

        prediction = model.forward(seq_features_and_adjs)

        label = label.unsqueeze(1) # or prediction = prediction.squeeze(1)
        loss = criterion(prediction, label)
        label = label.squeeze(1)

        prediction = torch.round(prediction) #0 or 1 
        correct = (prediction == label).sum()
    
        '''
        y_pred_tag = torch.round(torch.sigmoid(y_pred))

        correct_results_sum = (y_pred_tag == y_test).sum().float()
        acc = correct_results_sum/y_test.shape[0]
        acc = torch.round(acc * 100)
        '''

        epoch_correct += correct
        epoch_count += label.size(0)

        epoch_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) #TODO

        optimizer.step()

    print(f"{epoch=} - {epoch_loss=}, epoch accuracy: {epoch_correct / epoch_count}")
    #print(f"{test_epoch_loss=}, "test epoch accuracy: {test_epoch_correct / test_epoch_count}")

starting training
epoch=0 - epoch_loss=918.3763313889503, epoch accuracy: 0.5024958252906799
epoch=1 - epoch_loss=846.5465056747198, epoch accuracy: 0.5174708962440491
epoch=2 - epoch_loss=786.5813430249691, epoch accuracy: 0.559900164604187
epoch=3 - epoch_loss=708.4190086275339, epoch accuracy: 0.6439267992973328
epoch=4 - epoch_loss=611.618703328073, epoch accuracy: 0.7138103246688843
epoch=5 - epoch_loss=414.3290413506329, epoch accuracy: 0.9193011522293091
