# MOT16 Challenge

### Imports

In [1]:
# Base directory of the tracking exercise; MOT16 data and the `output`
# checkpoint folder are resolved relative to it further down.
root_dir = "./cv3dst_exercise/"
# Base directory of the GNN exercise; its `src` folder is appended to sys.path
# and the preprocessed detection/ReID databases are loaded from its `data` folder.
gnn_root_dir = "./cv3dst_gnn_exercise/"

%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys
sys.path.append(os.path.join(gnn_root_dir, 'src'))


import matplotlib.pyplot as plt
import numpy as np
import time
from tqdm.autonotebook import tqdm

import torch
from torch.utils.data import DataLoader

from tracker.data_track import MOT16Sequences
from tracker.tracker import Tracker, ReIDTracker
from tracker.utils import run_tracker, cosine_distance, colors
from scipy.optimize import linear_sum_assignment as linear_assignment
from collections import defaultdict
import os.path as osp

import motmetrics as mm
mm.lap.default_solver = 'lap'

# NOTE(review): torch.load unpickles the file, so only load databases from a trusted source.
train_db = torch.load(osp.join(gnn_root_dir, 'data/preprocessed_data_train_2.pth')) # training set with pre-computed boxes and reid embeddings
# Test-set counterpart of train_db (presumably the same layout — confirm against the preprocessing script).
test_db = torch.load(osp.join(gnn_root_dir, 'data/preprocessed_data_test_2.pth'))

  app.launch_new_instance()


### MPNTracker Class
Methods overridden from the base class:
- data_association: uses AssignmentSimilarityNet to compute the distance matrix between the current frame's detections and the existing tracks
- update_tracks: takes the `patience` parameter into account, i.e. unmatched tracks are not deleted immediately but are kept for up to `patience` frames
- update_results: only stores the boxes of active tracks (tracks matched in the current frame)

In [2]:
# Sentinel cost written into the distance matrix for (track, detection) pairs
# that must not be matched. Real costs are 1 - similarity and therefore lie in
# [0, 1], so an assignment whose cost equals this value is treated as "no match".
_UNMATCHED_COST=255

class MPNTracker(ReIDTracker):
    """ReID tracker whose association costs come from a learned assignment network.

    Overrides of the base class:
      - ``data_association``: the cost matrix is ``1 - sigmoid(assign_net(...))``
        taken at the last message passing step.
      - ``update_tracks``: unmatched tracks are not removed immediately; they are
        kept while ``track.inactive <= patience``.
      - ``update_results``: only boxes of tracks matched in the current frame
        (``inactive == 0``) are stored.
    """

    def __init__(self, patience, assign_net, *args, **kwargs):
        """
        Args:
            patience: number of consecutive unmatched frames a track may
                accumulate before it is dropped (dropped once ``inactive > patience``).
            assign_net: callable taking
                ``(track_features, det_features, track_boxes, det_boxes, track_t, det_t)``
                and returning unnormalized match scores indexed as
                ``(step, track, detection)``.
            *args, **kwargs: forwarded unchanged to ``ReIDTracker.__init__``.
        """
        self.assign_net = assign_net
        self.patience = patience
        super().__init__(*args, **kwargs)

    def data_association(self, boxes, scores, pred_features):
        """Match the detections of the current frame to the existing tracks.

        Args:
            boxes: detection boxes of the current frame (indexable per detection).
            scores: detection scores, aligned with ``boxes``.
            pred_features: ReID embeddings of the detections, one row per detection.
        """
        if not self.tracks:
            # No tracks exist yet: every detection starts a new track.
            self.add(boxes, scores, pred_features)
            return

        track_boxes = torch.stack([t.box for t in self.tracks], dim=0).cuda()
        track_features = torch.stack([t.get_feature() for t in self.tracks], dim=0).cuda()

        # Recover timestamps: detections live in the current frame, while a
        # track was last matched ``inactive + 1`` frames ago.
        curr_t = self.im_index * torch.ones((pred_features.shape[0],)).cuda()
        track_t = torch.as_tensor([self.im_index - t.inactive - 1 for t in self.tracks]).cuda()

        # The network returns unnormalized scores; sigmoid maps them to [0, 1].
        pred_sim = self.assign_net(track_features, pred_features.cuda(),
                                   track_boxes, boxes.cuda(), track_t, curr_t)
        pred_sim = torch.sigmoid(pred_sim)

        # Use the predictions of the last message passing step.
        pred_sim = pred_sim[-1].cpu().numpy()
        distance = 1 - pred_sim

        # Forbid matches with similarity < 0.5 to avoid low-confidence associations.
        distance = np.where(pred_sim < 0.5, _UNMATCHED_COST, distance)

        # Hungarian matching on the (tracks x detections) cost matrix.
        row_idx, col_idx = linear_assignment(distance)
        self.update_tracks(row_idx, col_idx, distance, boxes, scores, pred_features)

    def update_results(self):
        """Store the boxes of tracks matched in this frame, then advance the frame index."""
        for t in self.tracks:
            if t.id not in self.results:
                self.results[t.id] = {}
            if t.inactive == 0:  # only active tracks contribute a box
                self.results[t.id][self.im_index] = np.concatenate(
                    [t.box.cpu().numpy(), np.array([t.score])])

        self.im_index += 1

    def update_tracks(self, row_idx, col_idx, distance, boxes, scores, pred_features):
        """Apply an assignment: refresh matched tracks, age the others, spawn new ones.

        Args:
            row_idx, col_idx: track / detection indices returned by the Hungarian solver.
            distance: cost matrix the assignment was computed on; entries equal to
                ``_UNMATCHED_COST`` mark forbidden pairs.
            boxes, scores, pred_features: detections of the current frame.
        """
        matched_track_ids = set()
        seen_box_idx = set()
        for track_idx, box_idx in zip(row_idx, col_idx):
            if distance[track_idx, box_idx] == _UNMATCHED_COST:
                # The solver had to pair them, but the pair is forbidden.
                continue
            track = self.tracks[track_idx]
            track.box = boxes[box_idx]
            track.add_feature(pred_features[box_idx])
            # The track is matched, therefore its inactivity counter is reset.
            track.inactive = 0
            matched_track_ids.add(track.id)
            seen_box_idx.add(box_idx)

        # Every track without a valid match (forbidden pairing or not assigned at
        # all) ages by one frame; tracks older than ``patience`` are dropped.
        surviving_tracks = []
        for t in self.tracks:
            if t.id not in matched_track_ids:
                t.inactive += 1
            if t.inactive <= self.patience:
                surviving_tracks.append(t)
        self.tracks = surviving_tracks

        # Detections that were not consumed by a match start new tracks.
        # Iterating in ascending index order keeps track creation deterministic.
        new_boxes_idx = [i for i in range(len(boxes)) if i not in seen_box_idx]
        new_boxes = [boxes[i] for i in new_boxes_idx]
        new_scores = [scores[i] for i in new_boxes_idx]
        new_features = [pred_features[i] for i in new_boxes_idx]
        self.add(new_boxes, new_scores, new_features)

    # new data_association for full MLP network
    def data_association_new(self, boxes, scores, pred_features, isLastFrame):
        """Association entry point used by ``step_new``.

        The association logic is the same as ``data_association``, so this
        delegates to it instead of duplicating the code. ``isLastFrame`` is
        accepted for interface compatibility but is currently not used.
        """
        self.data_association(boxes, scores, pred_features)

    # new step for full MLP network
    def step_new(self, frame, isLastFrame=False):
        """Process one frame: associate its detections and record the results.

        Args:
            frame: dict whose ``frame['det']`` entry holds ``'boxes'``,
                ``'scores'`` and ``'reid'`` for the current frame.
            isLastFrame: forwarded to ``data_association_new``.
        """
        boxes = frame['det']['boxes']
        scores = frame['det']['scores']
        reid_feats = frame['det']['reid'].cpu()
        self.data_association_new(boxes, scores, reid_feats, isLastFrame)
        # Store the active boxes and advance the frame index exactly once per
        # frame. (Previously the increment sat inside the per-track loop, so the
        # index moved once per track and never when there were no tracks.)
        self.update_results()


## AssignNet
### BipartiteNeuralMessagePassingLayer
network for one message passing step

In [3]:
from torch import nn

class BipartiteNeuralMessagePassingLayer(nn.Module):
    """A single message passing step on a complete bipartite graph A x B.

    Edges are updated first from their two endpoint nodes, then every node is
    updated from the sum of its incident (freshly updated) edges. The same node
    MLP is shared by both sides of the graph.
    """

    def __init__(self, node_dim, edge_dim, dropout=0.):
        super().__init__()

        def build_mlp(in_dim, hidden_dim, out_dim):
            # Two Linear -> ReLU -> Dropout stages.
            return nn.Sequential(
                nn.Linear(in_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout),
                nn.Linear(hidden_dim, out_dim), nn.ReLU(), nn.Dropout(dropout),
            )

        # Edge input = both endpoint nodes + current edge embedding + initial
        # edge embedding (hence 2 * edge_dim).
        edge_in_dim = 2 * node_dim + 2 * edge_dim
        self.edge_mlp = build_mlp(edge_in_dim, int(edge_in_dim / 2), edge_dim)

        # Node input = node embedding + aggregated incident edge embeddings.
        node_in_dim = node_dim + edge_dim
        self.node_mlp = build_mlp(node_in_dim, int(node_dim / 2), node_dim)

    def edge_update(self, edge_embeds, nodes_a_embeds, nodes_b_embeds):
        """
        Node-to-edge update.

        Args:
            edge_embeds: torch.Tensor with shape (|A|, |B|, 2 x edge_dim)
            nodes_a_embeds: torch.Tensor with shape (|A|, node_dim)
            nodes_b_embeds: torch.Tensor with shape (|B|, node_dim)

        returns:
            updated_edge_feats = torch.Tensor with shape (|A|, |B|, edge_dim)
        """
        num_a, num_b, _ = edge_embeds.shape
        _, node_dim = nodes_a_embeds.shape

        # Broadcast each node embedding along the opposite side of the graph so
        # that entry (i, j) sees node a_i and node b_j.
        tiled_a = nodes_a_embeds.reshape(num_a, 1, node_dim).expand(-1, num_b, -1)
        tiled_b = nodes_b_embeds.reshape(1, num_b, node_dim).expand(num_a, -1, -1)

        # (|A|, |B|, 2*node_dim + 2*edge_dim), ordered as [a_i, edge_ij, b_j]
        mlp_input = torch.cat([tiled_a, edge_embeds, tiled_b], dim=2)
        return self.edge_mlp(mlp_input)

    def node_update(self, edge_embeds, nodes_a_embeds, nodes_b_embeds):
        """
        Edge-to-node update with 'sum' aggregation.

        Args:
            edge_embeds: torch.Tensor with shape (|A|, |B|, edge_dim)
            nodes_a_embeds: torch.Tensor with shape (|A|, node_dim)
            nodes_b_embeds: torch.Tensor with shape (|B|, node_dim)

        returns:
            tuple(
                updated_nodes_a_embeds: torch.Tensor with shape (|A|, node_dim),
                updated_nodes_b_embeds: torch.Tensor with shape (|B|, node_dim)
                )
        """
        num_a, num_b, edge_dim = edge_embeds.shape

        # Each a-node aggregates over its |B| edges, each b-node over its |A| edges.
        incoming_a = edge_embeds.sum(dim=1).reshape(num_a, edge_dim)
        incoming_b = edge_embeds.sum(dim=0).reshape(num_b, edge_dim)

        updated_a = self.node_mlp(torch.cat([nodes_a_embeds, incoming_a], dim=1))
        updated_b = self.node_mlp(torch.cat([nodes_b_embeds, incoming_b], dim=1))
        return updated_a, updated_b

    def forward(self, edge_embeds, nodes_a_embeds, nodes_b_embeds):
        """Run the edge update followed by the node update on the updated edges."""
        new_edges = self.edge_update(edge_embeds, nodes_a_embeds, nodes_b_embeds)
        new_nodes_a, new_nodes_b = self.node_update(new_edges, nodes_a_embeds, nodes_b_embeds)
        return new_edges, new_nodes_a, new_nodes_b

### AssignmentSimilarityNet
Given the ReID embeddings, box coordinates and timestamps of the past tracks and of the current frame's detections, classify every (track, detection) edge as match / no match.

In [4]:
def euclidean_distance(input1, input2):
    """Computes pairwise squared L2 distances between L2-normalized rows.

    Both inputs are L2-normalized along dim 1 first, so for non-zero rows the
    result equals ``2 - 2 * cosine_similarity`` and lies in [0, 4].

    The computation runs on the device of the inputs (no hard-coded CUDA), so
    it works on CPU-only machines as well.

    Args:
        input1 (torch.Tensor): 2-D feature matrix with shape (m, d).
        input2 (torch.Tensor): 2-D feature matrix with shape (n, d).
    Returns:
        torch.Tensor: distance matrix with shape (m, n), on the inputs' device.
    """
    input1 = torch.nn.functional.normalize(input1, p=2, dim=1)
    input2 = torch.nn.functional.normalize(input2, p=2, dim=1)

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, with the squared norms broadcast
    # over the (m, n) grid instead of materialized through matmuls with ones.
    sq_norm1 = torch.square(input1).sum(dim=1, keepdim=True)      # (m, 1)
    sq_norm2 = torch.square(input2).sum(dim=1, keepdim=True).t()  # (1, n)
    return sq_norm1 - 2 * torch.mm(input1, input2.t()) + sq_norm2

In [5]:
from torch.nn import functional as F

class AssignmentSimilarityNet(nn.Module):
    """Message passing network that scores every (track, detection) pair.

    Nodes are tracks and current-frame detections (ReID embeddings reduced to
    ``node_dim``); edges carry geometry, time and appearance-distance features.
    After each of the ``num_steps`` message passing rounds (all sharing the same
    layer weights) the edges are classified, yielding one unnormalized score per
    step and pair.
    """

    def __init__(self, reid_network, node_dim, edge_dim, reid_dim, edges_in_dim, edges_hidden_dim, num_steps, dropout=0.):
        """
        Args:
            reid_network: stored as an attribute; not used by ``forward``.
            node_dim: size of the node embeddings.
            edge_dim: size of the edge embeddings.
            reid_dim: size of the incoming ReID embeddings.
            edges_in_dim: number of raw edge features fed to the edge encoder
                (``forward`` produces 6: five geometry/time features + ReID distance).
            edges_hidden_dim: hidden size of the edge encoder.
            num_steps: number of message passing rounds.
            dropout: dropout probability used in all MLPs except the classifier.
        """
        super().__init__()
        self.reid_network = reid_network
        self.graph_net = BipartiteNeuralMessagePassingLayer(node_dim=node_dim, edge_dim=edge_dim, dropout=dropout)
        self.num_steps = num_steps
        # Node encoder: reid_dim -> reid_dim/4 -> node_dim
        self.cnn_linear = nn.Sequential(
            nn.Linear(reid_dim, int(reid_dim/4)), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(int(reid_dim/4), node_dim), nn.ReLU(), nn.Dropout(dropout),
        )
        # Edge encoder: edges_in_dim -> edges_hidden_dim -> edges_hidden_dim -> edge_dim
        self.edge_in_mlp = nn.Sequential(
            nn.Linear(edges_in_dim, edges_hidden_dim), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(edges_hidden_dim, edges_hidden_dim), nn.ReLU(), nn.Dropout(dropout),
            nn.Linear(edges_hidden_dim, edge_dim), nn.ReLU(), nn.Dropout(dropout),
        )
        # Edge classifier: edge_dim -> edge_dim/2 -> 1 (unnormalized score)
        self.classifier = nn.Sequential(
            nn.Linear(edge_dim, int(edge_dim/2)), nn.ReLU(),
            nn.Linear(int(edge_dim/2), 1),
        )

    def compute_edge_feats(self, track_coords, current_coords, track_t, curr_t, eps=1e-6):
        """
        Computes the initial edge feature tensor.

        For track box i and detection box j the features are
            [2*(x_j - x_i)/(h_i + h_j), 2*(y_j - y_i)/(h_i + h_j),
             log(h_i / h_j), log(w_i / w_j), t_j - t_i]
        where (x, y) is the top-left corner and (w, h) the box size.

        The result is built on the device of ``track_coords`` (no CPU round
        trip), and box sizes are clamped to ``eps`` so that zero-size boxes do
        not produce inf/NaN features.

        Args:
            track_coords: track's frame box coordinates, given by top-left and bottom-right coordinates
                          torch.Tensor with shape (num_tracks, 4)
            current_coords: current frame box coordinates, given by top-left and bottom-right coordinates
                            has shape (num_boxes, 4)
            track_t: track's timestamps, torch.Tensor with shape (num_tracks, )
            curr_t: current frame's timestamps, torch.Tensor with shape (num_boxes,)
            eps: lower bound applied to box widths and heights.

        Returns:
            float32 tensor with shape (num_tracks, num_boxes, 5) containing pairwise
            position and time difference features
        """
        device = track_coords.device
        as_float = dict(device=device, dtype=torch.float32)

        # Shapes (num_tracks, 1, 4) and (1, num_boxes, 4): all pairwise terms
        # below are obtained by broadcasting instead of explicit tiling.
        track = track_coords.to(**as_float).reshape(-1, 1, 4)
        current = current_coords.to(**as_float).reshape(1, -1, 4)

        track_w = (track[..., 2] - track[..., 0]).clamp_min(eps)
        track_h = (track[..., 3] - track[..., 1]).clamp_min(eps)
        current_w = (current[..., 2] - current[..., 0]).clamp_min(eps)
        current_h = (current[..., 3] - current[..., 1]).clamp_min(eps)

        height_sum = track_h + current_h  # (num_tracks, num_boxes)
        delta_x = 2 * (current[..., 0] - track[..., 0]) / height_sum
        delta_y = 2 * (current[..., 1] - track[..., 1]) / height_sum
        log_h_ratio = torch.log(track_h / current_h)
        log_w_ratio = torch.log(track_w / current_w)
        delta_t = curr_t.to(**as_float).reshape(1, -1) - track_t.to(**as_float).reshape(-1, 1)

        # has shape (num_tracks, num_boxes, 5)
        return torch.stack((delta_x, delta_y, log_h_ratio, log_w_ratio, delta_t), dim=-1)

    def forward(self, track_app, current_app, track_coords, current_coords, track_t, curr_t):
        """
        Args:
            track_app: track's reid embeddings, torch.Tensor with shape (num_tracks, 512)
            current_app: current frame detections' reid embeddings, torch.Tensor with shape (num_boxes, 512)
            track_coords: track's frame box coordinates, given by top-left and bottom-right coordinates
                          torch.Tensor with shape (num_tracks, 4)
            current_coords: current frame box coordinates, given by top-left and bottom-right coordinates
                            has shape (num_boxes, 4)

            track_t: track's timestamps, torch.Tensor with shape (num_tracks, )
            curr_t: current frame's timestamps, torch.Tensor with shape (num_boxes,)

        Returns:
            classified edges: torch.Tensor with shape (num_steps, num_tracks, num_boxes),
                             containing at entry (step, i, j) the unnormalized probability that track i and
                             detection j are a match, according to the classifier at the given neural message passing step
        """

        # Initial edge embeddings: 5 geometry/time features + ReID distance.
        dist_reid = euclidean_distance(track_app, current_app)
        pos_edge_feats = self.compute_edge_feats(track_coords, current_coords, track_t, curr_t)
        # Follow the device of the appearance distances rather than assuming CUDA.
        edge_feats = torch.cat((pos_edge_feats.to(dist_reid.device), dist_reid.unsqueeze(-1)), dim=-1)
        edge_embeds = self.edge_in_mlp(edge_feats)
        # ``edge_embeds`` is only ever rebound below (never modified in place),
        # so keeping a plain reference to the initial embeddings is sufficient.
        initial_edge_embeds = edge_embeds

        # Initial node embeddings: reduce dimensionality from reid_dim to node_dim.
        # ``cnn_linear`` already ends in ReLU (+ Dropout), so its output is
        # non-negative and no further ReLU is needed.
        track_embeds = self.cnn_linear(track_app)
        curr_embeds = self.cnn_linear(current_app)

        classified_edges = []
        for _ in range(self.num_steps):
            # The initial edge features are re-injected at every step.
            edge_embeds = torch.cat((edge_embeds, initial_edge_embeds), dim=-1)
            edge_embeds, track_embeds, curr_embeds = self.graph_net(edge_embeds=edge_embeds,
                                                                    nodes_a_embeds=track_embeds,
                                                                    nodes_b_embeds=curr_embeds)

            classified_edges.append(self.classifier(edge_embeds))

        return torch.stack(classified_edges).squeeze(-1)

In [6]:
from gnn.dataset import LongTrackTrainingDataset
from torch.utils.data import DataLoader
from gnn.trainer import train_one_epoch

# MAX_PATIENCE is used both as `max_past_frames` of the training dataset and as
# the tracker's `patience` at evaluation time.
MAX_PATIENCE = 20
MAX_EPOCHS = 20
# Run validation every EVAL_FREQ epochs.
EVAL_FREQ = 1


# Define our model, and init
assign_net = AssignmentSimilarityNet(reid_network=None, # Not needed since we work with precomputed features
                                     node_dim=32,
                                     edge_dim=16,
                                     reid_dim=512,
                                     edges_in_dim=6,
                                     edges_hidden_dim=18,
                                     num_steps=15).cuda()

# Training data. Validation is run on two sequences in the evaluation loop.
# NOTE(review): the original comment was truncated ("We only keep two sequences
# for validation. You can ..."); confirm that 'MOT16-train' excludes the
# validation sequences.
dataset = LongTrackTrainingDataset(dataset='MOT16-train',
                                   db=train_db,
                                   root_dir= osp.join(root_dir, 'data/MOT16'),
                                   max_past_frames = MAX_PATIENCE,
                                   vis_threshold=0.25)

# The identity collate_fn hands each batch over as a plain list of samples
# instead of stacking them into tensors.
data_loader = DataLoader(dataset, batch_size=8, collate_fn = lambda x: x,
                         shuffle=True, num_workers=0, drop_last=True)
device = torch.device('cuda')
optimizer = torch.optim.Adam(assign_net.parameters(), lr=0.001)
# Decay the learning rate every 5 epochs (StepLR's default decay factor is used).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5)

In [7]:
# Make sure the checkpoint directory exists before the first torch.save call.
ckpt_dir = osp.join(root_dir, 'output')
os.makedirs(ckpt_dir, exist_ok=True)

best_idf1 = 0.
for epoch in range(1, MAX_EPOCHS + 1):
    print(f"-------- EPOCH {epoch:2d} --------")
    # Validation below switches the network to eval mode; switch back explicitly
    # so that training never silently runs in eval mode (relevant once
    # dropout > 0). Harmless if train_one_epoch already does this itself.
    assign_net.train()
    train_one_epoch(model = assign_net, data_loader=data_loader, optimizer=optimizer, print_freq=100)
    scheduler.step()

    if epoch % EVAL_FREQ == 0:
        tracker =  MPNTracker(assign_net=assign_net.eval(), obj_detect=None, patience=MAX_PATIENCE)
        val_sequences = MOT16Sequences('MOT16-val2', osp.join(root_dir, 'data/MOT16'), vis_threshold=0.)
        res = run_tracker(val_sequences, db=train_db, tracker=tracker, output_dir=None)
        idf1 = res.loc['OVERALL']['idf1']
        # Keep only the checkpoint with the best overall IDF1 on the validation sequences.
        if idf1 > best_idf1:
            best_idf1 = idf1
            torch.save(assign_net.state_dict(), osp.join(ckpt_dir, 'best_ckpt.pth'))


-------- EPOCH  1 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 1.338. Accuracy: 0.938. Recall: 0.448. Precision: 0.345
Iter 200. Loss: 0.163. Accuracy: 0.993. Recall: 0.978. Precision: 0.917
Iter 300. Loss: 0.066. Accuracy: 0.996. Recall: 0.992. Precision: 0.946
Iter 400. Loss: 0.033. Accuracy: 0.999. Recall: 0.996. Precision: 0.981
Iter 500. Loss: 0.025. Accuracy: 0.999. Recall: 0.997. Precision: 0.988
Iter 600. Loss: 0.018. Accuracy: 0.999. Recall: 0.997. Precision: 0.989

Tracking: MOT16-02
Tracks found: 117
Runtime for MOT16-02: 15.9 s.
Tracking: MOT16-11
Tracks found: 97
Runtime for MOT16-11: 18.9 s.
Runtime for all sequences: 34.8 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 48.8% 69.2% 37.6% 52.2% 96.1%  62 11 38 13 390  8873 108  216 49.6% 0.094
MOT16-11 70.5% 77.7% 64.5% 80.2% 96.6%  75 44 24  7 266  1871  37   89 77.0% 0.083
OVERALL  56.9% 72.9% 46.7% 61.7% 96.3% 137 55 62 20 656 10744 145  305 58.8% 0.089
-------- EPOCH  2 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.019. Accuracy: 0.999. Recall: 0.997. Precision: 0.990
Iter 200. Loss: 0.016. Accuracy: 0.999. Recall: 0.999. Precision: 0.992
Iter 300. Loss: 0.013. Accuracy: 0.999. Recall: 0.998. Precision: 0.993
Iter 400. Loss: 0.018. Accuracy: 0.999. Recall: 0.998. Precision: 0.990
Iter 500. Loss: 0.016. Accuracy: 0.999. Recall: 0.997. Precision: 0.992
Iter 600. Loss: 0.013. Accuracy: 0.999. Recall: 0.998. Precision: 0.991

Tracking: MOT16-02
Tracks found: 111
Runtime for MOT16-02: 15.5 s.
Tracking: MOT16-11
Tracks found: 88
Runtime for MOT16-11: 20.0 s.
Runtime for all sequences: 35.5 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 48.5% 68.9% 37.4% 52.2% 96.1%  62 11 38 13 390  8873 102  219 49.6% 0.095
MOT16-11 71.6% 78.9% 65.5% 80.2% 96.6%  75 44 24  7 266  1871  34   90 77.0% 0.083
OVERALL  57.2% 73.2% 46.9% 61.7% 96.3% 137 55 62 20 656 10744 136  309 58.8% 0.090
-------- EPOCH  3 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.013. Accuracy: 0.999. Recall: 0.998. Precision: 0.990
Iter 200. Loss: 0.012. Accuracy: 0.999. Recall: 0.998. Precision: 0.988
Iter 300. Loss: 0.012. Accuracy: 0.999. Recall: 0.999. Precision: 0.993
Iter 400. Loss: 0.011. Accuracy: 0.999. Recall: 0.999. Precision: 0.992
Iter 500. Loss: 0.008. Accuracy: 1.000. Recall: 0.999. Precision: 0.994
Iter 600. Loss: 0.012. Accuracy: 0.999. Recall: 0.998. Precision: 0.993

Tracking: MOT16-02
Tracks found: 92
Runtime for MOT16-02: 15.8 s.
Tracking: MOT16-11
Tracks found: 80
Runtime for MOT16-11: 18.8 s.
Runtime for all sequences: 34.6 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 45.9% 65.1% 35.4% 52.2% 96.1%  62 11 38 13 390  8873 104  221 49.6% 0.095
MOT16-11 71.3% 78.6% 65.2% 80.2% 96.6%  75 44 24  7 266  1871  33   90 77.0% 0.083
OVERALL  55.4% 71.0% 45.4% 61.7% 96.3% 137 55 62 20 656 10744 137  311 58.8% 0.090
-------- EPOCH  4 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.012. Accuracy: 1.000. Recall: 0.999. Precision: 0.994
Iter 200. Loss: 0.013. Accuracy: 0.999. Recall: 0.999. Precision: 0.992
Iter 300. Loss: 0.010. Accuracy: 0.999. Recall: 0.998. Precision: 0.993
Iter 400. Loss: 34042.116. Accuracy: 0.995. Recall: 0.956. Precision: 0.948
Iter 500. Loss: 1.345. Accuracy: 0.903. Recall: 0.668. Precision: 0.375
Iter 600. Loss: 0.511. Accuracy: 0.916. Recall: 0.961. Precision: 0.543

Tracking: MOT16-02
Tracks found: 53
Runtime for MOT16-02: 12.3 s.
Tracking: MOT16-11
Tracks found: 531
Runtime for MOT16-11: 15.0 s.
Runtime for all sequences: 27.3 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN  IDs   FM  MOTA  MOTP
MOT16-02 32.5% 46.2% 25.1% 52.2% 96.1%  62 12 38 12 390  8873 1127  293 44.1% 0.093
MOT16-11 27.6% 30.5% 25.3% 80.2% 96.6%  75 44 25  6 266  1871 5142  103 22.9% 0.083
OVERALL  30.7% 39.3% 25.2% 61.7% 96.3% 137 56 63 18 656 10744 6269  396 36.9% 0.089
-------- EPOCH  5 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.258. Accuracy: 0.961. Recall: 0.981. Precision: 0.688
Iter 200. Loss: 0.237. Accuracy: 0.963. Recall: 0.984. Precision: 0.709
Iter 300. Loss: 0.240. Accuracy: 0.964. Recall: 0.982. Precision: 0.715
Iter 400. Loss: 0.194. Accuracy: 0.971. Recall: 0.985. Precision: 0.749
Iter 500. Loss: 0.201. Accuracy: 0.974. Recall: 0.985. Precision: 0.777
Iter 600. Loss: 0.190. Accuracy: 0.975. Recall: 0.985. Precision: 0.790

Tracking: MOT16-02
Tracks found: 71
Runtime for MOT16-02: 12.9 s.
Tracking: MOT16-11
Tracks found: 65
Runtime for MOT16-11: 15.5 s.
Runtime for all sequences: 28.4 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 36.6% 51.9% 28.2% 52.2% 96.1%  62 12 38 12 390  8873 385  239 48.1% 0.095
MOT16-11 59.1% 65.2% 54.1% 80.2% 96.6%  75 44 25  6 267  1872 581   95 71.2% 0.083
OVERALL  45.0% 57.7% 36.9% 61.6% 96.3% 137 56 63 18 657 10745 966  334 55.9% 0.090
-------- EPOCH  6 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.189. Accuracy: 0.979. Recall: 0.984. Precision: 0.814
Iter 200. Loss: 0.187. Accuracy: 0.979. Recall: 0.987. Precision: 0.815
Iter 300. Loss: 0.173. Accuracy: 0.979. Recall: 0.989. Precision: 0.824
Iter 400. Loss: 0.190. Accuracy: 0.978. Recall: 0.986. Precision: 0.820
Iter 500. Loss: 0.182. Accuracy: 0.980. Recall: 0.986. Precision: 0.822
Iter 600. Loss: 0.172. Accuracy: 0.981. Recall: 0.986. Precision: 0.823

Tracking: MOT16-02
Tracks found: 68
Runtime for MOT16-02: 13.3 s.
Tracking: MOT16-11
Tracks found: 65
Runtime for MOT16-11: 17.1 s.
Runtime for all sequences: 30.4 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 35.8% 50.8% 27.6% 52.2% 96.1%  62 12 38 12 390  8873 372  240 48.1% 0.095
MOT16-11 60.6% 66.8% 55.4% 80.2% 96.6%  75 44 25  6 267  1872 570   92 71.3% 0.083
OVERALL  45.1% 57.8% 37.0% 61.6% 96.3% 137 56 63 18 657 10745 942  332 55.9% 0.090
-------- EPOCH  7 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.181. Accuracy: 0.978. Recall: 0.986. Precision: 0.821
Iter 200. Loss: 0.176. Accuracy: 0.980. Recall: 0.985. Precision: 0.831
Iter 300. Loss: 0.186. Accuracy: 0.980. Recall: 0.983. Precision: 0.826
Iter 400. Loss: 0.184. Accuracy: 0.979. Recall: 0.985. Precision: 0.816
Iter 500. Loss: 0.174. Accuracy: 0.980. Recall: 0.985. Precision: 0.825
Iter 600. Loss: 0.176. Accuracy: 0.980. Recall: 0.988. Precision: 0.827

Tracking: MOT16-02
Tracks found: 68
Runtime for MOT16-02: 12.6 s.
Tracking: MOT16-11
Tracks found: 66
Runtime for MOT16-11: 15.6 s.
Runtime for all sequences: 28.2 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 35.9% 51.0% 27.7% 52.2% 96.1%  62 12 38 12 390  8873 311  239 48.5% 0.095
MOT16-11 62.0% 68.4% 56.8% 80.2% 96.6%  75 44 25  6 267  1872 534   92 71.7% 0.083
OVERALL  45.7% 58.6% 37.5% 61.6% 96.3% 137 56 63 18 657 10745 845  331 56.3% 0.090
-------- EPOCH  8 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.176. Accuracy: 0.981. Recall: 0.986. Precision: 0.826
Iter 200. Loss: 0.169. Accuracy: 0.982. Recall: 0.985. Precision: 0.836
Iter 300. Loss: 0.173. Accuracy: 0.980. Recall: 0.986. Precision: 0.829
Iter 400. Loss: 0.174. Accuracy: 0.981. Recall: 0.987. Precision: 0.828
Iter 500. Loss: 0.180. Accuracy: 0.980. Recall: 0.983. Precision: 0.830
Iter 600. Loss: 0.170. Accuracy: 0.982. Recall: 0.989. Precision: 0.835

Tracking: MOT16-02
Tracks found: 70
Runtime for MOT16-02: 14.1 s.
Tracking: MOT16-11
Tracks found: 67
Runtime for MOT16-11: 15.1 s.
Runtime for all sequences: 29.2 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 36.9% 52.4% 28.5% 52.2% 96.1%  62 12 38 12 390  8873 301  241 48.5% 0.096
MOT16-11 62.7% 69.1% 57.3% 80.2% 96.6%  75 44 25  6 267  1872 505   92 72.0% 0.083
OVERALL  46.6% 59.7% 38.2% 61.6% 96.3% 137 56 63 18 657 10745 806  333 56.4% 0.090
-------- EPOCH  9 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.168. Accuracy: 0.981. Recall: 0.988. Precision: 0.835
Iter 200. Loss: 0.164. Accuracy: 0.982. Recall: 0.987. Precision: 0.835
Iter 300. Loss: 0.175. Accuracy: 0.981. Recall: 0.987. Precision: 0.833
Iter 400. Loss: 0.181. Accuracy: 0.980. Recall: 0.987. Precision: 0.832
Iter 500. Loss: 0.164. Accuracy: 0.983. Recall: 0.989. Precision: 0.838
Iter 600. Loss: 0.171. Accuracy: 0.980. Recall: 0.987. Precision: 0.830

Tracking: MOT16-02
Tracks found: 74
Runtime for MOT16-02: 12.4 s.
Tracking: MOT16-11
Tracks found: 68
Runtime for MOT16-11: 14.6 s.
Runtime for all sequences: 27.0 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 36.5% 51.8% 28.1% 52.2% 96.1%  62 12 38 12 390  8873 304  240 48.5% 0.096
MOT16-11 63.9% 70.5% 58.5% 80.2% 96.6%  75 44 25  6 267  1872 482   92 72.2% 0.083
OVERALL  46.8% 59.9% 38.4% 61.6% 96.3% 137 56 63 18 657 10745 786  332 56.5% 0.090
-------- EPOCH 10 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.168. Accuracy: 0.981. Recall: 0.988. Precision: 0.838
Iter 200. Loss: 0.167. Accuracy: 0.982. Recall: 0.991. Precision: 0.840
Iter 300. Loss: 0.183. Accuracy: 0.979. Recall: 0.986. Precision: 0.831
Iter 400. Loss: 0.164. Accuracy: 0.983. Recall: 0.986. Precision: 0.844
Iter 500. Loss: 0.166. Accuracy: 0.982. Recall: 0.988. Precision: 0.838
Iter 600. Loss: 0.170. Accuracy: 0.982. Recall: 0.986. Precision: 0.842

Tracking: MOT16-02
Tracks found: 73
Runtime for MOT16-02: 12.5 s.
Tracking: MOT16-11
Tracks found: 68
Runtime for MOT16-11: 15.6 s.
Runtime for all sequences: 28.1 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 37.0% 52.5% 28.5% 52.2% 96.1%  62 12 38 12 390  8873 303  239 48.5% 0.095
MOT16-11 63.4% 69.9% 58.0% 80.2% 96.6%  75 44 24  7 267  1872 462   91 72.4% 0.083
OVERALL  46.9% 60.1% 38.4% 61.6% 96.3% 137 56 62 19 657 10745 765  330 56.6% 0.090
-------- EPOCH 11 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.155. Accuracy: 0.984. Recall: 0.988. Precision: 0.842
Iter 200. Loss: 0.171. Accuracy: 0.981. Recall: 0.989. Precision: 0.835
Iter 300. Loss: 0.180. Accuracy: 0.981. Recall: 0.984. Precision: 0.830
Iter 400. Loss: 0.167. Accuracy: 0.983. Recall: 0.986. Precision: 0.839
Iter 500. Loss: 0.179. Accuracy: 0.979. Recall: 0.987. Precision: 0.838
Iter 600. Loss: 0.170. Accuracy: 0.981. Recall: 0.987. Precision: 0.840

Tracking: MOT16-02
Tracks found: 73
Runtime for MOT16-02: 12.0 s.
Tracking: MOT16-11
Tracks found: 67
Runtime for MOT16-11: 14.7 s.
Runtime for all sequences: 26.7 s.
          IDF1   IDP   IDR  Rcll  Prcn  GT MT PT ML  FP    FN IDs   FM  MOTA  MOTP
MOT16-02 37.1% 52.6% 28.6% 52.2% 96.1%  62 12 38 12 390  8873 302  239 48.5% 0.095
MOT16-11 63.0% 69.4% 57.6% 80.2% 96.6%  75 44 24  7 267  1872 459   91 72.5% 0.083
OVERALL  46.8% 60.0% 38.4% 61.6% 96.3% 137 56 62 19 657 10745 761  330 56.6% 0.090
-------- EPOCH 12 --------


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Iter 100. Loss: 0.181. Accuracy: 0.980. Recall: 0.986. Precision: 0.830
Iter 200. Loss: 0.167. Accuracy: 0.981. Recall: 0.988. Precision: 0.836
Iter 300. Loss: 0.167. Accuracy: 0.981. Recall: 0.986. Precision: 0.843
Iter 400. Loss: 0.165. Accuracy: 0.984. Recall: 0.987. Precision: 0.843
Iter 500. Loss: 0.165. Accuracy: 0.982. Recall: 0.988. Precision: 0.833
Iter 600. Loss: 0.177. Accuracy: 0.979. Recall: 0.987. Precision: 0.831

Tracking: MOT16-02


KeyboardInterrupt: 

In [8]:
# Restore the weights of the checkpoint saved by the training loop (best
# validation IDF1) and build a tracker around the network in eval mode.
best_ckpt = torch.load(osp.join(root_dir, 'output', 'best_ckpt.pth'))
assign_net.load_state_dict(best_ckpt)
tracker =  MPNTracker(assign_net=assign_net.eval(), obj_detect=None, patience=MAX_PATIENCE)

## Checking Performance
Save images with gt and tracking result

In [34]:
from cycler import cycler as cy
# Cycler over the edge colours ('ec') listed in `colors`.
cyl = cy(ec=colors)
# Calling a Cycler yields an iterator that repeats its entries indefinitely.
loop_cy_iter = cyl()


def _next_style():
    """Return the next style dict from the repeating colour cycle."""
    return next(loop_cy_iter)


# Lazily assigns a style to every key on first lookup and keeps it afterwards,
# so each track id is always drawn with the same colour.
styles = defaultdict(_next_style)
def _add_box(ax, box, **style):
    """Draw an unfilled rectangle for a box given as (x1, y1, x2, y2) corners."""
    ax.add_patch(
        plt.Rectangle(
            (box[0], box[1]),
            box[2] - box[0],
            box[3] - box[1],
            fill=False,
            linewidth=1.0,
            **style
        ))


def plotSequences(seq_name, show_gt=True, show_prediction=False, tracker=None, db=None,
                  output_dir="./VideoOutput"):
    """Render every frame of the requested MOT16 sequence(s) to image files.

    Each frame is drawn with matplotlib and saved as
    ``<output_dir>/<zero-padded frame index>``; no extension is given, so
    matplotlib chooses its default image format.

    Args:
        seq_name: Sequence or split name passed to ``MOT16Sequences``.
        show_gt: If true, draw the boxes in ``frame['gt']`` for frames that
            have that key.
        show_prediction: If true and ``tracker`` is given, run the tracker
            over ``db[str(seq)]`` and draw its boxes, each labelled with the
            track id in a per-track colour.
        tracker: Object providing ``reset()``, ``step(frame)`` and
            ``get_results()``. ``get_results()`` is used as a mapping
            ``track id -> {frame index -> box}``.
        db: Mapping from ``str(seq)`` to the iterable of frames fed to
            ``tracker.step``. Required when predictions are drawn.
        output_dir: Directory that receives the images. It is created if
            missing.

    NOTE(review): file names contain only the per-sequence frame index, so
    when ``seq_name`` expands to several sequences, later sequences overwrite
    the files of earlier ones. Use a separate ``output_dir`` per sequence if
    all frames must be kept.
    """
    data_dir = os.path.join(root_dir, 'data/MOT16')
    sequences = MOT16Sequences(seq_name, data_dir, load_seg=True)

    # savefig does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)
    dpi = 150

    for seq in sequences:
        results = None
        if show_prediction and tracker is not None:
            tracker.reset()
            with torch.no_grad():
                for frame in db[str(seq)]:
                    tracker.step(frame)
            results = tracker.get_results()

        for i, frame in enumerate(seq):
            fig, ax = plt.subplots(1, dpi=dpi)

            # assumes frame['img'] is a channel-first tensor with values in
            # [0, 1] -- TODO confirm; permute makes it (height, width, channels).
            img = frame['img'].mul(255).permute(1, 2, 0).byte().numpy()
            # The array is row-major: rows (height) first, then columns (width).
            height, width, _ = img.shape

            ax.imshow(img, cmap='gray')
            # set_size_inches takes (width, height); size the canvas to the image.
            fig.set_size_inches(width / dpi, height / dpi)

            # plot gt
            if show_gt and 'gt' in frame:
                for box in frame['gt'].values():
                    _add_box(ax, box)

            # tracker
            if results:
                for j, t in results.items():
                    if i not in t:
                        continue
                    t_i = t[i]
                    style = styles[j]
                    _add_box(ax, t_i, **style)
                    center_x = t_i[0] + (t_i[2] - t_i[0]) / 2.0
                    center_y = t_i[1] + (t_i[3] - t_i[1]) / 2.0
                    ax.annotate(j, (center_x, center_y),
                                color=style['ec'], weight='bold', fontsize=6,
                                ha='center', va='center')

            ax.axis('off')
            fig.savefig(os.path.join(output_dir, str(i).zfill(4)))
            # Closing the figure releases it; one figure is created per frame.
            plt.close(fig)

In [44]:
# Alternative: render a training sequence instead of the test split.
# plotSequences(seq_name='MOT16-02', show_gt=True, show_prediction=True, tracker=tracker, db=train_db)
plotSequences(
    seq_name='MOT16-test',
    show_gt=True,
    show_prediction=True,
    tracker=tracker,
    db=test_db,
)

RuntimeError: [enforce fail at ..\c10\core\CPUAllocator.cpp:73] data. DefaultCPUAllocator: not enough memory: you tried to allocate 24883200 bytes. Buy new RAM!

### Test Output

In [9]:
# Run the tracker over the MOT16 test split using the pre-computed test
# database; results go to <root_dir>/output.
val_sequences = MOT16Sequences(
    'MOT16-test',
    osp.join(root_dir, 'data/MOT16'),
    vis_threshold=0.,
)
run_tracker(
    val_sequences,
    db=test_db,
    tracker=tracker,
    output_dir=osp.join(root_dir, 'output'),
)

Tracking: MOT16-01
No GT evaluation data available.
Tracks found: 74
Runtime for MOT16-01: 6.9 s.
Writing predictions to: ./cv3dst_exercise/output\MOT16-01.txt
Tracking: MOT16-08
No GT evaluation data available.
Tracks found: 134
Runtime for MOT16-08: 9.7 s.
Writing predictions to: ./cv3dst_exercise/output\MOT16-08.txt
Tracking: MOT16-12
No GT evaluation data available.
Tracks found: 138
Runtime for MOT16-12: 13.4 s.
Writing predictions to: ./cv3dst_exercise/output\MOT16-12.txt
Runtime for all sequences: 29.9 s.
