#Library Imports

In [None]:
# Installing additional libraries
%%capture
!pip install torch torchvision torchtext torchaudio pytorch-metric-learning torch-lr-finder

In [None]:
import zipfile, os.path
import shutil
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, Subset, DataLoader, WeightedRandomSampler
from PIL import Image
from torchvision import transforms
import random
import torch.nn.functional as F
from typing import Dict, Set, List
from pytorch_metric_learning import distances, losses, miners, reducers
import torch.utils.checkpoint as cp
import math
from torchvision import models
from torch_lr_finder import TrainDataLoaderIter, ValDataLoaderIter, LRFinder
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
import gc
from google.colab import files

# Data

## Importing the Drive archive
The following cell downloads and extracts the image dataset into the Colab file system, if it is not already present.

In [None]:
if(not (os.path.isdir('test') and os.path.isdir('train') and os.path.isdir('queries') and os.path.isfile('annotations_train.csv'))):
  if(not os.path.isfile('dataset.zip')):
    # Download the file directly from Matteo's Drive (The file is public)
    !gdown --id 13tm5l5uwj4zUMhm2YMRx0IKXySaRdxEE
  # Dataset extraction
  with zipfile.ZipFile('dataset.zip', 'r') as zip_ref:
      zip_ref.extractall()
  # Remove the zip file
  os.remove('dataset.zip')
  shutil.rmtree('sample_data')

## Dataset and Sample
A Dataset class for training and testing samples. Samples have the following structure:

| Key         | Value |
| ----------- | ----------- |
| image       | A Tensor containing the image. |
| img_file    | Name of the image file. Used on the result reporting. |
| person_id    | Id of the person in the image. The key is present only when annotations are available. |
| annotations | Annotations array for the person in the image. The key is present only when annotations are available. |

If the `csv_file` constructor parameter of `MarketDataset` is not provided, then the ID and the annotations will not be available.

[Pytorch tutorial for custom Datasets, Dataloaders and Transforms](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)

Image size: (3, 128, 64)

Output size: (56)

Output size (vector of probabilities): 56

In [None]:
# Class for the Dataset
class MarketDataset(Dataset):
  """Market-1501 (project version) dataset.

  Every item is a dict with the keys 'image' and 'img_file'; when a csv file
  with the annotations is supplied, the keys 'person_id' and 'annotations'
  are added as well.
  """

  def __init__(self, root, csv_file=None, transform=None, target_transform=None):
    """
    Args:
        root (string): Directory with all the images.
        csv_file (string): Path to the csv file with annotations. If None, no
            labeling will be added to samples (test samples).
        transform (callable, optional): Optional transform to be applied
            on the image of a sample.
        target_transform (callable, optional): Optional transform to be
            applied on the annotations of a sample (when available).
    """
    self.annotations_frame = pd.read_csv(csv_file).set_index('id') if csv_file is not None else None
    self.root = root
    # os.listdir returns the entries in arbitrary order: sorting makes the
    # sample indices reproducible across runs and machines
    self.img_files = sorted(os.listdir(root))
    self.transform = transform
    self.target_transform = target_transform

  def __len__(self):
    """Number of image files found in the root directory."""
    return len(self.img_files)

  def __getitem__(self, idx):
    """Loads the idx-th image and, if available, its person id and annotations."""
    if torch.is_tensor(idx):
      idx = idx.tolist()

    img_file = self.img_files[idx]
    # The image must be in PIL format in order to apply the transformations
    image = Image.open(os.path.join(self.root, img_file))

    # Creating the sample
    sample = {'image': image, 'img_file': img_file}

    if self.annotations_frame is not None:
      # The person id is the part of the file name preceding the first '_'
      person_id = int(img_file.partition('_')[0])
      # Row of the annotations table belonging to that person
      annotations = self.annotations_frame.loc[person_id]
      sample['person_id'] = person_id
      sample['annotations'] = torch.tensor(annotations.to_numpy()).int()

    # Apply the transformations
    if self.transform:
      sample['image'] = self.transform(sample['image'])
    # Unlabeled samples have no 'annotations' key: skip the target transform
    # instead of failing with a KeyError
    if self.target_transform and 'annotations' in sample:
      sample['annotations'] = self.target_transform(sample['annotations'])

    return sample

## Transformations
Typically, a composition of transformations is passed to the Dataset, so the samples are modified when loaded

### Image transformations (data augmentation)
This composition of transformations is applied to the images when loaded by the dataset

Further ideas about transformations: https://www.programmersought.com/article/19232071306/


In [None]:
# Define transformations for the data
# Augmentation pipeline: the first nine steps run on the PIL image, then the
# image is converted to a tensor and random erasing is applied on the tensor.
data_transform = transforms.Compose([
                          # Random rotation with the angle drawn from [0, 5] degrees around the point (62, 30)
                          # NOTE(review): images are described as (3, 128, 64); if torchvision reads `center`
                          # as (x, y), this point is not the image centre - confirm it is intended
                          transforms.RandomRotation(degrees=(0,5), center =(62,30)),
                          # Small affine jitter: up to 2 degrees of rotation and translate=(0.05, 0.05), no scaling/shearing
                          transforms.RandomAffine(2, translate=(0.05,0.05), scale=None, shear=None),
                          # Mild perspective distortion, uncovered pixels are filled with black
                          transforms.RandomPerspective(distortion_scale=0.02, fill=(0,0,0)),
                          # Photometric jitter, one property at a time (brightness, contrast, saturation)
                          transforms.ColorJitter(brightness=0.1),
                          transforms.ColorJitter(contrast=0.1),
                          # p=1: autocontrast is applied to every image
                          transforms.RandomAutocontrast(p=1),
                          transforms.ColorJitter(saturation=0.1),
                          # Sharpness adjusted by a factor of 2 with probability 0.3
                          transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.3),
                          # Mirror the image with probability 0.5
                          transforms.RandomHorizontalFlip(p=0.5),
                          # PIL image -> tensor (needed by RandomErasing, which works on tensors)
                          transforms.ToTensor(),
                          # Erase a small region (scale range 0.01-0.05) filling it with the value 1 on every channel
                          transforms.RandomErasing(scale=(0.01,0.05), value =(1,1,1)),
])

### Label transformations
These transformations can be applied to annotations to switch from label-based annotations to probability-based annotations. The probability vector is a sequence of 0-1 values, where each group of values encodes the label with a 1 at the corresponding (local) index. The color fields of a body part can all be 1 (i.e. every color is `no`): in this case, the correct classification is `multicolor`.

**Annotation form:**

| Description | Attribute | Labels |
| :----------- | ----------- | ----------- | 
| age | age | young(1), teenager(2), adult(3), old(4) 
| carrying backpack| backpack | no(1), yes(2) | 
| carrying bag | bag | no(1), yes(2) | 
| carrying handbag | handbag | no(1), yes(2) | 
| type of lower-body clothing| clothes | dress(1), pants(2) | 
| length of lower-body clothing | down | long(1), short(2) | 
| sleeve length | up | long(1), short(2) | 
| hair length | hair | short(1), long(2) | 
| wearing hat| hat | no(1), yes(2) | 
| gender | gender | male(1), female(2) | 
| Black upper-body clothing | upblack | no(1), yes(2) | 
| White upper-body clothing | upwhite | no(1), yes(2) | 
| Red upper-body clothing | upred | no(1), yes(2) | 
| Purple upper-body clothing | uppurple | no(1), yes(2) | 
| Yellow upper-body clothing | upyellow | no(1), yes(2) | 
| Gray upper-body clothing | upgray | no(1), yes(2) | 
| Blue upper-body clothing | upblue | no(1), yes(2) | 
| Green upper-body clothing | upgreen | no(1), yes(2) | 
| Black lower-body clothing | downblack | no(1), yes(2) | 
| White lower-body clothing | downwhite | no(1), yes(2) | 
| Pink lower-body clothing | downpink | no(1), yes(2) | 
| Purple lower-body clothing | downpurple | no(1), yes(2) | 
| Yellow lower-body clothing | downyellow | no(1), yes(2) | 
| Gray lower-body clothing | downgray | no(1), yes(2) | 
| Blue lower-body clothing | downblue | no(1), yes(2) | 
| Green lower-body clothing | downgreen | no(1), yes(2) | 
| Brown lower-body clothing | downbrown | no(1), yes(2) |

**Target form:**

| Task | Classes |
| :--- | ------- |
| age | [young, teenager, adult, old] |
| backpack | [no, yes] |
| bag | [no, yes] |
| handbag | [no, yes] |
| clothes | [dress, pants]
| down | [long, short]
| up | [long, short]
| hair | [short, long]
| hat | [no, yes]
| gender | [male, female]
| upcolor | [black, white, red, purple, yellow, gray, blue, green, multicolor]
| downcolor | [black, white, pink, purple, yellow, gray, blue, green, brown, multicolor]

In [None]:
def annotation_to_target(annotation):
  """
    Transforms the labels in the annotation form (as they are encoded in the
    dataset, with the color information spread across several values), into the
    target form (a single value for each task representing the correct class),
    which is used in the training for the loss function.

    The last dimension holds the 27 annotation values; any number of leading
    (batch) dimensions is supported, including none.
    :param annotation (torch.Tensor) labels in the annotation form, shape (..., 27).
    :return (torch.Tensor) labels in the target form, shape (..., 12).
  """
  # Convert the annotation into integers starting from 0
  zero_based = annotation - 1
  not_colors = zero_based[..., :10]
  upcolors = zero_based[..., 10:18]
  downcolors = zero_based[..., 18:]

  def _color_class(color_flags):
    # Extra "multicolor" column: it is 1 only when none of the color flags is set
    multicolor = 1 - color_flags.sum(dim=-1, keepdim=True)
    # Index of the (first) active column; keepdim keeps it concatenable
    return torch.argmax(torch.cat((color_flags, multicolor), dim=-1), dim=-1, keepdim=True)

  # Concatenating on the last dimension (instead of transposing and stacking
  # horizontally) works for single samples and for batches alike
  return torch.cat((not_colors, _color_class(upcolors), _color_class(downcolors)), dim=-1)

def prediction_to_target(prediction):
  """
    Transform the labels in the prediction form (the one-hot encoding form that
    the network should output) into the target form (a single value for each task 
    representing the correct class) by taking the most probable value for each task.
    :param prediction (torch.Tensor) labels in the prediction form.
    :return (torch.Tensor) labels in the target form.
  """
  tasks_splits = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]
  target = torch.vstack([torch.argmax(task, axis=-1) for task in torch.split(prediction, tasks_splits, dim=-1)]).T
  return target

def target_to_annotation(target):
  """
    Transforms the labels in the target form (a single value for each task
    representing the correct class), into the annotation form (as they are
    encoded in the dataset, with the color information spread across several
    values).
    :param target (torch.Tensor) labels in the target form, shape (..., 12).
      Any integer dtype is accepted.
    :return (torch.Tensor) labels in the annotation form, shape (..., 27).
  """
  # Handle the multicolors: the last one-hot column ("multicolor") is dropped,
  # so a multicolor target becomes a row without any active color.
  # F.one_hot only accepts int64 indices, hence the explicit cast.
  upcolors = F.one_hot(target[..., -2].long(), num_classes=9)[..., :-1]
  downcolors = F.one_hot(target[..., -1].long(), num_classes=10)[..., :-1]

  # First part, without colors
  not_colors = target[..., :10]

  # Go back to 1-starting values
  return torch.cat((not_colors, upcolors, downcolors), dim=-1).int() + 1

### Id transformations
These transformations can be used to normalize the person IDs within a dataset split.

Since the IDs in the dataset are not contiguous, and will be randomly divided into different splits, these transformations normalize them to the interval [0, num_of_ids - 1].

In [None]:
def normalize_pids(pids: set):
  """
  Builds a converter from raw person IDs to normalized ones, where the
  normalized ID is the position of the raw ID in the listing of `pids`.

  :param pids(set) The collection of all the person IDs in the considered split
  :return A callable mapping a pid of the collection to its normalized index
    (a KeyError is raised for an unknown pid)
  """
  ordered_pids = list(pids)
  # Pair every pid with its position in the listing
  index_of = dict(zip(ordered_pids, range(len(ordered_pids))))

  return lambda pid: index_of[pid]

## Dataset splitting
As stated in the project assignment:
> Be careful when producing your train and validation splits. All images of the same person should be either in the train or in the validation dataset, otherwise, your validation performance will be higher than test performance.

So, we need a splitter function to handle this.

Moreover, a custom Subset is needed to apply the transformations directly to it and not on the main dataset.

In [None]:
class TransformSubset(Subset):
  """
  Subset of a dataset at specified indices, with optional transformations
  applicable to data or target.

  Args:
    dataset (Dataset): The whole Dataset
    indices (sequence): Indices in the whole set selected for subset
    transform: a function that transforms the 'image' entry of a sample
    target_transform: a function that transforms the 'annotations' entry
    pid_transform: a function that transforms the person_id attribute
  """

  def __init__(self, dataset: Dataset, indices, transform=None, target_transform=None, pid_transform=None) -> None:
    super().__init__(dataset, indices)
    self.transform = transform
    self.target_transform = target_transform
    self.pid_transform = pid_transform

  def __getitem__(self, idx):
    """Fetches a sample from the wrapped dataset and applies the subset's transformations."""
    sample = self.dataset[self.indices[idx]]
    if self.transform:
      sample['image'] = self.transform(sample['image'])
    if self.target_transform:
      sample['annotations'] = self.target_transform(sample['annotations'])
    if self.pid_transform:
      sample['person_id'] = self.pid_transform(sample['person_id'])
    return sample

  def __getitems__(self, indices):
    """
    Batched fetch. Recent PyTorch versions let the DataLoader call
    Subset.__getitems__, which reads the wrapped dataset directly and would
    therefore skip the transformations of this class: route every index
    through __getitem__ instead.
    """
    return [self[idx] for idx in indices]

In [None]:
def rand_balanced_split(dataset, proportions):
  """
    Method to randomly split the MarketDataset, grouping by the personID: all
    the images of a person end up in the same split. While assigning a person,
    the split that still misses most of the person's task-classes is preferred,
    so that every split sees as many classes as possible.
    :param dataset (MarketDataset) holds market Sample.
    :param proportions (List[float]) proportions to divide the dataset; should sum to 1.
    :return (Tuple[Dataset]) splitted datasets.
  """
  tasks_splits = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]

  def _fresh_unfilled_classes():
    # For each split, one set per task with the classes not yet seen in the split
    return [[set(range(class_count)) for class_count in tasks_splits] for _ in proportions]

  # Grouping by personID
  indices_of_id = {}
  for sample_idx, img_file in enumerate(dataset.img_files):
    person_id = int(img_file.partition('_')[0])
    indices_of_id.setdefault(person_id, []).append(sample_idx)
  # Get the target of each personID as a list of plain Python ints.
  # Tensor elements hash by identity, so testing them against a set of ints
  # would never match: the conversion is what makes the scoring below work.
  target_of_id = {}
  for person_id in indices_of_id:
    annotation = dataset.annotations_frame.loc[person_id]
    target = annotation_to_target(torch.tensor(annotation.to_numpy()).int())
    target_of_id[person_id] = target.tolist()
  # Creating the lists of indices for splitting:
  # The target quantity of samples for each split
  target_counts = np.array(proportions) * len(dataset)
  # Lists of how should the indices be divided among the splits
  samples_splits = [[] for _ in proportions]
  # A collection of the still incomplete splits
  incomplete_splits = [*range(len(proportions))]
  unfilled_classes = _fresh_unfilled_classes()
  # Randomly iterate over all person_ids (random.sample needs a sequence:
  # a dict keys view is rejected by recent Python versions)
  for person_id in random.sample(list(indices_of_id), len(indices_of_id)):
    indices = indices_of_id[person_id]
    target = target_of_id[person_id]
    # Compute which split is better completed by the target of the random person_id
    best_split_idx = None
    max_score = -1
    # Visiting the incomplete splits in random order breaks the ties randomly
    for split_idx in random.sample(incomplete_splits, len(incomplete_splits)):
      # Score: how many new classes can the new sample fill in the split
      score = sum(task_class in task_unfilled_classes
                  for task_class, task_unfilled_classes in zip(target, unfilled_classes[split_idx]))
      if score > max_score:
        max_score = score
        best_split_idx = split_idx
    # Add the indices of the selected person_id to the best index
    samples_splits[best_split_idx].extend(indices)
    # Remove classes added just now from the unfilled classes
    for task_class, task_unfilled_classes in zip(target, unfilled_classes[best_split_idx]):
      task_unfilled_classes.discard(task_class)
    # Reset the unfilled classes once every set of every split has been emptied
    if not any(task_unfilled_classes
               for split_unfilled_classes in unfilled_classes
               for task_unfilled_classes in split_unfilled_classes):
      unfilled_classes = _fresh_unfilled_classes()

    # Remove the split from the incomplete ones when it reaches its capacity
    # (the last incomplete split is never removed, it collects the remainder)
    if len(samples_splits[best_split_idx]) > target_counts[best_split_idx] and len(incomplete_splits) > 1:
      incomplete_splits.remove(best_split_idx)

  return tuple(TransformSubset(dataset, indices) for indices in samples_splits)

## Loading the data


### Class balancing (weight)

In [None]:
def get_dataset_targets(dataset):
  """
    Method to recover the targets from the dataset's annotations, without having
    to load the images.
    :param dataset (MarketDataset) the dataset containing the targets; it can
      also be a Subset wrapping it.
    :return (torch.Tensor) one row in the target form for every considered image.
  """
  if isinstance(dataset, torch.utils.data.Subset):
    # If the dataset is a subset, only some indices are used from the internal original dataset
    indices = dataset.indices
    dataset = dataset.dataset # Replace the subset with the whole dataset
  else:
    # Otherwise, all the indices are used
    indices = range(len(dataset.img_files))
  # The person id is encoded in the file name, before the first '_'
  person_ids = [int(dataset.img_files[idx].partition('_')[0]) for idx in indices]
  # A single lookup for all the images (rows are repeated for images of the
  # same person) instead of one pandas lookup and tensor creation per image
  annotations = dataset.annotations_frame.loc[person_ids]
  annotations = torch.tensor(annotations.to_numpy()).int()
  return annotation_to_target(annotations)

In [None]:
def get_dataset_unique_pids(dataset):
  """
    Collects the distinct person IDs of a dataset by parsing the image file
    names only, so that no image has to be loaded.
    :param dataset (MarketDataset) The dataset containing the person IDs; it
      can also be a Subset wrapping it.
    :return pids (set) The set of all unique person IDs in the dataset
  """
  if isinstance(dataset, torch.utils.data.Subset):
    # A subset exposes only some indices of the dataset it wraps
    indices, dataset = dataset.indices, dataset.dataset
  else:
    # A plain dataset contributes all of its files
    indices = range(len(dataset.img_files))
  # The person ID is the part of the file name that precedes the first '_'
  return {int(dataset.img_files[idx].partition('_')[0]) for idx in indices}

In [None]:
def get_class_weight(targets, weights=None):
  """
    Computes the class weights used to balance the importance of the samples
    in the loss, starting from the frequencies of the task-classes.
    The frequencies can be pre-weighted, for the case in which the samples are
    not uniformly sampled but extracted with different probabilities.
    :param targets (torch.Tensor): Labels in the target form, one row per sample
      and one column per task
    :param weights (torch.Tensor): The sampling weights of the samples
    :return (torch.Tensor) The weights of all the classes, task after task.
      A class that never occurs gets an infinite weight.
  """
  tasks_splits = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]
  per_task_weights = []
  for column, class_count in zip(torch.split(targets, 1, dim=1), tasks_splits):
    # (Weighted) occurrences of every class of the task
    freq = torch.bincount(column.reshape(-1), weights, class_count)
    # Reciprocal of the normalized frequency; scaling by the number of classes
    # makes tasks with a different number of classes comparable
    per_task_weights.append(torch.reciprocal(freq / torch.sum(freq) * len(freq)))
  return torch.cat(per_task_weights)

### Dataloaders

The DataLoader objects help to create batches of samples from the Dataset. The following `get_data` function is used to obtain all the needed dataloaders.

- The dataset is split into balanced validation and training
- Data transformations are applied to training data
- Person IDs are normalized separately in both splits
- Balanced class weights are recomputed

In [None]:
# Define the variables with the data from the dataset to support the get_data() function
full_data = MarketDataset(csv_file='annotations_train.csv', root='train', target_transform=annotation_to_target)
test_data = MarketDataset(root='test', transform=transforms.ToTensor())
queries_data = MarketDataset(root='queries', transform=transforms.ToTensor())
# Train and validation split: the random split is repeated until it is valid
while True:
  train_data, val_data = rand_balanced_split(full_data, [.9, .1])
  train_targets = get_dataset_targets(train_data)
  train_class_weights = get_class_weight(train_targets)
  # Check that all classes are present at least once in the training set
  # (a missing class has zero frequency, hence a non-finite weight)
  if torch.all(torch.isfinite(train_class_weights)):
    break
# Apply the data transformation ONLY on the training set, not to the validation
train_data.transform = data_transform
val_data.transform = transforms.ToTensor()
# Apply the person IDs transformation SEPARATELY on training and validation sets
train_pids = get_dataset_unique_pids(train_data)
train_data.pid_transform = normalize_pids(train_pids)
val_pids = get_dataset_unique_pids(val_data)
val_data.pid_transform = normalize_pids(val_pids)

# Compute the samples' weights as the mean of their target classes' weights
tasks_splits = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]
# Splitting the class weights into a list with the different tasks
class_weights = list(torch.split(train_class_weights, tasks_splits))
samples_weights = torch.zeros(train_targets.shape[0])
# One vectorised lookup per task (all the samples at once) instead of a
# Python-level loop over every (sample, task) pair
for task, task_class_weights in enumerate(class_weights):
  samples_weights += task_class_weights[train_targets[:, task]]
samples_weights /= len(tasks_splits)
# Create a sampler that extracts more frequently the samples presenting low-frequency classes
sampler = WeightedRandomSampler(samples_weights, int(len(samples_weights) * 1.25), replacement=True)
# Recalculate the class weights taking into account the sampling weights
balanced_class_weights = get_class_weight(train_targets, weights=samples_weights)

# Get a DataLoader object for all the sets
def get_data(batch_size, test_batch_size=128):
  """
  Builds the DataLoaders over the module-level datasets and returns them in a
  dict, together with the balanced class weights and the number of distinct
  person IDs of the training and validation splits.

  :param batch_size Batch size of the 'full' and 'train' loaders
  :param test_batch_size Batch size of the 'val', 'test' and 'queries' loaders
  """
  # Options shared by every loader
  worker_options = {'num_workers': 2, 'pin_memory': True}
  data = {
    'full': DataLoader(full_data, batch_size, shuffle=True, **worker_options),
    # The training loader draws its samples through the weighted sampler
    'train': DataLoader(train_data, batch_size, sampler=sampler, **worker_options),
    'val': DataLoader(val_data, test_batch_size, shuffle=False, **worker_options),
    'test': DataLoader(test_data, test_batch_size, shuffle=False, **worker_options),
    'queries': DataLoader(queries_data, test_batch_size, shuffle=False, **worker_options),
  }
  data['class_weight'] = balanced_class_weights
  data['train_pids_count'] = len(train_pids)
  data['val_pids_count'] = len(val_pids)
  return data

# Training Utilities

## Metrics & Logging

In [None]:
# Class that is used to compute the metrics:
# + Accuracy for each sub-task of the attribute recognition
# + Average global accuracy
# + Average mAcc
# + Average mAP
class Metrics():
  """
  Accumulates one confusion matrix per attribute task and stores the
  re-identification predictions, computing the metrics on request.

  :param tasks_splits (list) Number of classes of each task
  :param device Device on which the confusion matrices are kept
  """
  def __init__(self, tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10], device='cuda'):
    # One square confusion matrix per task
    self.M = [torch.zeros(class_count, class_count, device=device) for class_count in tasks_splits]
    self.device = device
    # Accuracy of a uniform random guess for every task
    self.baselines = 1 / torch.tensor(tasks_splits, device=device)
    self.reid_prediction = None
    self.reid_target = None
    self.suggested_threshold = None

  def clear(self):
    """Resets the confusion matrices and forgets the re-identification data."""
    for C in self.M:
      C.zero_()
    self.reid_prediction = None
    self.reid_target = None
    self.suggested_threshold = None

  def add(self, prediction, target):
    """
    Accumulates a batch in the confusion matrices.
    :param prediction (Tensor[int]) predicted class of each task, one column per task
    :param target (Tensor[int]) correct class of each task, one column per task
    """
    # Split the prediction and the target columns of the same task
    prediction = prediction.to(self.device).split(1, dim=-1)
    target = target.to(self.device).split(1, dim=-1)
    with torch.no_grad(): # We require no computation graph
      for C, yp, yt in zip(self.M, prediction, target):
        # Every (target, prediction) pair is encoded as the flat index of its
        # cell in the matrix; bincount then counts the occurrences of each cell
        C += (yt.reshape(-1) * C.shape[1] + yp.reshape(-1)).bincount(minlength=C.numel()).view(C.shape).float()

  def add_reid(self, predictions: Dict[str, List], ground_truth: Dict[str, Set], suggested_threshold: float):
    """
    :param predictions: dictionary from query filename to list of test image
      filenames associated with the query ordered from the most to the least
      confident prediction. Represents the predictions to be evaluated.
    :param ground_truth: dictionary from query filename to set of test image
      filenames associated with the query. Represents the ground truth on which
      to evaluate predictions.
    :param suggested_threshold: a value between 0 and 1 corresponding to the
      threshold used to compute the predictions.
    """
    self.reid_prediction = predictions
    self.reid_target = ground_truth
    # Keep the threshold too: the attribute exists but was never filled
    self.suggested_threshold = suggested_threshold

  # Computes the global accuracy
  def acc(self):
    return torch.stack([C.diag().sum() / C.sum() for C in self.M])

  # Computes the class-averaged accuracy
  def mAcc(self):
    # Classes never seen as target produce 0/0 = NaN and are left out of the mean
    averages = [C.diag() / C.sum(-1) for C in self.M]
    return torch.stack([average[~torch.isnan(average)].mean() for average in averages])

  # Computes the normalized mAcc, that is set to zero if below the baseline mAcc (1 / # of Classes)
  def mAcc_norm(self):
    mAcc = self.mAcc()
    # Rescaling and set to zero negative values
    return ((mAcc - self.baselines) / (1 - self.baselines)).clamp(min=0)

  # Returns the confusion matrix
  # Rows: target classes | Columns: predicted classes
  def confusion_matrices(self):
    return self.M

  # Willi's evaluator for mAP (mAcc)
  def reid_mAP(self):
    """
    Computes the mAP (https://jonathan-hui.medium.com/map-mean-average-precision-for-object-detection-45c121a31173)
    of the predictions with respect to the given ground truth. In person reidentification
    mAP refers to the mean of the AP over all queries. The AP for a query is the
    area under the precision-recall curve obtained from the list of predictions
    considering the ground truth elements as positives and the other ones as negatives.

    :return (float) the mAP, or NaN when no re-identification data was added.
    """
    # Nothing to evaluate: the mean over zero queries is undefined
    if not self.reid_target or self.reid_prediction is None:
      return float('nan')

    m_ap = 0.0

    for current_ground_truth_query, current_ground_truth_query_set in self.reid_target.items():
      # No predictions were performed for the current query, AP = 0
      if not current_ground_truth_query in self.reid_prediction:
        continue
      # A query without positives cannot be retrieved: AP = 0 (and no division by zero)
      if not current_ground_truth_query_set:
        continue

      current_ap = 0.0  # The area under the curve for the current sample
      current_predictions_list = self.reid_prediction[current_ground_truth_query]

      # Recall increments of this quantity each time a new correct prediction is encountered in the prediction list
      delta_recall = 1.0 / len(current_ground_truth_query_set)

      # Goes through the list of predictions
      encountered_positives = 0
      for idx, current_prediction in enumerate(current_predictions_list):
        # Each time a positive is encountered, compute the current precision and the area under the curve
        # since the last positive
        if current_prediction in current_ground_truth_query_set:
          encountered_positives += 1
          current_precision = encountered_positives / (idx + 1)
          current_ap += current_precision * delta_recall

      m_ap += current_ap
    # Compute mean over all queries
    m_ap /= len(self.reid_target)
    return m_ap

In [None]:
# Logging functions for the various prints at each epoch
def log_tensorboard(prefix, writer, step, loss, metric):
  """
  Writes the loss, the task-averaged accuracy and the task-averaged
  class-averaged accuracy of a step to the given writer, under `prefix`.
  """
  # Values are computed lazily, right before being written, in this order
  entries = (
    ("loss", lambda: loss),
    ("accuracy", lambda: metric.acc().mean().cpu().numpy()),
    ("mAccuracy", lambda: metric.mAcc().mean().cpu().numpy()),
  )
  for name, compute in entries:
    writer.add_scalar(f"{prefix}/{name}", compute(), step)

def log(tr_loss, val_loss, tr_metric, val_metric):
  """
  Prints the end-of-epoch report: per-task accuracy tables (global and
  class-averaged) with their averages, the re-identification mAP and the
  losses, for both the training and the validation metrics.
  """
  columns = ["Age", "Backpack", "Bag", "Handbag", "Cloth", "Down", "Up", "Hair", "Hat", "Gender", "UpColor", "DownColor"]

  def _attribute_report(title, tr_label, val_label, tr_values, val_values):
    # Table with one column per task (values scaled by 100), followed by the
    # task-averaged value of each split
    print(title)
    table = pd.DataFrame.from_dict(
      {"Train": tr_values().cpu().numpy() * 100, "Val": val_values().cpu().numpy() * 100},
      columns=columns, orient='index')
    pd.set_option('display.max_columns', None)
    pd.set_option("display.precision", 1)
    print(table)
    print(tr_label, tr_values().mean().cpu().numpy())
    print(val_label, val_values().mean().cpu().numpy())

  _attribute_report("==== ATTRIBUTES: Global Accuracy % ====",
                    "Train average Acc:", "Val average Acc:",
                    tr_metric.acc, val_metric.acc)
  _attribute_report("==== ATTRIBUTES: Class Averaged Accuracy % ====",
                    "Train average mAcc:", "Val average mAcc:",
                    tr_metric.mAcc, val_metric.mAcc)

  print("==== RE-IDENTIFICATION: mean Average Precision % ====")
  print("Train mAP:", tr_metric.reid_mAP())
  print("Val mAP:", val_metric.reid_mAP())

  print("==== Loss ====")
  print(f"Train: {tr_loss}")
  print(f"Validation: {val_loss}")

## Cost functions

### Focal loss

In [None]:
class FocalLoss(torch.nn.Module):
  """
  Implementation of the Focal Loss introduced in arXiv:1708.02002v2

  :param gamma Focusing parameter: the higher it is, the less the well
    classified samples contribute to the loss.
  """
  def __init__(self, gamma=2):
    super().__init__()
    self.gamma = gamma

  def forward(self, input: torch.Tensor, target: torch.Tensor, weight=None) -> torch.Tensor:
    '''weight is equivalent to the alpha parameter of Focal loss. It controls the
    importance of the sample, taking into consideration the class imbalance.
    (1-p)**gamma controls instead the difficulty of the sample, reducing the
    importance of easy examples.

    :param input (Tensor) Class scores (logits), one row per sample
    :param target (Tensor[int64]) Correct class of every sample
    :param weight (Tensor) Optional weight of every class
    :return (Tensor) The alpha-weighted mean of the per-sample focal losses,
      on the same device as the input
    '''
    target = target.clone().detach()
    # Per-sample cross entropy: the focal modulation has to act on every
    # sample separately, so the loss must NOT be averaged at this point
    ce_loss = torch.nn.functional.cross_entropy(input, target, reduction='none')
    # Get back the probability confidence of the network on the correct samples
    p = torch.exp(-ce_loss)
    # The more common is the class, the less the sample matters (alpha)
    if weight is None:
      # Uniform importance: the reduction below becomes a plain mean
      alphas = torch.ones_like(ce_loss)
    else:
      # Convert the targets in the corresponding class weights
      alphas = weight.to(ce_loss.device).gather(0, target)
    # The more confident is the network, the less the sample matters (gamma)
    focal_losses = alphas * (1 - p) ** self.gamma * ce_loss
    # Reduction: weighted mean over the batch
    return focal_losses.sum() / alphas.sum()

### Multitask aggregators
These loss aggregators use different weighting schemes to combine the losses computed on the multiple tasks.

In [None]:
class MultitaskAggregator(torch.nn.Module):
  """
    A generic class for multitask aggregators, defines a common forward pass
    that computes the losses of the several tasks defined by tasks_splits.
    Should be subclassed by an aggregator that defines how to aggregate the losses.

    :param loss_fn (function) The loss function used to compute the task-wise
      losses, called as loss_fn(input, target). When `weight` is provided it is
      called as loss_fn(input, target, weight=...), so it has to accept that
      keyword.
    :param weight (Tensor) A tensor with the weights of the classes of all the
      tasks, task after task, normalized across single tasks.
    :param tasks_splits (list[int]) The number of classes of each task.
  """
  def __init__(self, loss_fn,
               weight: torch.Tensor = None,
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]):
    super().__init__()
    self.tasks_count = len(tasks_splits)
    self.tasks_splits = tasks_splits
    self.loss_fn = loss_fn
    self.weight = weight

  def aggregate(self, losses: torch.Tensor):
    """Combines the task-wise losses; the base class returns them unchanged."""
    return losses

  def forward(self, input: torch.Tensor, target: torch.Tensor):
    """
      :param input (Tensor) A tensor of size (N, sum of the classes of all the
        tasks) where N = size of the minibatch. It holds the class scores of
        every task, side by side.
      :param target (Tensor[int]) A tensor of size (N, T) where N = size of the
        minibatch, T = number of tasks. It contains the correct class for each task.
      :return The value produced by `aggregate` on the vector of the T losses.
    """
    splits = [int(class_count) for class_count in self.tasks_splits]
    # Divide the input in T chunks, following the configured tasks
    # (not a hard-coded layout, so custom tasks_splits are honoured)
    task_inputs = torch.split(input, splits, dim=-1)
    # Divide the target into T columns of width 1
    task_targets = torch.split(target, 1, dim=-1)
    # If there aren't weights, a None for each task. Otherwise, divide in chunks
    if self.weight is None:
      task_weights = [None] * self.tasks_count
    else:
      task_weights = torch.split(self.weight, splits)

    losses = []
    for t_input, t_target, t_weight in zip(task_inputs, task_targets, task_weights):
      t_target = t_target.flatten().clone().detach()
      if t_weight is None:
        losses.append(self.loss_fn(t_input, t_target))
      else:
        # Hand the class weights of the task over to the loss (they were
        # computed but never used), on the device of the scores
        losses.append(self.loss_fn(t_input, t_target, weight=t_weight.to(t_input.device)))
    return self.aggregate(torch.hstack(losses))

#### UniformMultitask

In [None]:
class UniformMultitask(MultitaskAggregator):
  """
    The simplest aggregator: every task has the same importance, so the
    aggregated loss is the plain sum of the task-wise losses.
  """
  def __init__(self, loss_fn,
               weight: torch.Tensor = None,
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]):
    super().__init__(loss_fn, weight=weight, tasks_splits=tasks_splits)

  def aggregate(self, losses: torch.Tensor):
    """Return the unweighted sum of the task-wise losses"""
    total = torch.sum(losses)
    return total

#### UncertMultitask

In [None]:
class UncertMultitask(MultitaskAggregator):
  """
    [Task Uncertainty Weighting]
    Aggregator for Multi Task Learning that combines the task-wise losses with
    an uncertainty-based weighting, as described in the paper
    (Auxiliary Tasks in Multi-task Learning)[https://arxiv.org/pdf/1805.06334.pdf].
    It was born around the CrossEntropy loss, but any `loss_fn` can be plugged in.

    One learnable parameter per task is stored in `self.params`: it has to be
    passed to an optimizer in order to be learnt.
  """
  def __init__(self, loss_fn,
               weight: torch.Tensor = None,
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]):
    super().__init__(loss_fn, weight=weight, tasks_splits=tasks_splits)

    # One learnable uncertainty per task, all of them starting from 1.
    # Alternative noted by the authors and not implemented: values summing to 1,
    # based on the number of classes of each task (num_class*12/41)
    initial_uncertainties = torch.ones(self.tasks_count)
    self.params = torch.nn.Parameter(initial_uncertainties)

  def aggregate(self, losses: torch.Tensor):
    """Sum of the losses scaled by 0.5/param^2, plus the log(1 + param^2) regularizers"""
    squared = self.params ** 2
    # A large uncertainty shrinks the contribution of its task...
    scaled_losses = 0.5 / squared * losses
    # ...but is paid through the regularization term
    regularizers = torch.log(1 + squared)
    return (scaled_losses + regularizers).sum()

#### LBTWMultitask

In [None]:
class LBTWMultitask(MultitaskAggregator):
  """
    [Loss-Balanced Task Weighting]
    Balances several loss functions in a Multi Task Learning scenario by looking
    at the learning speed of each task, as described in the paper:
    (Loss-Balanced Task Weighting to Reduce Negative Transfer in Multi-Task Learning)[https://doi.org/10.1609/aaai.v33i01.33019977].

    The aim is to reduce the negative transfer by limiting the influence of the
    tasks dominating the training process.
    Each task is weighted by (current loss / baseline loss) ** alpha, where the
    baseline is the loss seen on the first batch after the last reset: poorly
    trained tasks keep a ratio close to 1 and contribute more to the final loss.
    It's important to call epoch_reset() after each epoch to reset the baselines.

    :param alpha(float) A hyperparameter to control the influence of the weights
  """
  def __init__(self, loss_fn,
               weight: torch.Tensor = None, 
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10], 
               alpha=.5):
    super().__init__(loss_fn, weight=weight, tasks_splits=tasks_splits)
    # Baseline losses l(0, i); None until the first batch of the epoch is seen
    self.baseline_losses = None
    # Guards the division by the baseline losses
    self.eps = 1e-10
    self.alpha = alpha
  
  def epoch_reset(self):
    """Has to be called at the end of every epoch: forgets the baseline losses"""
    self.baseline_losses = None

  def aggregate(self, losses: torch.Tensor):
    """Weighted sum of the task-wise losses; the weights carry no gradient"""
    current = losses.detach()
    # First batch after a reset: its losses become the baseline l(0, i)
    if self.baseline_losses is None:
      self.baseline_losses = current
    # Learning-speed ratio of each task, tempered by alpha
    ratios = current / (self.baseline_losses + self.eps)
    weights = ratios ** self.alpha
    return (losses * weights).sum()

#### DTPMultitask

In [None]:
class DTPMultitask(MultitaskAggregator):
  """
    [Dynamic Task Prioritization]
    Loss balancing strategy inspired by the focal loss. A Key Performance
    Indicator (KPI) is tracked for each task and used to follow how the
    difficulty of the tasks evolves as the training progresses. The loss of the
    most difficult tasks is increased, while the loss of the better learned
    tasks is downweighted.

    (Dynamic Task Prioritization for Multitask Learning)[https://openaccess.thecvf.com/content_ECCV_2018/papers/Michelle_Guo_Focus_on_the_ECCV_2018_paper.pdf].

    :param gamma(float) A hyperparameter to control the reweighting factor
    :param df(float) The discount factor for the KPI update: weight given to the
      newly computed KPI in the exponential moving average
    :param kpi_metric('mAcc'|'mAcc_norm') Which metric should be used as KPI value
    :param kpi_norm(bool) If True, the difficulties are divided by their mean
    :param device The device handed to the internal Metrics accumulator
  """
  def __init__(self, loss_fn,
               weight: torch.Tensor = None, 
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10], 
               gamma=1, df=.7, kpi_metric='mAcc_norm', kpi_norm=False, device='cuda'):
    super().__init__(loss_fn, weight=weight, tasks_splits=tasks_splits)
    # Hyperparameters of the strategy
    self.gamma = gamma
    self.df = df
    # Accumulator of the predictions used to compute the KPI
    self.metrics = Metrics(device=device)
    self.kpi_metric = kpi_metric
    self.kpi_norm = kpi_norm
    # Guards the logarithm of the KPI
    self.eps = 1e-10
    # Running KPI; None until the first aggregation
    self.kpi = None
  
  def forward(self, input: torch.Tensor, target: torch.Tensor):
    """Overridden to feed the metrics with the batch before computing the losses"""
    self.metrics.add(prediction_to_target(input), target)
    return super().forward(input, target)

  def aggregate(self, losses: torch.Tensor):
    """Sum of the task-wise losses, each scaled by the difficulty of its task"""
    # Reject unknown metrics before touching any state
    if self.kpi_metric not in ('mAcc_norm', 'mAcc'):
      raise TypeError("Invalid 'kpi_metric' value")
    if self.kpi_metric == 'mAcc_norm':
      fresh_kpi = self.metrics.mAcc_norm()
    else:
      fresh_kpi = self.metrics.mAcc()
    # Exponential moving average between the fresh KPI and the previous one;
    # the very first KPI is taken as it is
    if self.kpi is None:
      self.kpi = fresh_kpi
    else:
      self.kpi = self.df * fresh_kpi + (1 - self.df) * self.kpi
    # Difficulty of each task, following the Focal Loss strategy
    modulation = (1 - self.kpi) ** self.gamma
    difficulty = modulation * -torch.log(self.kpi + self.eps)
    if self.kpi_norm:
      # Keep only the relative scale of the difficulties (our contribution)
      difficulty = difficulty / difficulty.mean()
    # The next KPI has to be computed on the next training iteration only
    self.metrics.clear()

    return (losses * difficulty).sum()

### Aggregated losses

These classes instantiate the possible combinations of base loss and multitask aggregator for the attribute prediction task

In [None]:
class UncertCrossEntropy(UncertMultitask):
  """
    Uncertainty-weighted multitask loss built on top of the Cross Entropy.
    The `learnable` argument is accepted but not used.
  """
  def __init__(self, tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10],
               weight: torch.Tensor = None, learnable=True):
    # Parent signature: (loss_fn, weight, tasks_splits)
    super().__init__(F.cross_entropy, weight, tasks_splits)

In [None]:
class LBTWCrossEntropy(LBTWMultitask):
  """
    Loss-Balanced Task Weighting multitask loss built on top of the Cross Entropy.
  """
  def __init__(self, tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10],
               weight: torch.Tensor = None, alpha=.5):
    # Parent signature: (loss_fn, weight, tasks_splits, alpha)
    super().__init__(F.cross_entropy, weight, tasks_splits, alpha)

In [None]:
class UncertFocal(UncertMultitask):
  """
    Uncertainty-weighted multitask loss built on top of FocalLoss(gamma).
    The `learnable` argument is accepted but not used.
  """
  def __init__(self, gamma=2, 
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10],
               weight: torch.Tensor = None, learnable=True):
    focal = FocalLoss(gamma)
    # Parent signature: (loss_fn, weight, tasks_splits)
    super().__init__(focal, weight, tasks_splits)

In [None]:
class LBTWFocal(LBTWMultitask):
  """
    Loss-Balanced Task Weighting multitask loss built on top of FocalLoss(gamma).
  """
  def __init__(self, gamma=2, 
               tasks_splits: list = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10],
               weight: torch.Tensor = None, alpha=.5):
    focal = FocalLoss(gamma)
    # Parent signature: (loss_fn, weight, tasks_splits, alpha)
    super().__init__(focal, weight, tasks_splits, alpha)

## Cost functions (Re-ID)

### Center loss

In [None]:
class CenterLoss(torch.nn.Module):
  """
  Center Loss (https://doi.org/10.1007/978-3-319-46478-7_31)
  An auxiliary loss that learns a center for each class (PIDs in our case);
  embeddings that are far away from the center of their own class are penalized,
  so that similar samples tend to form a spherical cluster in the features space.
  It has to be paired with an actual loss, and it should be downweighted w.r.t.
  it through an appropriate hyperparameter.

  IMPORTANT: Needs to be learnt, so its parameters need to be passed to an
    optimizer. A possible learning rate is .5

  :num_classes(int) Number of classes of the classification problem
  :dim_embedding(int) Dimension of the embedded images
  """
  def __init__(self, num_classes, dim_embedding):
    super().__init__()
    self.num_classes = num_classes
    self.dim_embedding = dim_embedding
    # One learnable center per class, randomly initialized
    random_centers = torch.randn(self.num_classes, self.dim_embedding)
    self.centers = torch.nn.Parameter(random_centers)

  def forward(self, embeddings, labels):
    """Mean over the batch of half the squared distance from the own-class center"""
    # Center associated to the label of each embedding
    own_centers = self.centers[labels]
    offsets = embeddings - own_centers
    half_sq_dist = .5 * offsets.pow(2).sum(dim=-1)
    # Restrict the values for numerical stability before averaging
    bounded = torch.clamp(half_sq_dist, min=1e-12, max=1e+12)
    return bounded.mean(dim=-1)

#### Island Loss


In [None]:
class IslandLoss(CenterLoss):
  """
  Island Loss (https://arxiv.org/pdf/1710.03144v3.pdf)
  An auxiliary loss that complements the Center Loss with an additional term
  penalizing normalized centers that are close to each other: only the direction
  of the centers is taken into account, not their magnitude. The resulting
  clusters tend to be more spread out in the feature space.

  IMPORTANT: Needs to be learnt, see CenterLoss for more details

  :num_classes(int) Number of classes of the classification problem
  :dim_embedding(int) Dimension of the embedded images
  :omega_island(float) The hyperparameter that controls the influence of the
    island term with respect to the Center Loss
  """
  def __init__(self, num_classes, dim_embedding, omega_island=1.5):
    super().__init__(num_classes, dim_embedding)
    self.omega_island = omega_island

  def forward(self, embeddings, labels):
    """Center Loss plus omega_island times the island term"""
    # The Center Loss comes straight from the superclass
    center_term = super().forward(embeddings, labels)
    # Island term: cosine similarity (+1) between every pair of distinct centers
    unit_centers = F.normalize(self.centers)
    shifted_cosines = unit_centers.mm(unit_centers.t()) + 1
    # The strict upper triangle counts each pair once and skips the diagonal
    island_term = shifted_cosines.triu(diagonal=1).sum()
    return center_term + self.omega_island * island_term

### Centroid Triplet Loss

This loss function was used only during training

In [None]:
class CentroidTripletLoss(torch.nn.Module):
  """
  Centroid Triplet Loss (https://arxiv.org/pdf/2104.13643v1.pdf)
  Triplet loss where each embedding is the anchor, the positive is the centroid
  of its own class and the negative is the centroid of another class: the one
  nearest to the own-class centroid. Centroids are computed on the batch only.

  :margin(float) Margin of the triplet loss
  """
  def __init__(self, margin=1):
    super().__init__()
    self.margin = margin
  
  def forward(self, embeddings, labels):
    """
    :embeddings (Tensor) A tensor of size (N, D) with the embeddings of the batch
    :labels (Tensor[int]) A tensor of size (N) with the class of each embedding
    :return The mean triplet loss; a zero (still differentiable) loss when the
      batch holds fewer than two classes, since no negative centroid exists
    """
    labels = labels.to(embeddings.device)
    # Map the labels into a continuous range [0, num_classes) without leaving
    # the device: dense_labels[i] is the position of labels[i] among the uniques
    u_labels, dense_labels = torch.unique(labels, return_inverse=True)
    num_classes = u_labels.numel()
    if num_classes < 2:
      # No other class to push away from: nothing to optimize on this batch
      return embeddings.sum() * 0.
    # Membership matrix (num_classes, N): rows are classes, columns are samples
    membership = F.one_hot(dense_labels, num_classes).t()
    # Normalize across the rows, to have the correct weights for the mean centroid
    M = F.normalize(membership.float(), p=1).to(embeddings.dtype)
    # Centroids for each class. Only classes in the batch are present
    centroids = M.mm(embeddings)
    # Positives are the same-class centroids
    positive = centroids[dense_labels]
    # Negatives are other-class nearest (to the same-class centroid) centroids.
    # The own class is masked out explicitly, so that it can never be selected,
    # not even when two centroids coincide
    with torch.no_grad():
      dists = torch.cdist(positive, centroids)
      own_class = membership.t().bool()
      idx = dists.masked_fill(own_class, float('inf')).argmin(dim=1)
    negative = centroids[idx]
    # Compute the triplet loss w.r.t the centroids
    return F.triplet_margin_loss(embeddings, positive, negative, margin=self.margin)

### JointLoss
An aggregator of all the losses for the multitask scenario. Assumes that the outputs are in the form returned by the JointNet

In [None]:
# Loss function combination designed to work with the outputs of the JointNet
class JointLoss(torch.nn.Module):
  """
  Combination of the losses designed to work with the outputs of the JointNet:
  a triplet loss on the rich embeddings, the attribute loss and, when the
  person IDs are predicted, a Cross Entropy on them. The terms are merged
  through learnable parameters (`self.params`, one per term), which therefore
  need to be passed to an optimizer.

  :dim_embedding Accepted but not used
  :attr_loss (callable) Loss applied to (predicted attributes, attribute targets)
  :triplet_lambda (float) Multiplier of the triplet loss
  :pids_lambda (float) Multiplier of the person ID loss
  :margin (float) Margin shared by the triplet miner and the triplet loss
  """
  def __init__(self,dim_embedding, attr_loss, 
               triplet_lambda = 0.7, pids_lambda = 1, margin=0.4):
    super().__init__()
    # The triplet loss follows the pytorch-metric-learning documentation: a miner
    # extracts the tuples of indices, then the loss is computed on them with a
    # distance and a reducer
    # (see https://kevinmusgrave.github.io/pytorch-metric-learning/ for more information).
    cosine = distances.CosineSimilarity()
    self.triplet_loss = losses.TripletMarginLoss(
      margin=margin, distance=cosine, reducer=reducers.ThresholdReducer(low=0))
    self.miner = miners.TripletMarginMiner(
      margin=margin, distance=cosine, type_of_triplets="semihard")
    # CrossEntropy Loss for the person IDs classification
    self.pids_loss = torch.nn.CrossEntropyLoss()
    self.attr_loss = attr_loss
    # Multipliers of the single losses
    self.triplet_lambda = triplet_lambda
    self.pids_lambda = pids_lambda
    # Learnable parameters: one per term (triplet, attributes, person IDs)
    self.params = torch.nn.Parameter(torch.ones(3))

  def forward(self, outputs, attr_targets, pid_labels):
    """
    :outputs (dict) Needs the keys 'rich_embedding', 'attributes' and 'pid'
      ('pid' may be None, in which case the person ID term is left out)
    :return The scalar combination of the available terms
    """
    rich_embedding = outputs['rich_embedding']
    # Mine for the tuple indices, then compute the triplet term on them
    mined_tuples = self.miner(rich_embedding, pid_labels)
    triplet_term = self.triplet_lambda * self.triplet_loss(rich_embedding, pid_labels, mined_tuples)
    # Attribute term
    attr_term = self.attr_loss(outputs['attributes'], attr_targets)
    # The person IDs are considered only when they are predicted (training)
    if outputs['pid'] is None:
      stacked = torch.hstack([triplet_term, attr_term])
      sigmas = self.params[:2]
    else:
      pid_term = self.pids_loss(outputs['pid'], pid_labels) * self.pids_lambda
      stacked = torch.hstack([triplet_term, attr_term, pid_term])
      sigmas = self.params
    return (0.5 / (sigmas ** 2) * stacked + torch.log(1 + sigmas ** 2)).sum()

## Initialization of the weights

In [None]:
def initialize_weights(model, nonlinearity='leaky_relu'):
  """
  Initializer meant to be used as MyNeuralNetwork.apply(initialize_weights): it
  is then applied recursively to every module of the network, and each module
  is initialized according to its type:
    - Conv2d and Linear: Kaiming uniform weights, zero bias (when present)
    - BatchNorm2d: weights set to 1, bias set to 0
  Any other module is left untouched.

  :model (nn.Module) The module to initialize
  :nonlinearity (str) The non-linearity passed to the Kaiming initialization
  """
  if isinstance(model, torch.nn.BatchNorm2d):
    torch.nn.init.constant_(model.weight.data, 1)
    torch.nn.init.constant_(model.bias.data, 0)
    return
  # Convolutional and fully connected layers share the same recipe
  if not isinstance(model, (torch.nn.Conv2d, torch.nn.Linear)):
    return
  torch.nn.init.kaiming_uniform_(model.weight.data, nonlinearity=nonlinearity)
  if model.bias is not None:
    torch.nn.init.constant_(model.bias.data, 0)

# Neural Networks

Multi-head network: a network that, given a common backbone, feeds the shared features to several task-specific heads and returns their concatenated outputs. All heads need to be added to a ModuleList, so they can be correctly registered.

Inspired by: https://stackoverflow.com/questions/59763775/how-to-use-pytorch-to-construct-multi-task-dnn-e-g-for-more-than-100-tasks

### JointNet

In [None]:
class JointNet(torch.nn.Module):
  """
  Network that jointly predicts the attributes and the person ID of an image,
  starting from a shared embedding.

  :params backbone (nn.Module) The initial part of the network, that extracts
    the features from the image, creating the embedding
  :params attr_classifiers (list[nn.Module]) The list of classifiers used to 
    predict the various attributes from the embeddings
  :params pid_classifier (nn.Module) The classifier used to predict the person
    ID from the rich embedding (re-weighted attributes + embedding)
  """
  def __init__(self, backbone, attr_classifiers, pid_classifier):
    super().__init__()
    # Number of classes of each attribute; their sum is the size of the
    # attributes prediction vector
    self.task_splits = torch.tensor([4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10])
    self.attr_pred_size = self.task_splits.sum()

    self.attr_reweight = AttrReweight(self.attr_pred_size)

    self.backbone = backbone
    # The heads work directly on the embedding: no further shared layers
    self.multihead = MultiHead(torch.nn.Identity(), attr_classifiers)
    self.pid_classifier = pid_classifier

  def forward(self, x):
    """
    :return A dictionary with the keys 'embedding', 'attributes',
      'reweight_attributes', 'rich_embedding' and 'pid' ('pid' is None when the
      network is not in training mode)
    """
    embedding = self.backbone(x)
    attributes = self.multihead(embedding)
    reweight_attributes = self.attr_reweight(attributes)
    # Re-weighted attributes first, then the embedding
    rich_embedding = torch.hstack([reweight_attributes, embedding])
    # The person IDs are computed only when in training mode
    pid = self.pid_classifier(rich_embedding) if self.training else None
    return dict(
      embedding=embedding,
      attributes=attributes,
      reweight_attributes=reweight_attributes,
      rich_embedding=rich_embedding,
      pid=pid,
    )

## JointConvNet

In [None]:
class ConvBlock(torch.nn.Module):
  """
  Convolutional head: a strided convolution with batch normalization and ReLU,
  followed by dropout and two fully connected layers.

  :in_spatial_shape Accepted but not used
  :in_features (int) Number of channels of the input feature map
  :out_features_conv (int) Number of channels produced by the convolution
  :out_features_linear (int) Size of the output vector
  :kernel_size Kernel size of the convolution

  NOTE: the first fully connected layer expects exactly 1024 features, so the
  flattened output of the convolution has to match that size.
  """
  def __init__(self, in_spatial_shape, in_features, out_features_conv, out_features_linear, kernel_size):
    super(ConvBlock, self).__init__()
    self.conv = torch.nn.Conv2d(in_features, out_features_conv, kernel_size=kernel_size, stride=(2, 2), padding=(1, 1), bias=False)
    self.bn = torch.nn.BatchNorm2d(out_features_conv)
    # Fully connected part, applied to the flattened feature map
    self.fc1 = torch.nn.Linear(in_features = 1024, out_features = 128)
    self.fc2 = torch.nn.Linear(in_features = 128, out_features = out_features_linear)

  def forward(self, x):
    x = F.relu(self.bn(self.conv(x)))
    # Flatten everything but the batch dimension
    x = x.view(x.size(0), -1)
    # Functional dropout tied to the mode of this module: a Dropout module
    # created on the fly would always be in training mode, and would keep
    # dropping features during evaluation as well
    x = F.dropout(x, p=.6, training=self.training)
    x = self.fc1(x)
    return self.fc2(x)

In [None]:
class JointConvNet(torch.nn.Module):
  """
  Variant of the joint network where the attribute classifiers do not work on
  the final embedding, but on an intermediate activation of the backbone: the
  output of `backbone.layer3[-1].relu`, captured through a forward hook.

  :params backbone (nn.Module) The initial part of the network, that extracts
    the features from the image, creating the embedding. It must expose
    `layer3[-1].relu`
  :params attr_classifiers (list[nn.Module]) The list of classifiers used to 
    predict the various attributes from the captured activation
  :params pid_classifier (nn.Module) The classifier used to predict the person
    ID from the rich embedding (re-weighted attributes + embedding)
  """
  def __init__(self, backbone, attr_classifiers, pid_classifier):
    super().__init__()
    # Number of classes of each attribute; their sum is the size of the
    # attributes prediction vector
    self.task_splits = torch.tensor([4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10])
    self.attr_reweight = AttrReweight(self.task_splits.sum())
    self.backbone = backbone
    self.multihead = MultiHead(torch.nn.Identity(), attr_classifiers)
    self.pid_classifier = pid_classifier
    # The hook is registered lazily, on the first forward pass
    self.first_time = True
    # Last activation captured by the hook
    self.conv_activation = None

  def conv_act(self, model, input, output): 
    """Forward hook: stores the output of the hooked module"""
    self.conv_activation = output

  def forward(self, x):
    """
    :return A dictionary with the keys 'embedding', 'attributes',
      'reweight_attributes', 'rich_embedding' and 'pid' ('pid' is None when the
      network is not in training mode)
    """
    if self.first_time:
      self.first_time = False
      hooked_module = self.backbone.layer3[-1].relu
      hooked_module.register_forward_hook(self.conv_act)
    # Running the backbone also refreshes self.conv_activation through the hook
    embedding = self.backbone(x)
    attributes = self.multihead(self.conv_activation)

    reweight_attributes = self.attr_reweight(attributes)
    rich_embedding = torch.hstack([reweight_attributes, embedding])

    # The person IDs are computed only when in training mode
    pid = self.pid_classifier(rich_embedding) if self.training else None

    return dict(
      embedding=embedding,
      attributes=attributes,
      reweight_attributes=reweight_attributes,
      rich_embedding=rich_embedding,
      pid=pid,
    )
  

### MultiHead architecture

In [None]:
class MultiHead(torch.nn.Module):
  """
  A shared backbone followed by several heads: every head receives the same
  features, and the outputs of the heads are concatenated along dimension 1.

  :backbone (nn.Module) Computes the features shared by all the heads
  :heads (iterable[nn.Module]) The heads, in the order of concatenation
  """
  def __init__(self, backbone, heads):
    super().__init__()
    self.backbone = backbone
    # A ModuleList is needed to register the heads (and their parameters)
    self.heads = torch.nn.ModuleList(heads)

  def forward(self, x):
    shared = self.backbone(x)
    per_head = []
    for head in self.heads:
      per_head.append(head(shared))
    return torch.cat(per_head, dim=1)

### Backbone

#### Trained-from-scratch backbone

##### SE-DenseNet

In [None]:
class BasicLayer(torch.nn.Sequential):
  """
  Building block of the dense network, applied in this order:
  BatchNorm2d -> LeakyReLU -> Conv2d (no bias, padding = kernel_size // 2)
  -> Dropout -> any extra module.

  :in_channels (int) Channels of the input
  :out_channels (int) Channels produced by the convolution
  :kernel_size (int) Kernel size of the convolution
  :stride Stride of the convolution
  :droprate (float) Probability of the dropout
  :extra_modules (list[nn.Module]) Modules appended after the dropout
  """
  def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride=1, droprate=0., extra_modules=[]):
    norm = torch.nn.BatchNorm2d(in_channels)
    activation = torch.nn.LeakyReLU()
    # Half of the kernel as padding keeps the spatial size when stride is 1
    # and the kernel size is odd
    conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size,
                           stride=stride, padding=kernel_size // 2, bias=False)
    dropout = torch.nn.Dropout(droprate)
    super().__init__(norm, activation, conv, dropout, *extra_modules)

class BottleneckLayer(BasicLayer):
  """A BasicLayer with a 1x1 convolution"""
  def __init__(self, in_channels: int, out_channels: int, droprate=0.):
    super().__init__(in_channels, out_channels, kernel_size=1, droprate=droprate)

class CompositionLayer(BasicLayer):
  """A BasicLayer with a 3x3 convolution"""
  def __init__(self, in_channels: int, out_channels: int, droprate=0.):
    super().__init__(in_channels, out_channels, kernel_size=3, droprate=droprate)

class TransitionBlock(BasicLayer):
  """
  A BasicLayer with a 1x1 convolution that reduces the channels to
  floor(in_channels * compression), followed by a 2x2 average pooling.
  """
  def __init__(self, in_channels: int, compression, droprate=0.):
    compressed_channels = math.floor(in_channels * compression)
    pooling = torch.nn.AvgPool2d(kernel_size=2)
    super().__init__(in_channels, compressed_channels, kernel_size=1,
                     droprate=droprate, extra_modules=[pooling])

class DenseLayer(torch.nn.Module):
  """
  A bottleneck (1x1 convolution producing bn_size * growth_rate channels)
  followed by a composition (3x3 convolution producing growth_rate channels).

  :memory_efficient (bool) If True, the bottleneck is run through gradient
    checkpointing whenever the input requires gradients
  """
  def __init__(self, in_channels: int, growth_rate: int, bn_size: int, memory_efficient: bool, droprate=0.):
    super().__init__()
    self.memory_efficient = memory_efficient
    bottleneck_channels = bn_size * growth_rate
    self.bottleneck = BottleneckLayer(in_channels, bottleneck_channels, droprate=droprate)
    self.composition = CompositionLayer(bottleneck_channels, growth_rate, droprate=droprate)

  def forward(self, x):
    use_checkpoint = self.memory_efficient and x.requires_grad
    if use_checkpoint:
      # The activations of the bottleneck are recomputed during the backward pass
      squeezed = cp.checkpoint((lambda t: self.bottleneck(t)), x)
    else:
      squeezed = self.bottleneck(x)
    return self.composition(squeezed)

class DenseBlock(torch.nn.Module):
  """
  A stack of DenseLayers with dense connectivity: the output of every layer is
  concatenated (along the channels) to its input before reaching the next one,
  so the block outputs in_channels + layers_count * growth_rate channels.

  :use_SE (bool) If True, a Squeeze-and-Excitation block is applied to the output
  """
  def __init__(self, layers_count: int, in_channels: int, growth_rate: int, bn_size: int, memory_efficient: bool, use_SE: bool, droprate=0.):
    super().__init__()
    stack = []
    for position in range(layers_count):
      # Every layer sees the block input plus the features of all previous layers
      seen_channels = in_channels + position * growth_rate
      stack.append(DenseLayer(seen_channels, growth_rate, bn_size, memory_efficient, droprate=droprate))
    self.layers = torch.nn.ModuleList(stack)
    self.use_SE = use_SE
    if self.use_SE:
      self.se = SEBlock(in_channels + layers_count * growth_rate)

  def forward(self, x):
    features = x
    for layer in self.layers:
      features = torch.cat((features, layer(features)), 1)
    if not self.use_SE:
      return features
    return self.se(features)

class ClassificationLayer(torch.nn.Sequential):
  """
  Final layer of a head: ReLU, global average pooling, flattening and a fully
  connected layer producing one score per class.

  :in_channels (int) Channels of the incoming feature map
  :classes_count (int) Number of classes to predict
  """
  def __init__(self, in_channels, classes_count):
    activation = torch.nn.ReLU()
    # Global pooling: one value per channel, whatever the spatial size
    global_pool = torch.nn.AdaptiveAvgPool2d(1)
    flatten = torch.nn.Flatten()
    classifier = torch.nn.Linear(in_channels, classes_count)
    super().__init__(activation, global_pool, flatten, classifier)

class MTDenseNet(MultiHead):
  """
  Multi-task DenseNet: a MultiHead network whose shared backbone and
  task-specific heads are both made of dense blocks.

  :starting_channels (int) Channels produced by the initial 7x7 convolution
  :backbone_config (tuple[int]) Number of layers of each dense block of the
    backbone; every block is followed by a transition block
  :head_configs (list) One entry per task: the number of layers of each dense
    block of that head. It is zipped with tasks_splits, so the number of heads
    is the length of the shorter of the two
  :tasks_splits (list[int]) Number of classes of each task
  :growth_rate, bn_size, memory_efficient, use_SE Passed to every DenseBlock
  :compression Passed to every TransitionBlock
  :droprate Passed to every DenseBlock and TransitionBlock
  """
  def __init__(self, starting_channels: int, backbone_config: tuple, head_configs: list,
               tasks_splits = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10],
               growth_rate=32, bn_size=4, compression=.5, droprate=0., memory_efficient=False, use_SE=False):
    # The backbone starts with an initial convolution and average pooling
    backbone = [
      BasicLayer(3, starting_channels, 7, stride=2),
      torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
    ]
    # And continues with a series of dense blocks with the number of layers specified by backbone_config
    # `channels` tracks the number of channels flowing out of the last appended module
    channels = starting_channels
    for layers_count in backbone_config:
      backbone.append(DenseBlock(layers_count, channels, growth_rate, bn_size, memory_efficient, droprate=droprate, use_SE=use_SE))
      # A dense block adds growth_rate channels per layer...
      channels += layers_count * growth_rate
      backbone.append(TransitionBlock(channels, compression, droprate=droprate))
      # ...and a transition block compresses them (same formula used by TransitionBlock)
      channels = math.floor(channels * compression)
    # heads is the list holding every task-specific part of the network
    heads = []
    final_backbone_channels = channels
    for head_config, classes_count in zip(head_configs, tasks_splits):
      # Every head starts from the output of the backbone
      channels = final_backbone_channels
      head = []
      # Add the dense blocks to the head as specified by this head's head_config
      for i, layers_count in enumerate(head_config):
        head.append(DenseBlock(layers_count, channels, growth_rate, bn_size, memory_efficient, droprate=droprate, use_SE=use_SE))
        channels += layers_count * growth_rate
        # Do not append the transition block to the last dense block
        if i < len(head_config) - 1:
          head.append(TransitionBlock(channels, compression, droprate=droprate))
          channels = math.floor(channels * compression)
      # Append the final classification layer
      head.append(ClassificationLayer(channels, classes_count))
      # Add the current head to the heads
      heads.append(torch.nn.Sequential(*head))
    
    # The lists are built before calling the parent constructor, which registers them
    super().__init__(torch.nn.Sequential(*backbone), heads)

###### SEBlock
Squeeze-and-Excitation block

In [None]:
class SEBlock(torch.nn.Module):
  """
  Squeeze-and-Excitation block: rescales every channel of the input by a
  factor in [0, 1] computed from the global average of all the channels.

  :in_channels (int) Channels of the input (and of the output)
  :ratio (int) Reduction ratio of the hidden layer of the excitation network;
    the hidden layer always has at least one unit
  """
  def __init__(self, in_channels: int, ratio=16):
    super().__init__()
    self.squeeze = torch.nn.AdaptiveAvgPool2d(1)
    mid_channels = max(in_channels // ratio, 1)
    self.excitation = torch.nn.Sequential(
      torch.nn.Linear(in_channels, mid_channels, bias=False),
      torch.nn.ReLU(),
      torch.nn.Linear(mid_channels, in_channels, bias=False),
      torch.nn.Sigmoid()
    )
  def forward(self, x):
    # (N, C, H, W) -> (N, C, 1, 1) -> (N, C). Only the spatial dimensions are
    # flattened: a bare squeeze() would also drop the batch or the channel
    # dimension when their size is 1
    s = self.squeeze(x).flatten(1)
    # (N, C) -> (N, C, 1, 1), so that it broadcasts over the spatial dimensions
    e = self.excitation(s)[..., None, None]
    return x * e

#### Pre-trained backbones

In [None]:
def pretrained_backbone(architecture, hidden_size):
  """
  Load a pre-trained torchvision network and turn it into a feature extractor,
  by replacing its final classification layer with an Identity.

  :architecture (str) One of 'resnet', 'wide', 'res_next', 'shufflenet',
    'inception', 'dense', 'vgg'
  :hidden_size Accepted but not used

  :return net, embedding_size: the network and the size of the embedding it outputs
  :raises ValueError If the architecture is not one of the supported ones
  """
  if architecture == 'resnet':
    net = models.resnet18(pretrained=True)
  elif architecture == 'wide':
    net = models.wide_resnet50_2(pretrained=True)
  elif architecture == 'res_next':
    net = models.resnext50_32x4d(pretrained=True)
  elif architecture == 'shufflenet':
    net = models.shufflenet_v2_x1_0(pretrained=True)
  elif architecture == 'inception':
    net = models.inception_v3(pretrained=True)
  elif architecture == 'dense':
    net = models.densenet161(pretrained=True)
  elif architecture == 'vgg':
    # NOTE(review): the original code never loaded a VGG although it handled
    # the 'vgg' name below; VGG-16 is assumed here — confirm the intended variant
    net = models.vgg16(pretrained=True)
  else:
    # Fail early and clearly instead of hitting an unbound `net` further down
    raise ValueError(
      "Unknown architecture '{}': expected one of 'resnet', 'wide', 'res_next', "
      "'shufflenet', 'inception', 'dense', 'vgg'".format(architecture))
  
  if architecture == 'dense':
    # Different final layer name for the densenet
    embedding_size = net.classifier.in_features
    net.classifier = torch.nn.Identity()
  elif architecture == 'vgg':
    # The VGG classifier is a Sequential: the embedding is what enters its first layer
    embedding_size = net.classifier[0].in_features
    net.classifier = torch.nn.Identity()
  else:
    embedding_size = net.fc.in_features
    net.fc =  torch.nn.Identity()
  
  return net, embedding_size

##### Attribute Re-weighting module

Recalibrate predicted attributes based on a score called confidence score.

The confidence score encodes the relationships among the attributes themselves.

It is computed by means of:

1.  a linear layer mapping 41 features into 41 features
2.  a sigmoid layer to map the output of the previous layer in the range [0,1]

In [None]:
class AttrReweight(torch.nn.Module):
  """
  Attribute Re-weighting Module (https://arxiv.org/pdf/1703.07220.pdf)
  Recalibrates the predicted attributes by taking into account their
  relationship with the other attributes: a confidence score in [0, 1] is
  computed for every entry of the prediction (linear layer + sigmoid) and
  multiplied by the prediction itself.

  :params attr_size (int) 
          The size of the final attributes prediction vector
  :params softmax_flag(Bool) 
          If True, the confidence is computed from the per-task softmax
          probabilities of the prediction; if False, from the raw prediction
  :params task_split(list)
          Number of classes of each task: tells the softmax where each task
          starts and ends inside the prediction vector
  """
  def __init__(self, attr_size, softmax_flag=False, task_split = [4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10]):
    super().__init__()

    self.softmax_flag = softmax_flag
    self.task_split = task_split
    # attr_size features in, attr_size confidences in [0, 1] out
    to_scores = torch.nn.Linear(attr_size, attr_size)
    to_unit_range = torch.nn.Sigmoid()
    self.confidence_net = torch.nn.Sequential(to_scores, to_unit_range)

  def forward(self, output):
    """
    :output (Tensor) The attributes prediction, of size [batch_size, attr_size]
    :return The prediction multiplied element-wise by its confidence
    """
    confidence_input = output
    if self.softmax_flag == True:
      # One chunk per task, the i-th having size [batch_size, task_split[i]]
      per_task = torch.split(output, self.task_split, dim=-1)
      # Softmax inside each task, then back to a single [batch_size, attr_size] tensor
      confidence_input = torch.hstack([torch.softmax(chunk, dim=1) for chunk in per_task])

    confidence = self.confidence_net(confidence_input)
    # The raw prediction is the one being recalibrated, in both cases
    return output * confidence

# Routines

## Learning rate finder

Existing tools can be used to find ideal values for the learning rate or lr boundaries for the 1Cycle scheduler

https://github.com/davidtvs/pytorch-lr-finder

In [None]:
class TrainLRFinderLoader(TrainDataLoaderIter):
  """Adapter feeding the LR finder with the dictionary samples of the training loader"""
  def inputs_labels_from_batch(self, batch_data):
    # The images are the inputs, the annotations are the labels
    images = batch_data["image"]
    labels = batch_data["annotations"]
    return images, labels

class ValLRFinderLoader(ValDataLoaderIter):
  """Adapter feeding the LR finder with the dictionary samples of the validation loader"""
  def inputs_labels_from_batch(self, batch_data):
    # The images are the inputs, the annotations are the labels
    images = batch_data["image"]
    labels = batch_data["annotations"]
    return images, labels
    
def tune_lr(model, data, loss, optimizer, end_lr, num_iter, precise=False):
  """
  Run a learning rate range test and plot the resulting loss / learning rate
  graph. The model and the optimizer are restored to their initial state at the end.

  :data (dict) Needs the keys 'train' and 'val', holding the data loaders
  :end_lr The maximum learning rate tested
  :num_iter The number of iterations of the test
  :precise (bool) If True, the learning rate grows linearly, the loss is
    evaluated on the validation loader and the plot uses a linear scale
  """
  lr_finder = LRFinder(model, optimizer, loss, device="cuda")
  train_loader = TrainLRFinderLoader(data['train'])
  val_loader = ValLRFinderLoader(data['val'])
  if precise:
    test_options = dict(val_loader=val_loader, end_lr=end_lr, num_iter=num_iter, step_mode="linear")
    plot_options = dict(log_lr=False)
  else:
    test_options = dict(end_lr=end_lr, num_iter=num_iter)
    plot_options = dict()
  lr_finder.range_test(train_loader, **test_options)
  # Inspect the loss-learning rate graph
  lr_finder.plot(**plot_options)
  # Bring the model and the optimizer back to their initial state
  lr_finder.reset()

## Learning
Set of functions to learn the weights of the network and compute the metrics on the validation

### Re-identification
Re-identification code can be used during the training

#### Extract queries
Function to get a random list of possible queries and the corresponding ground truth reference from a dataset of embeddings. Used to get re-identification scores for training and validation sets.

Moreover, the function returns a suggested threshold: either a fixed value, or the mean, over the queries, of the cosine similarity between each query and the least similar embedding of the same class

In [None]:
def extract_queries(samples_emb, samples_pid, samples_name, utility, static_threshold = True):
  """
  From a group of embeddings with their names and pids, randomly select the
  queries, such that there is a single query for each pid having more than two
  samples. Additionally, provide a suggested threshold.

  :params samples_emb (torch.Tensor(dtype=float)) Embeddings of available samples
  :params samples_pid (torch.Tensor(dtype=int)) Continuous person IDs of available samples
  :params samples_name (list[String]) Names of available samples
  :params utility (String) With a static threshold, 'train' selects 0.75 and
    any other value selects .82
  :params static_threshold (Bool) If False, the threshold is computed from the
    embeddings: mean over the queries of the lowest cosine similarity between
    the query and the samples of its own pid

  :return ground_truth, queries_emb, queries_name, suggested_threshold
  """
  pids_count = samples_pid.max() + 1
  # Indices of the samples of each pid; pids with too few samples are left out
  groups = []
  for pid in range(pids_count):
    members = (samples_pid == pid).nonzero().squeeze()
    if members.numel() > 2:
      groups.append(members)
  # One random sample index out of every group
  picked = []
  for members in groups:
    position = torch.randint(members.numel(), ())
    picked.append(members[position])
  queries = torch.tensor(picked)
  # Names and embeddings of the queries
  queries_name = [samples_name[i] for i in queries]
  queries_emb = samples_emb[queries]
  # Ground truth: for every query, the names of the other samples of its group
  ground_truth = {}
  for query, query_name, members in zip(queries, queries_name, groups):
    ground_truth[query_name] = set(samples_name[i] for i in members if i != query)

  if static_threshold == False:
    # Lowest similarity between each query and the samples of its own group
    lowest_similarities = []
    for query_emb, members in zip(queries_emb, groups):
      similarities = F.cosine_similarity(query_emb[None, ...], samples_emb[members])
      lowest_similarities.append(similarities.min())
    suggested_threshold = torch.tensor(lowest_similarities).mean()
  elif utility == 'train':
    suggested_threshold = 0.75
  else: #utility == 'val'
    suggested_threshold=.82

  return ground_truth, queries_emb, queries_name, suggested_threshold

#### Re-identification

In [None]:
def reidentification(queries_emb, queries_name, samples_emb, samples_name, threshold):
  """
  Associate every query with the samples that look like the same person.

  For each (query name, query embedding) pair the cosine similarity against all
  the sample embeddings is computed; the names of the samples whose similarity is
  greater than or equal to `threshold` are collected, skipping any sample that
  carries the same file name as the query itself.

  :params queries_emb Embeddings of the queries, iterated along the first dimension
  :params queries_name Names of the queries, in the same order as `queries_emb`
  :params samples_emb Embeddings of the samples to search in
  :params samples_name Names of the samples, in the same order as `samples_emb`
  :params threshold Minimum cosine similarity required for a match

  :return dict mapping each query name to the list of matching sample names
  """
  def _gallery_matches(name, emb):
    # Similarity of this single query (as a one-row batch) against every sample
    scores = F.cosine_similarity(emb.unsqueeze(0), samples_emb)
    # Positions of the samples reaching the threshold, as plain Python ints
    hits = (scores >= threshold).nonzero().squeeze(-1).tolist()
    # The query must never be reported as a match of itself
    return [samples_name[pos] for pos in hits if samples_name[pos] != name]

  return {name: _gallery_matches(name, emb) for name, emb in zip(queries_name, queries_emb)}

### Train

In [None]:
def compute_loss(outputs, targets, task_splits=(4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10)):
  """
  Sum the cross-entropy losses of all the classification tasks.

  `outputs` is cut along its last dimension into one chunk of logits per task
  (chunk widths given by `task_splits`), `targets` is cut into columns of width 1,
  and chunk i is scored against column i with `torch.nn.CrossEntropyLoss`.

  :params outputs (torch.Tensor) Logits of all the tasks concatenated on the last dimension
  :params targets (torch.Tensor) Class indices, one column per task
  :params task_splits (iterable of int) Number of logits belonging to each task, in order.
    The default reproduces the widths that were previously hard-coded.

  :return Sum of the per-task losses (the int 0 if there is no task to score)
  """
  # Accept lists, tuples and 1-D tensors alike
  widths = [int(width) for width in task_splits]
  task_inputs = torch.split(outputs, widths, dim=-1)
  # Divide the target into T columns of width 1
  task_targets = torch.split(targets, 1, dim=-1)
  criterion = torch.nn.CrossEntropyLoss()
  total = 0
  for logits, target in zip(task_inputs, task_targets):
    # Targets carry no gradient, so they only need flattening to shape (N,)
    total = total + criterion(logits, target.flatten())
  return total

In [None]:
def train(net, data_loader, loss, optimizer, scheduler, scaler, device='cuda'):
  """
  Train the network for one epoch using autocast and gradient scaling.

  Every batch is forwarded under `torch.cuda.amp.autocast()`, the loss is
  back-propagated through `scaler`, and the optimizer (plus the scheduler, when
  one is given) is stepped once per batch. While training, attribute predictions
  are fed to a `Metrics` object and the embeddings are accumulated so that a
  re-identification evaluation can be run on the whole epoch at the end.

  :params net Network returning a dict with the keys 'attributes' and 'embedding'
  :params data_loader Iterable of samples with the keys 'image', 'annotations',
    'person_id' and 'img_file'
  :params loss Callable invoked as loss(outputs, attr_targets, pid_labels)
  :params optimizer Optimizer updating the trainable parameters
  :params scheduler LR scheduler stepped after every batch, or None
  :params scaler Gradient scaler used for the backward pass and the optimizer step
  :params device Device the batches are moved to

  :return (average loss over the batches, Metrics of the epoch)
  """
  batch_count = 0
  cumulative_loss = 0.
  metrics = Metrics(device=device)

  samples_emb = []
  samples_pid = []
  samples_name = []

  net.train()
  for sample in tqdm(data_loader, desc='Training', leave=False):
    # Load data into GPU
    inputs = sample['image'].to(device)
    attr_targets = sample['annotations'].to(device)
    pid_labels = sample['person_id'].to(device)
    # Reset the gradients
    optimizer.zero_grad()
    # Forward pass with autocasting
    with torch.cuda.amp.autocast():
      outputs = net(inputs)
      loss_result = loss(outputs, attr_targets, pid_labels)
    # Loss scaling and backward pass
    scaler.scale(loss_result).backward()
    # Optimizer step under scaling
    scaler.step(optimizer)
    scaler.update()
    # Updates the scheduler for next iteration
    if scheduler is not None:
      scheduler.step()
    # Storing execution information
    batch_count += 1
    cumulative_loss += loss_result.item()
    # Attributes evaluation
    attr_predict = prediction_to_target(outputs['attributes'])
    metrics.add(attr_predict, attr_targets)
    # Re-id accumulation. The embeddings are detached: they are only used for
    # evaluation, and keeping them attached would hold the autograd graph of
    # every batch in memory until the end of the epoch.
    samples_emb.append(outputs['embedding'].detach())
    samples_pid.append(pid_labels)
    samples_name.extend(sample['img_file'])
  # Pack re-id components
  samples_emb = torch.vstack(samples_emb)
  samples_pid = torch.cat(samples_pid)
  # Re-identification
  ground_truth, queries_emb, queries_name, threshold = extract_queries(samples_emb, samples_pid, samples_name, static_threshold = True, utility='train')
  predicted_pids = reidentification(queries_emb, queries_name, samples_emb, samples_name, threshold)
  metrics.add_reid(predicted_pids, ground_truth, threshold)

  # LBTWFocal losses need to be reset at the end of every epoch
  if isinstance(loss, LBTWFocal):
    loss.epoch_reset()

  return cumulative_loss/batch_count, metrics

### Validate

In [None]:
def validate(net, data_loader, loss, device='cuda'):
  """
  Evaluate the network on a data loader without updating its weights.

  The network is switched to evaluation mode and every batch is forwarded with
  gradients disabled. The loss is averaged over the batches, attribute
  predictions are accumulated in a `Metrics` object, and the collected
  embeddings are used for a re-identification evaluation at the end.

  :params net Network returning a dict with the keys 'attributes' and 'embedding'
  :params data_loader Iterable of samples with the keys 'image', 'annotations',
    'person_id' and 'img_file'
  :params loss Callable invoked as loss(outputs, attr_targets, pid_labels)
  :params device Device the batches are moved to

  :return (average loss over the batches, Metrics of the evaluation)
  """
  metrics = Metrics(device=device)
  emb_chunks, pid_chunks, names = [], [], []
  n_batches = 0
  loss_total = 0.

  # eval() matters for layers behaving differently between train and test
  net.eval()
  with torch.no_grad():
    for batch in tqdm(data_loader, desc='Validation', leave=False):
      # Move the batch to the target device
      images = batch['image'].to(device)
      attr_targets = batch['annotations'].to(device)
      pids = batch['person_id'].to(device)
      # Forward pass and loss
      outputs = net(images)
      batch_loss = loss(outputs, attr_targets, pids)

      # Running statistics
      n_batches += 1
      loss_total += batch_loss.item()
      # Attribute recognition metrics
      metrics.add(prediction_to_target(outputs['attributes']), attr_targets)

      # Material for the re-identification evaluation
      emb_chunks.append(outputs['embedding'])
      pid_chunks.append(pids)
      names.extend(batch['img_file'])

  all_emb = torch.vstack(emb_chunks)
  all_pid = torch.cat(pid_chunks)
  # Pick the queries, search them among all the samples and score the result
  ground_truth, queries_emb, queries_name, threshold = extract_queries(all_emb, all_pid, names, static_threshold = True, utility='val')
  matches = reidentification(queries_emb, queries_name, all_emb, names, threshold)
  metrics.add_reid(matches, ground_truth, threshold)

  # LBTWFocal losses are reset at the end of the pass
  if isinstance(loss, LBTWFocal):
    loss.epoch_reset()

  return loss_total/n_batches, metrics

### Fit

In [None]:
class EarlyStopping():
  """
  Keeps track of a per-epoch metric to detect the best epoch and overfitting.

  :params metric ('score'|'loss') Which type of metric is analyzed. If it's a score,
  it should be maximized, if it's a loss, it should be minimized
  :params patience_epochs (int) Number of consecutive epochs without improvement
  that are tolerated; `check_overfitting` becomes True once they are exceeded
  """
  def __init__(self, metric = 'score', patience_epochs=10):
    # Metric can be either a score (to be maximized) or a loss (to be minimized)
    if metric not in ('score', 'loss'):
      raise ValueError('An invalid "metric" paramater has been specified')
    self.patience_epochs = patience_epochs
    self.metric = metric
    # Number of metrics received so far
    self.epochs_count = 0
    # Consecutive epochs elapsed since the last improvement
    self.wasted_epochs = 0
    # Best metric seen so far (None until the first one arrives) and its 1-based epoch
    self.best_metric = None
    self.best_epoch = 0

  def add_metric(self, new_metric):
    """
    Register the metric of a new epoch.

    :params new_metric Metric of the epoch, comparable with the previous ones
    :return True if this epoch is the best one seen so far
    """
    self.epochs_count += 1
    if self.best_metric is None:
      # Always improves at the start. An identity check is used on purpose:
      # `== None` would delegate to the metric's own __eq__ (tensors, arrays, ...)
      improved = True
    elif self.metric == 'score':
      # If the score has increased, there is no overfitting
      improved = bool(new_metric > self.best_metric)
    else:
      # If the loss has decreased, there is no overfitting
      improved = bool(new_metric < self.best_metric)

    if improved:
      self.wasted_epochs = 0
      self.best_epoch = self.epochs_count
      self.best_metric = new_metric
    else:
      self.wasted_epochs += 1

    return self.check_best_model()

  def check_best_model(self):
    """ Returns True if the current model is the best model """
    # If the wasted epochs are 0, the model is currently the best model
    return self.wasted_epochs == 0

  def check_overfitting(self):
    """ Returns True if more than `patience_epochs` epochs passed without improvements """
    return self.wasted_epochs > self.patience_epochs

In [None]:
def fit(model, epochs, max_lr, early_stopping_patience=50):
  """
  Train `model.net` for up to `epochs` epochs with early stopping.

  After every epoch the network is validated, the results are logged (console and
  TensorBoard under "runs/exp1") and the sum of the validation mean class-averaged
  accuracy and the validation re-id mAP is used as the early-stopping score.
  The weights of the best epoch are saved to 'model_weights.pth' and reloaded
  into `model.net` once the training ends.

  :params model Object exposing `net`, `train_loader`, `val_loader`,
    `multitask_loss` and `optimizer`
  :params epochs (int) Maximum number of epochs
  :params max_lr Kept for interface compatibility; currently unused because no
    LR scheduler is instantiated
  :params early_stopping_patience (int) Patience passed to `EarlyStopping`
  """
  # Automatically set the best available device
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  # Create the Early Stopping mechanism
  early_stopping = EarlyStopping(metric='score', patience_epochs = early_stopping_patience)

  # Creates a logger for the experiment
  writer = SummaryWriter(log_dir="runs/exp1")

  # No LR scheduler is used at the moment
  scheduler = None

  # Gradient scaler for the mixed precision training
  scaler = torch.cuda.amp.GradScaler()

  try:
    # For each epoch, train the network and then compute evaluation results
    for e in range(epochs):
      print(f"---Epoch {e+1}:---------------------------------")
      tr_error, tr_metric = train(model.net, model.train_loader, model.multitask_loss, model.optimizer, scheduler, scaler, device=device)
      val_error, val_metric = validate(model.net, model.val_loader, model.multitask_loss, device=device)
      log(tr_error, val_error, tr_metric, val_metric)
      log_tensorboard("Train", writer, e, tr_error, tr_metric)
      log_tensorboard("Val", writer, e, val_error, val_metric)
      # Early stopping control
      score = val_metric.mAcc().mean() + val_metric.reid_mAP()
      early_stopping.add_metric(score)
      # Save the model if it's currently the best
      if early_stopping.check_best_model():
        torch.save(model.net.state_dict(), 'model_weights.pth')
      # Stop the training if it's overfitting
      if early_stopping.check_overfitting():
        print(f"Training stopped because {early_stopping_patience} epochs passed without improvements")
        break
  finally:
    # Closes the logger even if an epoch fails, so that pending events are flushed
    writer.close()

  # Reload the weights of the best model
  print("The best weights have been saved as 'model_weights.pth'")
  print(f"The best epoch was Epoch {early_stopping.best_epoch}")
  model.net.load_state_dict(torch.load('model_weights.pth'))


## Predict

In [None]:
def predict(net, data, threshold=.82):
  """
  Run the network on the test and query images and save its predictions.

  The test loader provides attribute logits and embeddings, the queries loader
  provides the embeddings to search for. Attribute annotations are saved through
  `save_attributes`, re-identification matches through `save_reids`.

  :params net Network returning a dict with the keys 'attributes' and 'embedding'
  :params data Mapping providing the "test" and "queries" loaders
  :params threshold Minimum cosine similarity for a re-identification match

  :return The attribute annotations predicted for the test samples
  """
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  net = net.to(device)
  test_loader, queries_loader = data["test"], data["queries"]

  def _forward_all(loader, desc, keep_attributes):
    # Forward every batch of the loader, collecting embeddings, file names and,
    # on request, the attribute logits
    attrs, embs, names = [], [], []
    for batch in tqdm(loader, desc=desc, leave=False):
      outputs = net(batch['image'].to(device))
      if keep_attributes:
        attrs.append(outputs['attributes'])
      embs.append(outputs['embedding'])
      names.extend(batch['img_file'])
    return attrs, embs, names

  net.eval()
  with torch.no_grad():
    gallery_attr, gallery_emb, gallery_name = _forward_all(test_loader, 'Test', True)
    _, probe_emb, probe_name = _forward_all(queries_loader, 'Queries', False)

  # Stack the per-batch results into single tensors
  gallery_attr = torch.vstack(gallery_attr)
  gallery_emb = torch.vstack(gallery_emb)
  probe_emb = torch.vstack(probe_emb)

  # Re-identification: search the queries in the test set
  predicted_pids = reidentification(probe_emb, probe_name, gallery_emb, gallery_name, threshold)
  # Attribute annotations go to a dataframe file, re-IDs to a text file
  annotations = target_to_annotation(prediction_to_target(gallery_attr))
  save_attributes(annotations, gallery_name)
  save_reids(predicted_pids)

  return annotations

In [None]:
def save_attributes(annotations, file_names, save_file='classification_test'):
  """
  Write the attribute annotations to "<save_file>.csv".

  The rows are indexed by file name (index column 'file') and the values are
  cast to int before being written.

  :params annotations 2D data with one row per file and one column per attribute
  :params file_names Names of the files, one per row of `annotations`
  :params save_file Path of the output file, without the ".csv" extension
  """
  person_columns = ['age', 'backpack', 'bag', 'handbag', 'clothes', 'down', 'up', 'hair', 'hat', 'gender']
  up_columns = ['upblack', 'upwhite', 'upred', 'uppurple', 'upyellow', 'upgray', 'upblue', 'upgreen']
  down_columns = ['downblack', 'downwhite', 'downpink', 'downpurple', 'downyellow', 'downgray', 'downblue', 'downgreen', 'downbrown']
  table = pd.DataFrame(
      annotations,
      index=pd.Index(file_names, name='file'),
      columns=person_columns + up_columns + down_columns)
  table.astype(int).to_csv(f"{save_file}.csv")

In [None]:
def save_reids(predicted_pids, save_file='reid_test'):
  """
  Write the re-identification results to "<save_file>.txt".

  Every query takes one line, formatted as "<query>: <match>, <match>, ...".

  :params predicted_pids dict mapping each query name to the list of matching names
  :params save_file Path of the output file, without the ".txt" extension
  """
  with open(f"{save_file}.txt", mode='w') as out:
    for query, matches in predicted_pids.items():
      joined = ', '.join(matches)
      out.write(f"{query}: " + joined + '\n')

# Execution

In [None]:
# Load the TensorBoard notebook extension and open the dashboard on the `runs`
# directory, which is where `fit` writes its logs ("runs/exp1")
%load_ext tensorboard
%tensorboard --logdir=runs

## Training instances

Fetch the DataLoaders (only the batch size is set, the splits are already created)
- 'full': the Dataloader of the entire dataset
- 'train': the Dataloader of the training set
- 'val': the Dataloader of the validation set
- 'test': the Dataloader of the test set
- 'class_weight': the class weights for every task, computed on the training set
- 'train_pids_count': number of normalized IDs present in the training set [0, train_pids_count)
- 'val_pids_count': number of normalized IDs present in the validation set [0, val_pids_count)

Model Conv + Linear

Optimizer

In [None]:
def get_optimizer(net, cost_function=None, learning_rate=0.01, betas=(0.9, 0.999), weight_decay=0.01):
    """
    Build an AdamW optimizer for the network and, optionally, for the loss.

    :params net (torch.nn.Module) Network whose parameters are optimized
    :params cost_function (torch.nn.Module|None) Loss module whose parameters are
      optimized together with the network. When None, only the network
      parameters are optimized.
    :params learning_rate (float) Learning rate of every parameter group
    :params betas (tuple) AdamW betas
    :params weight_decay (float) AdamW weight decay

    :return torch.optim.AdamW with one parameter group per optimized module
    """
    param_groups = [{'params': net.parameters(), 'lr': learning_rate}]
    # The documented default (None) must not be dereferenced
    if cost_function is not None:
        param_groups.append({'params': cost_function.parameters(), 'lr': learning_rate})

    return torch.optim.AdamW(param_groups, betas=betas, weight_decay=weight_decay)

CUDA memory cleaning

In [None]:
# Clear the CUDA memory: first collect unreachable Python objects (which may
# still hold tensors), then ask PyTorch to release its unused cached GPU memory
gc.collect()
torch.cuda.empty_cache()

In [None]:
class Model():
  """
  Groups everything needed by a training run: data loaders, network, losses
  and optimizer.

  :params task_splits Number of classes of each attribute task (one classifier
    is created per entry; `.sum()` is called on it)
  :params batch_size Batch size passed to `get_data`
  :params hidden_size Hidden size passed to the backbone and to the attribute classifiers
  :params learning_rate Learning rate of the optimizer
  :params weight_decay Weight decay of the optimizer
  :params expand_images If True, the strides of the first convolution and of the
    max pooling of the backbone are changed to (2, 1) and (1, 1)
  :params fc_dim Output features of the convolution in every attribute classifier
  :params shape Input spatial shape of every attribute classifier
  """

  def __init__(self, task_splits, batch_size = 128, hidden_size = 256, learning_rate=1e-3, weight_decay=1e-1, expand_images=True, fc_dim=16, shape=(16,16)):
    self.task_splits = task_splits
    self.hidden_size = hidden_size
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # DataLoaders and class weights (computed wrt the sampler probabilities)
    loaders = get_data(batch_size=batch_size)
    self.data = loaders
    self.train_loader = loaders["train"]
    self.val_loader = loaders["val"]
    self.class_weight = loaders["class_weight"].to(self.device)

    # Backbone; the "rich" embedding adds one value per attribute class
    backbone, emb_size = pretrained_backbone('resnet', hidden_size = hidden_size)
    self.RICH_EMB_SIZE = emb_size + task_splits.sum()

    if expand_images == True:
      backbone.conv1.stride = (2, 1)
      backbone.maxpool.stride = (1, 1)

    # Attribute recognition: one convolutional classifier per task
    attr_classifiers = []
    for num_classes in task_splits:
      head = ConvBlock(in_spatial_shape=shape, in_features=hidden_size, out_features_conv=fc_dim, out_features_linear=num_classes, kernel_size=(3,3))
      attr_classifiers.append(torch.nn.Sequential(head))

    # Person ID classifier working on the rich embedding
    pid_classifier = torch.nn.Sequential(torch.nn.Dropout(.6), torch.nn.Linear(self.RICH_EMB_SIZE, self.data['train_pids_count']))

    self.net = JointConvNet(backbone, attr_classifiers, pid_classifier).to(self.device)

    # Initialize the network weights
    self.net.apply(initialize_weights)

    # Loss functions
    self.attr_rec_loss = UncertFocal(gamma=2, weight= self.data['class_weight']).to(self.device)
    self.multitask_loss = JointLoss(self.RICH_EMB_SIZE, self.attr_rec_loss).to(self.device)

    # Optimizer over both the network and the loss parameters
    self.optimizer = get_optimizer(self.net, cost_function=self.multitask_loss, learning_rate=learning_rate, weight_decay=weight_decay)

## Training

In [None]:
# Number of classes of each attribute task: `Model` builds one classifier per entry
task_splits = torch.tensor([4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 9, 10])
model = Model(task_splits = task_splits)

# Training the defined network
# Remove the TensorBoard logs of previous runs before starting a new one
! rm -rf runs
# NOTE: `fit` does not instantiate any LR scheduler, so `max_lr` has no effect here
fit(model, epochs=40, max_lr=1e-3, early_stopping_patience=10)


embedding size:  512
---Epoch 1:---------------------------------




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  75.2      74.7  71.1     83.8   79.1  70.1  88.4  61.5  91.9    61.7   
Val    79.6      74.4  72.6     94.1   94.6  82.9  91.2  75.6  98.1    65.2   

       UpColor  DownColor  
Train     42.3       39.8  
Val       54.3       29.7  
Train average Acc: 0.69978094
Val average Acc: 0.760225
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  25.8      50.6  50.6     50.9   53.5  67.7  50.2  59.1  50.9    61.7   
Val    33.8      51.2  48.8     50.6   51.1  83.6  49.6  58.8  49.9    63.5   

       UpColor  DownColor  
Train     31.9       18.4  
Val       45.5       19.0  
Train average mAcc: 0.47607756
Val average mAcc: 0.5045742
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.005578813936367555
Val mAP: 0.08279580546645364
==== Loss ====
Train: 9.76310651198677



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  77.7      75.7  72.9     86.0   81.4  79.6  89.9  69.5  92.4    71.6   
Val    78.6      72.2  74.1     93.0   90.0  85.7  91.9  74.1  97.8    67.9   

       UpColor  DownColor  
Train     60.8       51.2  
Val       63.7       56.9  
Train average Acc: 0.7571945
Val average Acc: 0.78815186
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  27.4      52.4  50.9     50.9   58.7  78.4  50.6  67.8  50.7    71.7   
Val    34.6      51.9  49.7     49.5   56.4  84.1  50.0  64.7  49.7    65.7   

       UpColor  DownColor  
Train     53.8       31.7  
Val       56.8       45.1  
Train average mAcc: 0.53752875
Val average mAcc: 0.5485917
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.015635306413772972
Val mAP: 0.14289892012945432
==== Loss ====
Train: 7.5079664437667



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  78.2      74.8  73.3     85.3   82.5  81.6  88.1  72.4  92.4    74.4   
Val    78.7      74.5  74.3     93.9   90.4  88.3  92.0  73.5  98.2    73.9   

       UpColor  DownColor  
Train     64.5       57.6  
Val       62.0       64.7  
Train average Acc: 0.77080196
Val average Acc: 0.8037449
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  31.7      54.0  51.3     51.3   63.9  80.9  50.4  70.4  52.2    74.4   
Val    34.5      52.3  50.2     50.0   62.6  88.8  50.5  68.2  50.0    70.4   

       UpColor  DownColor  
Train     59.2       42.1  
Val       58.0       50.1  
Train average mAcc: 0.5684337
Val average mAcc: 0.5711769
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.024672001357929556
Val mAP: 0.20771477074943792
==== Loss ====
Train: 6.30366103545479



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  79.3      76.4  73.8     85.9   84.7  83.5  90.2  75.2  93.1    77.5   
Val    76.8      73.0  73.8     93.4   89.0  87.3  91.8  66.7  98.0    63.4   

       UpColor  DownColor  
Train     68.2       60.6  
Val       59.7       61.0  
Train average Acc: 0.79029167
Val average Acc: 0.7782464
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  38.0      57.1  52.6     51.4   68.1  82.7  50.7  73.8  56.7    77.5   
Val    37.0      55.7  49.7     50.3   63.6  84.5  49.9  72.6  49.8    67.9   

       UpColor  DownColor  
Train     63.8       50.9  
Val       54.9       50.4  
Train average mAcc: 0.6027934
Val average mAcc: 0.5718929
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.036243111444448604
Val mAP: 0.19937932832233468
==== Loss ====
Train: 5.45665629428365



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  81.1      78.2  74.2     85.7   85.5  85.2  89.7  76.3  93.8    78.2   
Val    71.5      74.3  73.4     92.7   94.5  89.0  92.5  77.4  97.2    74.8   

       UpColor  DownColor  
Train     69.4       62.7  
Val       62.8       64.1  
Train average Acc: 0.8000309
Val average Acc: 0.8034253
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  47.5      59.9  53.7     52.2   70.8  84.4  52.4  75.2  61.7    78.3   
Val    39.4      55.6  50.6     49.9   60.4  88.4  58.1  69.9  53.9    70.9   

       UpColor  DownColor  
Train     65.3       54.1  
Val       62.3       45.3  
Train average mAcc: 0.629486
Val average mAcc: 0.58725536
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.03987404377809174
Val mAP: 0.2486551642098085
==== Loss ====
Train: 4.975229342087456
V



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  79.6      79.3  74.9     85.4   86.6  86.3  90.3  78.1  94.1    80.2   
Val    74.5      74.3  74.2     93.5   95.0  89.2  92.9  83.7  95.7    76.8   

       UpColor  DownColor  
Train     70.7       65.2  
Val       41.5       60.9  
Train average Acc: 0.8087487
Val average Acc: 0.7935838
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  46.5      63.4  55.1     52.3   73.5  85.6  57.4  76.9  67.1    80.2   
Val    33.9      53.0  50.1     50.9   64.1  89.4  57.0  71.6  55.4    72.4   

       UpColor  DownColor  
Train     66.3       57.6  
Val       55.2       45.4  
Train average mAcc: 0.6515516
Val average mAcc: 0.5821583
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.054398073875348174
Val mAP: 0.2249498363569767
==== Loss ====
Train: 4.639583699599556




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  81.2      80.7  75.2     85.8   87.7  87.0  91.1  79.8  94.4    81.5   
Val    77.4      71.8  72.7     93.5   93.9  90.9  91.8  82.1  96.9    77.0   

       UpColor  DownColor  
Train     72.3       66.5  
Val       66.0       67.9  
Train average Acc: 0.81923527
Val average Acc: 0.81818765
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  51.8      65.3  56.7     53.4   76.7  86.4  61.3  78.7  67.7    81.5   
Val    35.3      61.8  53.6     50.3   72.1  90.3  52.5  79.3  58.2    77.7   

       UpColor  DownColor  
Train     68.5       60.5  
Val       59.4       52.6  
Train average mAcc: 0.6737613
Val average mAcc: 0.61934984
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.06121381088863044
Val mAP: 0.30993300973083704
==== Loss ====
Train: 4.3707473713418



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  82.6      81.2  76.0     86.8   88.4  88.5  91.7  80.9  94.9    83.5   
Val    78.1      73.2  72.3     92.7   87.0  91.3  90.2  74.0  95.6    72.2   

       UpColor  DownColor  
Train     73.0       67.7  
Val       67.3       68.7  
Train average Acc: 0.8293452
Val average Acc: 0.8020195
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  57.4      67.1  58.8     55.7   78.3  88.1  65.4  79.9  72.2    83.6   
Val    34.8      65.1  54.9     50.5   74.4  90.5  56.4  79.7  57.5    76.0   

       UpColor  DownColor  
Train     69.4       63.5  
Val       61.5       51.9  
Train average mAcc: 0.6993898
Val average mAcc: 0.6276599
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.07188750097006369
Val mAP: 0.33520209096748266
==== Loss ====
Train: 4.189914114578911




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  82.7      81.8  76.4     86.7   89.1  88.3  91.9  81.6  95.5    83.7   
Val    74.4      75.0  74.5     91.8   95.4  90.3  89.3  84.0  97.4    78.8   

       UpColor  DownColor  
Train     74.6       68.7  
Val       62.0       63.4  
Train average Acc: 0.83426905
Val average Acc: 0.8135864
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  58.6      68.8  59.9     59.0   79.7  87.9  68.4  80.7  74.5    83.8   
Val    38.0      59.3  50.2     53.6   61.8  91.4  59.9  77.4  49.5    77.8   

       UpColor  DownColor  
Train     70.9       65.3  
Val       61.7       42.9  
Train average mAcc: 0.71455824
Val average mAcc: 0.60294414
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.08129635053694194
Val mAP: 0.27759849404610004
==== Loss ====
Train: 4.0311102867126



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  83.5      83.5  77.5     86.7   90.4  88.7  92.6  82.9  95.8    85.0   
Val    67.1      74.8  71.5     92.6   94.2  89.1  91.3  81.4  97.5    79.8   

       UpColor  DownColor  
Train     75.0       70.1  
Val       42.9       55.1  
Train average Acc: 0.84311813
Val average Acc: 0.78125
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  60.5      71.9  62.1     57.6   81.8  88.3  70.4  82.0  76.8    85.0   
Val    37.6      57.1  54.9     53.5   64.5  90.4  60.1  78.1  56.3    76.7   

       UpColor  DownColor  
Train     71.7       67.8  
Val       50.7       41.7  
Train average mAcc: 0.72973365
Val average mAcc: 0.60132825
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.09551927041670012
Val mAP: 0.305266187706797
==== Loss ====
Train: 3.9159207530643627




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  83.8      83.9  77.0     86.9   90.6  89.7  93.3  83.9  95.7    86.0   
Val    68.7      76.2  72.4     93.6   94.5  80.9  92.5  83.6  95.6    79.9   

       UpColor  DownColor  
Train     75.2       69.8  
Val       62.3       62.2  
Train average Acc: 0.84652424
Val average Acc: 0.8019556
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  63.7      71.8  61.9     60.3   82.3  89.3  74.9  82.9  75.6    86.1   
Val    37.1      63.7  53.0     55.2   67.3  84.1  58.1  79.2  57.5    74.3   

       UpColor  DownColor  
Train     71.5       66.9  
Val       60.4       45.5  
Train average mAcc: 0.73927486
Val average mAcc: 0.612874
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.11054803254459912
Val mAP: 0.30453895691424704
==== Loss ====
Train: 3.837323199147763



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  84.7      84.1  78.5     87.9   90.8  88.3  93.7  83.8  96.4    83.9   
Val    77.4      78.1  72.3     92.7   88.7  89.0  91.6  84.3  96.8    80.9   

       UpColor  DownColor  
Train     76.1       71.6  
Val       63.4       65.8  
Train average Acc: 0.84985054
Val average Acc: 0.81742084
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  62.8      73.6  65.0     61.1   83.0  87.6  76.2  83.0  80.7    84.0   
Val    37.3      64.7  52.5     50.5   68.5  86.7  55.9  82.9  58.2    79.6   

       UpColor  DownColor  
Train     73.1       69.8  
Val       58.8       50.4  
Train average mAcc: 0.74993765
Val average mAcc: 0.62178373
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.11268150356469418
Val mAP: 0.3760439795280483
==== Loss ====
Train: 3.7618214026741



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  84.8      84.4  79.1     88.1   91.8  89.4  93.8  84.6  95.9    87.5   
Val    77.7      74.5  67.9     90.3   90.2  91.3  91.7  74.1  94.8    75.1   

       UpColor  DownColor  
Train     77.2       72.6  
Val       63.9       63.6  
Train average Acc: 0.8576441
Val average Acc: 0.7958845
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  67.6      74.4  66.0     63.0   85.2  88.9  76.5  83.9  78.6    87.5   
Val    35.7      63.7  52.7     51.0   80.4  91.7  59.9  78.6  54.9    78.4   

       UpColor  DownColor  
Train     74.5       70.7  
Val       62.2       53.5  
Train average mAcc: 0.7640022
Val average mAcc: 0.6356118
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.12070688825301817
Val mAP: 0.40679299034684596
==== Loss ====
Train: 3.638517962331357




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  86.0      85.3  80.0     87.8   92.0  91.0  94.4  85.8  96.6    88.3   
Val    78.4      78.1  72.5     91.6   92.1  90.9  91.9  86.9  96.2    84.8   

       UpColor  DownColor  
Train     78.0       74.1  
Val       55.2       65.8  
Train average Acc: 0.866111
Val average Acc: 0.8202965
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  69.1      75.3  67.7     63.0   85.8  90.6  79.0  85.0  83.4    88.3   
Val    38.5      63.4  53.0     53.5   70.3  89.9  54.3  80.9  55.6    81.1   

       UpColor  DownColor  
Train     75.2       72.9  
Val       53.9       46.6  
Train average mAcc: 0.7792926
Val average mAcc: 0.61763304
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.11952286011147974
Val mAP: 0.38074750275781366
==== Loss ====
Train: 3.533297240215799




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  85.0      84.6  79.7     87.8   92.2  89.9  92.8  84.7  96.2    87.0   
Val    70.4      78.7  72.9     90.6   91.0  86.9  91.9  81.7  94.9    79.2   

       UpColor  DownColor  
Train     77.8       73.0  
Val       46.9       61.6  
Train average Acc: 0.85902476
Val average Acc: 0.7889187
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  68.9      74.6  66.7     62.8   85.4  89.4  70.5  84.0  81.2    87.1   
Val    36.9      60.3  50.6     66.8   62.9  88.2  57.8  80.0  52.8    79.2   

       UpColor  DownColor  
Train     75.0       71.0  
Val       55.8       48.6  
Train average mAcc: 0.7638453
Val average mAcc: 0.6164412
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.12584362123465653
Val mAP: 0.35930521766640466
==== Loss ====
Train: 3.579318546212238



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  86.2      85.6  80.3     86.8   92.3  90.4  94.5  86.3  96.8    89.1   
Val    76.7      77.1  70.2     89.2   94.6  90.4  90.6  84.8  94.2    80.8   

       UpColor  DownColor  
Train     78.2       74.3  
Val       63.3       64.3  
Train average Acc: 0.8673319
Val average Acc: 0.8135225
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  70.4      75.7  68.3     59.8   85.5  89.9  76.4  85.6  83.0    89.1   
Val    39.2      62.4  48.9     54.6   69.0  91.6  59.2  81.0  50.2    79.8   

       UpColor  DownColor  
Train     75.9       72.9  
Val       62.0       45.8  
Train average mAcc: 0.777143
Val average mAcc: 0.6197438
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.13279549498595228
Val mAP: 0.4222429414841985
==== Loss ====
Train: 3.441379176015439
Va



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.2      86.1  81.3     88.3   92.9  91.2  94.8  87.5  97.0    89.6   
Val    75.5      80.3  73.1     92.7   92.2  92.1  93.1  78.2  93.9    77.4   

       UpColor  DownColor  
Train     79.2       76.4  
Val       62.9       63.0  
Train average Acc: 0.87615824
Val average Acc: 0.8119249
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  74.5      76.4  71.5     65.0   86.6  90.8  79.8  86.8  84.4    89.6   
Val    37.2      71.8  52.8     52.9   67.8  91.4  63.2  83.8  52.2    79.5   

       UpColor  DownColor  
Train     76.2       75.1  
Val       61.6       48.6  
Train average mAcc: 0.7972498
Val average mAcc: 0.63562286
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.1396231686575687
Val mAP: 0.41382023046795713
==== Loss ====
Train: 3.317912047842274



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.2      86.6  81.9     89.4   93.7  91.5  95.3  87.9  97.1    90.5   
Val    77.9      77.8  70.6     91.6   93.9  91.7  92.9  86.4  97.6    84.4   

       UpColor  DownColor  
Train     80.3       76.6  
Val       67.6       64.6  
Train average Acc: 0.88153267
Val average Acc: 0.8307132
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  74.2      78.5  71.9     67.1   88.6  91.1  82.8  87.3  85.6    90.5   
Val    40.4      65.7  53.8     57.7   63.5  92.0  60.1  78.6  58.6    81.8   

       UpColor  DownColor  
Train     78.0       76.5  
Val       60.1       49.3  
Train average mAcc: 0.8101875
Val average mAcc: 0.634602
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.15535731224286178
Val mAP: 0.43255427753603126
==== Loss ====
Train: 3.2323273845340896



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.5      87.3  81.7     89.6   93.6  91.4  95.4  86.9  97.2    90.3   
Val    78.0      78.4  71.9     86.3   85.7  91.6  92.5  68.7  92.5    70.9   

       UpColor  DownColor  
Train     80.4       77.0  
Val       66.2       67.8  
Train average Acc: 0.88182944
Val average Acc: 0.7919223
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  73.0      78.9  72.6     69.4   88.5  91.0  83.3  86.2  85.9    90.3   
Val    41.4      67.5  55.5     60.9   80.6  90.5  64.2  78.0  56.0    76.1   

       UpColor  DownColor  
Train     78.4       76.6  
Val       63.5       57.9  
Train average mAcc: 0.8118295
Val average mAcc: 0.6601137
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.14612251304063065
Val mAP: 0.4841392040890352
==== Loss ====
Train: 3.2066100783970044



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  88.3      87.7  82.9     88.8   93.6  91.7  95.2  88.1  97.2    90.9   
Val    73.7      79.6  72.8     90.5   94.2  91.5  92.1  83.2  97.0    82.1   

       UpColor  DownColor  
Train     80.7       78.0  
Val       51.3       67.0  
Train average Acc: 0.8859088
Val average Acc: 0.8125639
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  78.0      79.4  73.6     66.2   88.8  91.2  83.2  87.6  86.4    90.9   
Val    37.9      62.7  53.6     57.1   62.0  91.8  57.9  64.1  60.5    76.2   

       UpColor  DownColor  
Train     78.4       78.1  
Val       58.8       47.3  
Train average mAcc: 0.81831205
Val average mAcc: 0.60814965
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.15405958907667625
Val mAP: 0.39259046957504695
==== Loss ====
Train: 3.13833690311597



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.2      82.9  81.3     89.3   92.9  91.5  95.0  82.6  96.9    87.9   
Val    73.5      77.9  72.3     93.4   93.7  89.7  92.6  81.1  96.8    83.1   

       UpColor  DownColor  
Train     79.5       75.6  
Val       61.6       64.4  
Train average Acc: 0.86880964
Val average Acc: 0.8167817
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  74.2      69.3  71.1     67.2   87.5  91.1  82.2  81.5  85.0    87.9   
Val    37.8      63.0  54.6     53.3   63.5  90.1  59.9  76.9  58.2    81.0   

       UpColor  DownColor  
Train     76.8       74.7  
Val       59.7       46.9  
Train average mAcc: 0.79046607
Val average mAcc: 0.62078047
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.16183845283193776
Val mAP: 0.440598015730943
==== Loss ====
Train: 3.307951048146123



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.9      86.1  82.9     90.3   93.9  92.3  95.9  87.1  97.4    90.8   
Val    76.4      80.1  69.1     92.0   85.4  89.5  93.1  82.4  94.4    78.3   

       UpColor  DownColor  
Train     81.2       77.7  
Val       64.0       63.4  
Train average Acc: 0.88625115
Val average Acc: 0.8067484
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  76.5      76.5  73.7     71.0   89.6  91.9  84.9  86.5  86.8    90.8   
Val    41.4      67.0  54.2     57.9   75.3  88.7  58.0  82.4  61.4    80.9   

       UpColor  DownColor  
Train     78.7       78.1  
Val       60.5       44.7  
Train average mAcc: 0.82084274
Val average mAcc: 0.64367247
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.17561872690590677
Val mAP: 0.47235727494578966
==== Loss ====
Train: 3.0564180000968



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.8      86.1  84.3     89.9   94.2  92.0  92.9  87.6  97.5    91.0   
Val    76.8      78.9  70.2     89.7   92.8  91.9  93.5  83.1  96.2    82.5   

       UpColor  DownColor  
Train     81.4       79.2  
Val       69.7       69.5  
Train average Acc: 0.886582
Val average Acc: 0.8289878
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  76.5      76.0  74.9     71.8   89.6  91.6  67.5  86.9  87.5    91.0   
Val    37.8      67.5  54.7     57.9   77.5  91.4  61.7  81.6  55.6    82.2   

       UpColor  DownColor  
Train     79.2       79.1  
Val       65.0       58.3  
Train average mAcc: 0.8095156
Val average mAcc: 0.6593735
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.18859734775324344
Val mAP: 0.5115560090012672
==== Loss ====
Train: 3.0316200816113015
V



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  88.5      88.3  84.6     90.7   94.5  93.1  95.2  89.2  97.6    91.6   
Val    78.1      80.1  71.3     91.9   93.1  91.6  93.9  80.4  95.9    84.0   

       UpColor  DownColor  
Train     82.6       80.2  
Val       68.2       65.5  
Train average Acc: 0.89674914
Val average Acc: 0.82834864
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  79.5      80.2  76.5     74.6   90.5  92.7  81.3  88.6  88.9    91.6   
Val    38.6      72.0  53.0     59.7   81.0  91.3  65.9  83.0  55.5    84.3   

       UpColor  DownColor  
Train     80.5       80.0  
Val       65.6       44.8  
Train average mAcc: 0.837507
Val average mAcc: 0.6622678
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.19873331101394784
Val mAP: 0.4396185917091138
==== Loss ====
Train: 2.9274632744167164



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  89.0      88.4  85.0     91.2   94.5  92.8  95.4  89.5  97.8    92.6   
Val    74.5      79.9  74.8     89.3   92.5  91.3  92.3  85.7  96.4    81.1   

       UpColor  DownColor  
Train     82.8       80.4  
Val       64.6       65.0  
Train average Acc: 0.8994934
Val average Acc: 0.8227889
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  77.9      81.0  77.3     75.9   89.9  92.5  84.0  89.0  89.5    92.6   
Val    38.6      63.5  55.9     59.5   64.5  89.9  56.7  80.7  55.7    79.6   

       UpColor  DownColor  
Train     81.0       80.7  
Val       62.7       48.3  
Train average mAcc: 0.8426627
Val average mAcc: 0.6297273
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.18088421599998133
Val mAP: 0.5252369931723242
==== Loss ====
Train: 2.880578161322552
V



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  89.5      88.8  86.0     91.4   94.8  93.5  95.7  89.8  97.7    92.6   
Val    73.5      79.3  72.5     88.9   93.2  90.6  93.3  85.5  94.5    81.8   

       UpColor  DownColor  
Train     83.3       81.0  
Val       62.5       67.9  
Train average Acc: 0.9034414
Val average Acc: 0.8195297
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  81.0      81.5  78.8     76.3   91.0  93.1  85.2  89.5  89.2    92.7   
Val    36.9      63.1  56.5     59.3   77.7  91.5  62.4  83.3  61.5    83.2   

       UpColor  DownColor  
Train     81.2       81.7  
Val       62.7       55.6  
Train average mAcc: 0.8510518
Val average mAcc: 0.66149414
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.18971204753608303
Val mAP: 0.520777965762039
==== Loss ====
Train: 2.8271840800409733




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  90.0      89.4  85.9     91.7   95.0  93.0  96.2  90.4  97.7    92.8   
Val    75.8      78.0  66.5     91.2   94.2  89.5  93.2  86.3  96.2    82.3   

       UpColor  DownColor  
Train     83.5       80.7  
Val       67.8       68.0  
Train average Acc: 0.90519875
Val average Acc: 0.8241948
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  81.2      82.6  78.9     77.2   91.2  92.7  85.5  89.9  89.9    92.8   
Val    38.3      70.1  57.7     56.3   68.0  90.2  62.4  83.9  62.3    81.5   

       UpColor  DownColor  
Train     81.4       81.0  
Val       61.5       48.9  
Train average mAcc: 0.8536447
Val average mAcc: 0.6509303
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.20380242049503416
Val mAP: 0.49689763837893075
==== Loss ====
Train: 2.804244657184767



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  90.7      90.8  86.4     91.9   95.5  93.7  96.4  90.7  98.0    92.8   
Val    73.7      78.5  70.6     92.9   94.8  91.4  92.7  83.9  94.9    85.4   

       UpColor  DownColor  
Train     84.0       81.1  
Val       58.4       61.4  
Train average Acc: 0.9099743
Val average Acc: 0.8155036
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  84.2      84.7  79.1     78.6   92.2  93.4  88.6  90.2  89.8    92.8   
Val    37.5      72.0  54.9     57.8   66.6  90.7  66.5  77.8  63.9    82.9   

       UpColor  DownColor  
Train     82.2       82.2  
Val       60.5       41.5  
Train average mAcc: 0.8650358
Val average mAcc: 0.64389867
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.20376795584666899
Val mAP: 0.5144953191482584
==== Loss ====
Train: 2.6952293478924294



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  89.4      90.0  86.4     91.7   94.6  93.1  96.2  90.2  97.7    93.0   
Val    78.6      79.3  70.1     88.3   92.6  89.3  90.5  74.7  95.1    79.7   

       UpColor  DownColor  
Train     84.4       81.1  
Val       66.4       63.7  
Train average Acc: 0.9064996
Val average Acc: 0.8068763
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  80.0      83.8  79.0     78.8   90.0  92.8  87.0  89.8  89.3    93.0   
Val    39.7      72.1  57.4     62.0   69.7  89.4  65.3  79.8  64.0    82.2   

       UpColor  DownColor  
Train     82.7       81.5  
Val       63.5       47.1  
Train average mAcc: 0.8564309
Val average mAcc: 0.6601136
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.21366042044616107
Val mAP: 0.4424025003826636
==== Loss ====
Train: 2.7331005200095797




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  90.6      90.5  86.3     92.5   95.4  93.8  95.8  90.6  98.0    93.2   
Val    79.9      80.0  72.9     90.4   90.9  89.3  93.8  79.0  95.5    70.2   

       UpColor  DownColor  
Train     85.1       81.8  
Val       57.9       69.2  
Train average Acc: 0.91130924
Val average Acc: 0.8074515
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  82.2      85.1  79.7     79.4   92.0  93.5  84.3  90.1  90.0    93.2   
Val    40.0      70.9  59.0     59.5   73.1  89.5  65.8  81.9  64.2    75.5   

       UpColor  DownColor  
Train     83.2       82.1  
Val       59.8       54.3  
Train average mAcc: 0.8623427
Val average mAcc: 0.6611895
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.2174511278380601
Val mAP: 0.48673945263553575
==== Loss ====
Train: 2.651681945634925




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  90.5      90.6  87.4     92.5   95.5  93.8  96.7  91.3  97.8    93.7   
Val    78.7      75.0  74.8     90.7   94.1  89.0  92.0  81.2  95.6    79.7   

       UpColor  DownColor  
Train     84.8       82.6  
Val       66.1       65.3  
Train average Acc: 0.9142761
Val average Acc: 0.8185072
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  83.3      84.5  80.8     79.7   92.4  93.5  89.1  90.9  90.4    93.7   
Val    44.2      70.5  55.9     54.8   73.0  88.2  57.4  81.3  64.3    81.8   

       UpColor  DownColor  
Train     83.5       83.0  
Val       61.5       50.9  
Train average mAcc: 0.8707192
Val average mAcc: 0.6531396
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.23231483038903097
Val mAP: 0.4378927780895004
==== Loss ====
Train: 2.6439978309299637




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  91.1      90.8  87.4     92.5   95.4  93.1  96.5  91.6  97.9    93.7   
Val    78.5      80.3  71.2     92.2   91.5  90.6  92.9  74.2  93.4    76.5   

       UpColor  DownColor  
Train     84.6       81.9  
Val       67.3       66.6  
Train average Acc: 0.91373414
Val average Acc: 0.8126917
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  84.9      85.3  80.8     80.5   92.0  92.7  88.1  91.2  89.3    93.7   
Val    36.5      73.0  54.2     57.4   77.6  89.0  57.9  80.4  69.8    79.4   

       UpColor  DownColor  
Train     83.1       82.6  
Val       61.1       48.1  
Train average mAcc: 0.8701807
Val average mAcc: 0.6536223
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.2390982065150905
Val mAP: 0.488917906878967
==== Loss ====
Train: 2.630382714064225
Va



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  91.1      88.5  87.5     92.5   95.3  93.5  96.8  90.4  98.2    93.1   
Val    77.8      81.9  72.2     91.7   92.6  93.9  92.3  80.4  96.5    79.8   

       UpColor  DownColor  
Train     85.2       82.4  
Val       69.0       64.6  
Train average Acc: 0.91204536
Val average Acc: 0.82745403
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  83.6      81.4  80.9     81.0   92.2  93.1  89.8  90.0  90.9    93.1   
Val    41.7      73.9  54.1     55.4   79.1  93.1  60.2  83.4  62.5    81.1   

       UpColor  DownColor  
Train     83.5       83.2  
Val       66.2       51.0  
Train average mAcc: 0.86885905
Val average mAcc: 0.6681117
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.21410271104027878
Val mAP: 0.5628567847166346
==== Loss ====
Train: 2.62746909390325



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  92.1      91.0  88.9     93.2   96.2  94.4  97.1  92.6  98.4    94.3   
Val    75.2      77.9  71.5     89.6   92.6  90.8  92.4  85.1  96.9    85.1   

       UpColor  DownColor  
Train     86.6       84.0  
Val       62.3       64.5  
Train average Acc: 0.92404956
Val average Acc: 0.819977
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.4      86.0  83.4     83.2   93.6  94.1  90.2  92.3  92.9    94.3   
Val    41.8      72.0  54.5     57.3   68.0  89.6  57.2  81.9  62.7    84.1   

       UpColor  DownColor  
Train     85.2       84.7  
Val       63.2       47.1  
Train average mAcc: 0.88936925
Val average mAcc: 0.6493802
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.24051785840272125
Val mAP: 0.508740649867416
==== Loss ====
Train: 2.451631276503853
V



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  92.1      91.4  88.8     93.5   95.8  93.6  97.2  91.9  98.4    94.4   
Val    79.3      65.9  63.3     92.8   77.1  79.0  92.7  54.3  93.9    54.8   

       UpColor  DownColor  
Train     86.9       85.1  
Val       62.5       58.4  
Train average Acc: 0.924312
Val average Acc: 0.72827196
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.2      85.7  83.5     83.3   92.7  93.3  91.2  91.6  92.0    94.5   
Val    35.9      63.7  56.5     53.5   74.4  70.7  57.8  66.9  63.4    63.6   

       UpColor  DownColor  
Train     85.4       85.5  
Val       52.3       38.3  
Train average mAcc: 0.88812315
Val average mAcc: 0.580881
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.24149702410994772
Val mAP: 0.2760969236850631
==== Loss ====
Train: 2.4591969904692275




==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  89.5      88.4  86.6     88.0   93.3  92.8  96.4  89.3  96.9    91.3   
Val    76.4      77.2  73.5     92.1   93.4  91.3  92.6  83.5  95.3    80.8   

       UpColor  DownColor  
Train     83.2       80.0  
Val       64.3       66.8  
Train average Acc: 0.89635533
Val average Acc: 0.8227889
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  79.8      80.8  79.2     65.0   86.9  92.4  87.7  88.7  83.2    91.3   
Val    39.7      68.9  55.2     53.2   76.1  90.1  60.4  82.8  59.6    80.8   

       UpColor  DownColor  
Train     81.4       80.5  
Val       59.8       53.3  
Train average mAcc: 0.83072984
Val average mAcc: 0.6498139
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.21494724161207315
Val mAP: 0.4749294690478295
==== Loss ====
Train: 2.954487114367277



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  91.9      90.9  88.4     91.2   95.4  94.1  96.9  91.9  97.6    93.7   
Val    75.8      77.1  73.0     92.6   93.8  91.0  93.5  85.7  96.4    84.0   

       UpColor  DownColor  
Train     86.3       83.2  
Val       68.6       64.6  
Train average Acc: 0.91788197
Val average Acc: 0.8300742
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  85.2      85.5  82.6     76.1   91.7  93.7  89.8  91.5  87.1    93.7   
Val    40.4      72.7  55.4     52.8   72.9  90.9  62.1  79.7  60.2    81.4   

       UpColor  DownColor  
Train     85.0       83.8  
Val       64.7       46.8  
Train average mAcc: 0.8713808
Val average mAcc: 0.6500753
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.24829551975981123
Val mAP: 0.5006908505764237
==== Loss ====
Train: 2.5130481678506604



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  92.1      91.7  89.1     92.6   95.8  94.8  96.4  92.2  98.1    94.0   
Val    76.3      78.1  71.7     93.6   91.9  91.6  92.2  82.5  97.9    81.4   

       UpColor  DownColor  
Train     87.6       83.8  
Val       69.0       66.5  
Train average Acc: 0.92368436
Val average Acc: 0.8272623
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  86.5      86.6  84.2     80.9   92.4  94.6  88.5  91.8  90.7    94.0   
Val    43.4      66.4  57.6     54.0   76.2  90.6  61.9  83.1  60.9    82.0   

       UpColor  DownColor  
Train     86.3       84.8  
Val       64.0       51.5  
Train average mAcc: 0.8843553
Val average mAcc: 0.6596594
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.25446209934587927
Val mAP: 0.47451811272614564
==== Loss ====
Train: 2.431729743791663



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  92.3      91.5  89.5     92.9   96.3  95.2  97.4  92.7  98.2    95.1   
Val    75.7      79.1  74.7     91.5   92.3  90.2  91.7  79.4  97.8    76.8   

       UpColor  DownColor  
Train     87.8       85.1  
Val       64.7       61.6  
Train average Acc: 0.9282658
Val average Acc: 0.8128196
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  87.0      86.5  84.6     81.3   93.5  94.9  91.4  92.3  90.9    95.1   
Val    39.0      67.7  54.7     59.5   73.8  90.5  54.7  79.9  63.1    77.1   

       UpColor  DownColor  
Train     86.7       85.8  
Val       57.1       46.0  
Train average mAcc: 0.89163846
Val average mAcc: 0.63593805
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.2533399904253026
Val mAP: 0.4559026101401293
==== Loss ====
Train: 2.3562935124272886



==== ATTRIBUTES: Global Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  91.3      91.1  84.3     92.1   95.8  94.5  95.5  91.7  98.0    94.0   
Val    76.2      80.6  73.5     91.3   92.8  88.5  91.9  85.6  97.1    82.2   

       UpColor  DownColor  
Train     86.5       84.3  
Val       65.3       62.4  
Train average Acc: 0.9160847
Val average Acc: 0.8227889
==== ATTRIBUTES: Class Averaged Accuracy % ====
        Age  Backpack   Bag  Handbag  Cloth  Down    Up  Hair   Hat  Gender  \
Train  85.4      85.2  75.4     80.0   92.8  94.2  85.2  91.4  90.6    94.1   
Val    38.7      66.8  54.8     54.0   68.9  89.6  57.8  82.6  58.3    79.9   

       UpColor  DownColor  
Train     85.0       84.9  
Val       65.0       43.1  
Train average mAcc: 0.87014866
Val average mAcc: 0.632951
==== RE-IDENTIFICATION: mean Average Precision % ====
Train mAP: 0.2513943830580144
Val mAP: 0.4787404593621182
==== Loss ====
Train: 2.5312469440957774
V

## Test classification
Predicts the annotations on the test set and produces a CSV file.

In [None]:
# Run `predict` (defined earlier in the notebook) with the network and data held by `model`.
# NOTE(review): 0.82 is a magic number; presumably a decision threshold used by
# `predict` -- confirm its meaning and consider naming it in a config cell.
# The call is the cell's last expression, so the returned tensor is echoed as cell output.
predict(model.net, model.data, 0.82)



tensor([[3, 1, 1,  ..., 1, 1, 1],
        [2, 1, 1,  ..., 1, 1, 1],
        [2, 1, 1,  ..., 1, 1, 1],
        ...,
        [2, 1, 1,  ..., 1, 1, 1],
        [2, 2, 1,  ..., 2, 1, 1],
        [2, 1, 1,  ..., 1, 1, 1]], device='cuda:0', dtype=torch.int32)

In [None]:
# Persist the network's parameters (its state_dict) to a local checkpoint file.
torch.save(
    obj=model.net.state_dict(),
    f='model_weights.pth',
)

In [None]:
# Trigger a browser download for each produced artefact, in a fixed order:
# the saved weights first, then the two test-set result files.
for _artifact in ('model_weights.pth', 'classification_test.csv', 'reid_test.txt'):
    files.download(_artifact)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>