In [1]:
import argparse
import os
import pickle

import torch as T
import yaml
from torch.optim import Adam
from tqdm import tqdm

import sys

sys.path.append('/home/volpepe/Desktop/FSOD_CenterNet/src')
from data_pipeline import DatasetsGenerator
from model import Model


def load_settings(settings_path: str):
    """Parse the YAML file located at ``settings_path`` and return its content.

    The file is opened in text mode and parsed with ``yaml.safe_load``; whatever
    object that call produces is returned unchanged.
    """
    with open(settings_path, 'r') as settings_file:
        parsed_settings = yaml.safe_load(settings_file)
    return parsed_settings
    
# Path of the YAML settings file used for this debugging session.
settings = '../settings/model_testing_debug_check.yaml'
    
config = load_settings(settings)

# Refuse to continue when the base-weights directory already exists while the
# configuration asks both to train the base model and not to overwrite weights.
if os.path.exists(config['training']['save_base_weights_dir']) \
                and config["training"]["train_base"] \
                and config["training"]["no_overwrite"]:
    raise ValueError("Cannot overwrite weights")

# Make sure the directory for training information exists.
os.makedirs(config['training']['save_training_info_dir'], exist_ok=True)

debug_mode = config['debug']['debug_mode_active']
device = config['device']

K = config['data']['K']
val_K = config['data']['val_K']
test_K = config['data']['test_K']
n_repeats_novel_train = config['training']['repeat_novel_training']

# Normalise the K settings: a single int becomes a one-element list.
if isinstance(K, int): K = [K]
if isinstance(val_K, int): val_K = [val_K]
if isinstance(test_K, int): test_K = [test_K]

# Dataset generator: a single instance is enough; the dataloaders used below
# are requested from it.
dataset_gen = DatasetsGenerator(config)

# NOTE(review): `model` and the three base dataloaders are only defined when
# config['training']['train_base'] is truthy, yet the following cells use them
# unconditionally.
if config['training']['train_base']:

    # Instantiate the model (only the base part)
    model = Model(config, n_base_classes=len(dataset_gen.train_base.cats))
    model = model.to(device)

    # Use the dataset generator to generate the base set
    dataset_base_train, dataset_base_val, dataset_base_test = dataset_gen.get_base_sets_dataloaders(
        config['training']['batch_size'], config['training']['num_workers'],
        config['training']['pin_memory'], config['training']['drop_last'], 
        shuffle=True
    )
    

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=1.63s)
creating index...
index created!
Skipping instantating novel head


In [2]:
# Restore the weights saved on the server. `map_location=device` remaps the stored
# tensors onto the configured device, so a checkpoint written on a GPU can also be
# loaded on a machine where that GPU is not available.
# NOTE: T.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(
    T.load('../../data/weights/from_server/best_model_fix.pt', map_location=device)
)

<All keys matched successfully>

In [9]:
import numpy as np
# Total number of scalar entries across every parameter tensor of the model.
params = sum(np.prod(weight.size()) for weight in model.parameters())
params

13806677

In [11]:
# Same count as above, restricted to the parameters of the model's encoder.
sum(np.prod(weight.size()) for weight in model.encoder.parameters())

11176512

In [4]:
# Pull exactly one batch from the base training loader and run it through the model.
# Taking the first item of the iterator makes the "one batch only" intent explicit and
# fails immediately (StopIteration) on an empty loader, instead of silently leaving
# `out` undefined as a loop-and-break would.
input_image, labels, n_detections, _ = next(iter(dataset_base_train))
out = model(input_image.to(device))

In [5]:
# Split the model output and the batch labels into their three components; the third
# element of each is not used in this notebook.
# NOTE(review): presumably (regressor, base heatmap, novel heatmap) in both tuples —
# confirm against the model and dataset code.
out_reg, out_head_base, _ = out
regressor_label, heatmap_base, _ = labels

In [6]:
# Predicted regressor values at the positions where the regressor label is non-zero,
# regrouped into rows of 4 values; at most the first 10 rows are shown.
out_reg[T.where(regressor_label != 0)].reshape(-1, 4)[:10]

tensor([[10.1728, 12.6651,  0.5879,  0.5829],
        [14.5293, 16.9659,  0.5041,  0.5115],
        [12.3113, 14.4862,  0.5710,  0.5692],
        [11.7774,  9.7300, 14.3218, 12.4755],
        [ 0.5241,  0.5881,  0.5271,  0.5828]], device='cuda:0',
       grad_fn=<SliceBackward0>)

In [7]:
# Non-zero entries of the regressor label itself, in the same 4-column layout,
# for a side-by-side comparison with the prediction.
regressor_label[T.where(regressor_label != 0)].reshape(-1, 4)[:10]

tensor([[3.2071e+01, 3.9691e+01, 4.3597e-01, 9.6491e-03],
        [8.6848e+00, 4.7006e+00, 4.2223e-01, 6.9492e-01],
        [1.0055e+01, 1.3155e+01, 2.5462e-01, 9.7897e-01],
        [1.1757e+01, 1.9492e+01, 9.7176e+00, 1.0894e+01],
        [6.3536e-01, 4.1989e-01, 5.2031e-01, 4.7195e-01]])

In [8]:
# Predicted base-head values at the positions where the ground-truth heatmap equals 1.
out_head_base[T.where(heatmap_base==1)]

tensor([0.0063, 0.0024, 0.0031, 0.0049, 0.0058], device='cuda:0',
       grad_fn=<IndexBackward0>)

In [9]:
# Maximum, minimum and mean of the predicted base heatmap over the whole batch.
out_head_base.max(), out_head_base.min(), out_head_base.mean()

(tensor(0.0232, device='cuda:0', grad_fn=<MaxBackward1>),
 tensor(0.0005, device='cuda:0', grad_fn=<MinBackward1>),
 tensor(0.0033, device='cuda:0', grad_fn=<MeanBackward0>))

In [10]:
# Same three statistics for the ground-truth base heatmap.
heatmap_base.max(), heatmap_base.min(), heatmap_base.mean()

(tensor(1.), tensor(0.), tensor(0.0005))

In [11]:
def heatmap_loss(pred_heatmap, gt_heatmap, num_keypoints, config):
    """Per-sample focal-style loss between predicted and ground-truth heatmaps.

    Positions where ``gt_heatmap == 1`` contribute
    ``(1 - p) ** alpha * log(p)``; every other position contributes
    ``(1 - gt) ** beta * p ** alpha * log(1 - p)``. The contributions of each sample
    are summed, divided by that sample's keypoint count and negated.

    Args:
        pred_heatmap: predicted heatmap; its first dimension indexes the samples.
        gt_heatmap: ground-truth heatmap, same shape as ``pred_heatmap``.
        num_keypoints: tensor with one keypoint count per sample.
        config: mapping providing ``config['model']['alpha_loss']`` and
            ``config['model']['beta_loss']``.

    Returns:
        Tensor with one loss value per sample. Samples whose keypoint count is 0
        get a loss of 0.
    """
    gt_heatmap = gt_heatmap.to(pred_heatmap.device)
    num_keypoints = num_keypoints.to(pred_heatmap.device)

    alpha = config['model']['alpha_loss']
    beta = config['model']['beta_loss']

    # Keep predictions strictly inside (0, 1). Both branches of T.where are always
    # evaluated, so log(0) = -inf in the branch that is NOT selected would still
    # poison the backward pass with NaN (0 * inf).
    eps = T.finfo(pred_heatmap.dtype).eps
    safe_pred = pred_heatmap.clamp(min=eps, max=1 - eps)

    positive_term = (1 - safe_pred) ** alpha * T.log(safe_pred)
    negative_term = (1 - gt_heatmap) ** beta * safe_pred ** alpha * T.log(1 - safe_pred)

    loss = T.where(gt_heatmap == 1, positive_term, negative_term)
    loss = loss.reshape(pred_heatmap.shape[0], -1).sum(dim=-1)

    # Divide by a denominator that is never 0: samples without keypoints are masked
    # out below anyway, and dividing them by 0 would again produce NaN gradients.
    has_keypoints = num_keypoints != 0
    safe_count = T.where(has_keypoints, num_keypoints, T.ones_like(num_keypoints))
    result = T.where(has_keypoints, loss / safe_count, T.zeros_like(loss))

    return -result

In [29]:
# Re-run the body of heatmap_loss step by step on the batch obtained above, so the
# two branches of the loss can be inspected separately.
pred_heatmap = out_head_base
gt_heatmap = heatmap_base
num_keypoints = n_detections

gt_heatmap = gt_heatmap.to(pred_heatmap.device)
num_keypoints = num_keypoints.to(pred_heatmap.device)

# a1: the term heatmap_loss selects where gt_heatmap == 1 (computed here at every position).
a1 = (1 - pred_heatmap) ** config['model']['alpha_loss'] * T.log(pred_heatmap)
# a2: the term heatmap_loss selects at all remaining positions.
a2 = (1 - gt_heatmap) ** config['model']['beta_loss'] * (pred_heatmap) ** config['model']['alpha_loss'] * T.log(1 - pred_heatmap)

# Largest and average value of the a1 term.
a1.max(), a1.mean()

(tensor(-3.5904, device='cuda:0', grad_fn=<MaxBackward1>),
 tensor(-5.8532, device='cuda:0', grad_fn=<MeanBackward0>))

In [30]:
# Shape of the a1 term (same shape as the predicted heatmap it was computed from).
a1.shape

torch.Size([4, 240, 8, 8])

In [25]:
# Average and smallest value of the a2 term.
a2.mean(), a2.min()

(tensor(-1.0031e-07, device='cuda:0', grad_fn=<MeanBackward0>),
 tensor(-1.2655e-05, device='cuda:0', grad_fn=<MinBackward1>))

In [12]:
# Loss of the base head on the inspected batch (one value per sample).
heatmap_loss(out_head_base, heatmap_base, n_detections, config)

tensor([5.0132, 5.9995, 5.7377, 5.1834], device='cuda:0',
       grad_fn=<NegBackward0>)

In [28]:
# Back-of-the-envelope product of a position count (32*32 - 1) and the a2 mean
# printed earlier (-1.0031e-07).
# NOTE(review): a1.shape printed earlier is (4, 240, 8, 8), so the 32*32 figure may
# not match the actual heatmap size — confirm.
(32*32-1)*-1.0031e-07

-0.00010261713

In [14]:
import torch.nn.functional as NNF
import torch as T
from torchmetrics.detection import MeanAveragePrecision

@T.no_grad()
def get_heatmap_maxima_idxs(complete_heatmaps):
    """Return a boolean mask flagging the local maxima of the given heatmaps.

    A position is flagged when its value equals the maximum of the 3x3 window
    centred on it. The pooling uses stride 1 and padding 1, so the mask has the
    same shape as ``complete_heatmaps``.
    """
    neighbourhood_max = NNF.max_pool2d(complete_heatmaps, 3, stride=1, padding=1)
    is_local_peak = complete_heatmaps == neighbourhood_max
    return is_local_peak

def landmarks_from_idxs(regressor_pred: T.Tensor,
                        complete_heatmaps: T.Tensor,
                        idxs_tensor_mask: T.Tensor,
                        max_detections: int = 10):
    """Build a detection dictionary from the strongest heatmap peaks of one image.

    Args:
        regressor_pred: regression output indexed as ``[channel, row, col]``; channels
            0, 1, 2 and 3 are read as size_x, size_y, offset_x and offset_y.
        complete_heatmaps: per-class heatmap with the same shape as ``idxs_tensor_mask``.
        idxs_tensor_mask: boolean tensor of shape ``(n_classes, rows, cols)`` that is
            True at the candidate peaks.
        max_detections: maximum number of peaks kept (the highest-scoring ones).

    Returns:
        A dict with
          * ``"boxes"``: ``(num_detections, 4)`` tensor whose rows are
            ``[center_x, center_y, size_x, size_y]``, where the centre is the peak's
            column/row index plus the predicted offset;
          * ``"labels"``: int32 tensor with the class index of each peak;
          * ``"scores"``: heatmap value of each peak.
        Entries are ordered by (class, row, col) position, not by score.

    NOTE(review): boxes are expressed in heatmap coordinates and in centre/size form;
    confirm this matches what the downstream metric expects.
    """
    n_classes, n_rows, n_cols = idxs_tensor_mask.shape

    # Plain Python int, so it can be used for tensor sizes and as `k` for topk.
    num_detections = min(max_detections, int(idxs_tensor_mask.sum().item()))

    # Flattens it so we can use topk
    confidence_scores = T.masked_select(complete_heatmaps, idxs_tensor_mask)

    # The indices returned by topk refer to positions inside `confidence_scores`
    # (one entry per peak), not to positions in the heatmap.
    top_k_scores = T.topk(confidence_scores, num_detections)

    # Flattened heatmap position of every peak, in the same order as `confidence_scores`
    flattened_idxs = T.nonzero(T.flatten(idxs_tensor_mask)).reshape(-1)

    # Flattened heatmap positions of the top-k peaks only
    flattened_top_k_idxs = flattened_idxs[top_k_scores.indices]

    # Boolean mask of the top-k peaks, allocated on the same device as the input
    # mask so the function also works when the tensors are not on a CUDA device.
    top_k_mask = T.zeros(n_classes * n_rows * n_cols,
                         dtype=T.bool,
                         device=idxs_tensor_mask.device)
    top_k_mask[flattened_top_k_idxs] = True
    top_k_mask = top_k_mask.reshape(n_classes, n_rows, n_cols)
    top_k_idxs = T.nonzero(top_k_mask)

    # Debug output: (class, row, col) of every selected peak
    print(top_k_idxs)

    category = top_k_idxs[:, 0]
    center_idx_y = top_k_idxs[:, 1]
    center_idx_x = top_k_idxs[:, 2]

    # The regressor is shared by all classes, so it is indexed by position only
    # (no need to materialise one copy of it per class).
    size_x = regressor_pred[0, center_idx_y, center_idx_x]
    size_y = regressor_pred[1, center_idx_y, center_idx_x]
    off_x = regressor_pred[2, center_idx_y, center_idx_x]
    off_y = regressor_pred[3, center_idx_y, center_idx_x]

    # Read each score at the very position its box and label come from. Taking the
    # first entries of `confidence_scores` instead would pair the detections with the
    # scores of unrelated peaks whenever there are more peaks than `max_detections`.
    scores = complete_heatmaps[category, center_idx_y, center_idx_x]

    center_coord_x = center_idx_x + off_x
    center_coord_y = center_idx_y + off_y

    landmarks_pred = {
        "boxes": T.zeros(num_detections, 4),
        "labels": T.zeros(num_detections).to(T.int32),
        "scores": T.zeros(num_detections)
    }

    landmarks_pred["boxes"][:, 0] = center_coord_x
    landmarks_pred["boxes"][:, 1] = center_coord_y
    landmarks_pred["boxes"][:, 2] = size_x
    landmarks_pred["boxes"][:, 3] = size_y
    landmarks_pred["labels"][:] = category
    landmarks_pred["scores"][:] = scores

    return landmarks_pred

In [15]:
# Run the model on one training batch and decode predictions / ground truth for each
# image in it. Only the first batch is processed (see the `break` at the end), and
# `landmarks_pred` / `landmarks_gt` keep the values of the last image of that batch.
for counter, (image_batch, _, n_landmarks_batch, padded_landmarks) in enumerate(dataset_base_train):

    # both image and landmarks will be resized to model_input_size
    # The batch is moved to the configured `device` (the one the model was moved to)
    # rather than to a hard-coded 'cuda'.
    reg_pred_batch, heat_base_pred_batch, heat_novel_pred_batch = model(image_batch.to(device))

    for i, (reg_pred, heat_base_pred, n_landmarks) in \
        enumerate(zip(reg_pred_batch, heat_base_pred_batch, n_landmarks_batch)):
        # Only the base-head heatmap is decoded here.
        complete_heatmaps = heat_base_pred

        idxs_tensor = get_heatmap_maxima_idxs(complete_heatmaps)

        landmarks_pred = landmarks_from_idxs(
            reg_pred,
            complete_heatmaps,
            idxs_tensor
        )

        # Strip the padding: keep only the first `n_landmarks` entries of image i.
        landmarks_gt = {
            "boxes": padded_landmarks["boxes"][i,:n_landmarks,:],
            "labels": padded_landmarks["labels"][i,:n_landmarks]
        }
    
    break

tensor([[  8,  16,  15],
        [ 95,  16,  13],
        [ 95,  16,  15],
        [100,  16,  15],
        [110,  16,  15],
        [132,  18,  15],
        [137,  16,  15],
        [150,  16,  15],
        [187,  17,  15],
        [187,  18,  13]], device='cuda:0')


In [12]:
# Ground-truth boxes and labels of the last image processed by the loop above.
landmarks_gt

{'boxes': tensor([[67.8649, 56.3060, 61.1892, 38.9362]]),
 'labels': tensor([41], dtype=torch.int32)}

In [13]:
# Decoded predictions (boxes, labels, scores) for that same image.
landmarks_pred

{'boxes': tensor([[16.4100, 16.4538, 72.0674, 47.1962],
         [16.5736, 15.6011, 71.1743, 49.0226],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962],
         [16.4100, 16.4538, 72.0674, 47.1962]], grad_fn=<CopySlices>),
 'labels': tensor([ 10,  15,  52, 111, 116, 117, 156, 157, 190, 195], dtype=torch.int32),
 'scores': tensor([0.0025, 0.0020, 0.0017, 0.0016, 0.0023, 0.0022, 0.0022, 0.0028, 0.0027,
         0.0021], grad_fn=<CopySlices>)}