In [None]:
# Mount Google Drive at /content/drive so the project files are reachable from the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd drive/MyDrive/colab_projects/cocoapi/notebooks/

In [None]:
# Show the current working directory to confirm the %cd above took effect.
%pwd

# Visualising Images with best and worst losses

## CONFIG

In [None]:
# Names of the trained models that this notebook knows how to load.
MODEL_NAMES = [
    "baseline-Adam-2021-06-11",
    "strong-aug-not-so-long-Adam-2021-06-14",
    "2-layer-head_dropout=0.8_wd=0.1_2021-06-16",
    "gated-attention_dilated-block=True_resnet18_wd=0_2021-06-30",
]

# Model analysed in the rest of the notebook; it must be one of MODEL_NAMES.
MODEL_NAME = "gated-attention_dilated-block=True_resnet18_wd=0_2021-06-30"
assert MODEL_NAME in MODEL_NAMES

## Model Descriptions

* batch size: 32
* augmentation
    * baseline train transforms: **at the beginning**
        1. resize (224x224)
        2. RandomHorizontalFlip(p=0.5)
        3. Normalize (ImageNet constants)
    * SimCLR train transforms: starting from **strong-aug-longer-Adam-2021-06-14**
        1. resize (224x224) - the only step not like in SimCLR
        2. Random Horizontal Flip
        3. Random  Color Distortion
        4. Random Gaussian Blur
    * dev transforms: **always for dev set**
        1. resize (224x224)
        2. Normalize (ImageNet constants)


### baseline-Adam-2021-06-11

Last layer training
* epochs: 4 (0, ..., 3)
* optimizer: **Adam(lr=1e-3)**
* scheduler: StepLR(step_size=2, gamma=0.1)
* learning rate alpha:
    * epochs 0, 1: 1e-3
    * epochs 2, 3: 1e-4
* weight_decay=0
    
All layers training
* epochs: 10 (4, 5, ..., 13)
* optimizer: **Adam(lr=1e-4)**
* scheduler: StepLR(step_size=5, gamma=0.1)
* learning rate alpha:
    * epochs 4, ..., 8: 1e-4
    * epochs 9, ..., 13: 1e-5
* **weight_decay=0**

### strong-aug-not-so-long-Adam-2021-06-14

Last layer training
* **epochs: 6 (0, ..., 5)**
* **optimizer: Adam(lr=1e-2)**
* scheduler: StepLR(**step_size=3**, gamma=0.1)
* learning rate alpha:
    * **epochs 0, 1, 2: 1e-2**
    * **epochs 3, 4, 5: 1e-3**
* weight_decay=0
    
All layers training
* **epochs: 10 (6, 7, ..., 15)**
* optimizer: Adam(lr=1e-4)
* scheduler: StepLR(**step_size=5**, gamma=0.1)
* learning rate alpha:
    * **epochs 6, ..., 10: 1e-4**
    * **epochs 11, ..., 15: 1e-5**
* weight_decay=0

### 2-layer-head_dropout=0.8_wd=0.1_2021-06-16

Last 2 layers training
* epochs: 8 (0, ..., 7)
* optimizer: Adam(lr=1e-2)
* scheduler: StepLR(step_size=6, gamma=0.1)
* learning rate alpha:
    * epochs 0, 1, 2, 3: 1e-2
    * epochs 4, 5, 6, 7: 1e-3
* weight_decay=0
* dropout_prob=0
    
All layers training
* epochs: 10 (6, 7, ..., 15)
* optimizer: Adam(lr=1e-4)
* scheduler: StepLR(step_size=5, gamma=0.1)
* learning rate alpha:
    * epochs 6, ..., 10: 1e-4
    * epochs 11, ..., 15: 1e-5
* **weight_decay=0.1**
* dropout_prob=0.8

### gated-attention_dilated-block=True_resnet18_wd=0_2021-06-30

**Attention head** training
* epochs: 8 (0, ..., 7)
* optimizer: Adam(lr=1e-2)
* scheduler: StepLR(step_size=6, gamma=0.1)
* learning rate alpha:
    * epochs 0, 1, 2, 3, 4, 5: 1e-2
    * epochs 6, 7: 1e-3
* weight_decay=0
    
All layers training
* epochs: 14 (8, 9, ..., 21)
* optimizer: Adam(lr=1e-4)
* scheduler: StepLR(step_size=10, gamma=0.1)
* learning rate alpha:
    * epochs 8, ..., 17: 1e-4
    * epochs 18, ..., 21: 1e-5
* **weight_decay=0**
* **USE_DILATED_BLOCK = True**

Reason: need better representation

## Imports

In [None]:
import os
import copy

import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import torchvision
from torchvision.io import read_image
from torchvision import datasets, models, transforms

from tqdm.notebook import trange, tqdm

In [None]:
# Run on the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
device

In [None]:
%cd ../utils/

from config import CATEGORIES_OF_INTEREST
from config import IMG_SIZE, NORMALIZATION_MEAN, NORMALIZATION_STD

from my_transforms import baseline_train_transforms, SimCLR_train_transforms, dev_transforms

from my_dataloaders import CocoNoCropping

from my_visualizations import imshow_from_normalized

%cd ../notebooks/

In [None]:
%cd ../models/
from george_model import get_resnet, GatedAttentionModule
%cd ../notebooks/

## Transforms, Datasets, and Data Loaders

In [None]:
# Both dev1 datasets read the same ids, annotations and images with the same
# transforms; they differ only in `return_image_details`.
_dev1_common_kwargs = dict(
    img_ids='../my_splits/dev1_ids.txt',
    my_annotations_file='../my_annotations/imgIds_to_labels_train2017.json',
    img_dir='../my_images/dev1/',
    transform=dev_transforms,
    divide_by_255=True,
)

# Plain dataset used for computing the aggregate metrics.
dev1_dataset = CocoNoCropping(return_image_details=False, **_dev1_common_kwargs)

# Same data with image details turned on; presumably this is what adds the
# 'image_id' / 'image_path' entries the error analysis reads -- confirm in CocoNoCropping.
dev1_error_analysis_dataset = CocoNoCropping(return_image_details=True, **_dev1_common_kwargs)

In [None]:
# `drop_last=False` by default, so the last batch may be smaller than `batch_size`.
# That is handled by using reduction='sum' in the loss and dividing by the number of examples.

# for visualization: small shuffled batches of the dataset that also returns image details
example_dataloader = DataLoader(dev1_error_analysis_dataset, batch_size=8, shuffle=True)

# for error analysis: fixed order (shuffle=False) over both variants of the dev1 dataset
dev1_error_analysis_dataloader = DataLoader(dev1_error_analysis_dataset, batch_size=32, shuffle=False)
dev1_dataloader = DataLoader(dev1_dataset, batch_size=32, shuffle=False)

## Visualize a few images

In [None]:
# Number of batches to display.
LIMIT = 5

# zip() with range(LIMIT) stops after exactly LIMIT batches. The previous
# `if i == LIMIT: break` check displayed LIMIT + 1 batches.
for batch_idx, batch in zip(range(LIMIT), example_dataloader):
    inputs, labels, image_ids = batch['image'], batch['label'], batch['image_id']

    # Make a grid from batch
    out = torchvision.utils.make_grid(inputs)

    plt.subplots(figsize=(18, 3))
    imshow_from_normalized(out)

    # `labels` is indexed by category: labels[k] holds the labels of category k
    # for every image of the batch, so each category becomes one column.
    df = pd.DataFrame({CATEGORIES_OF_INTEREST[k]: labels[k].numpy() for k in range(len(labels))})
    df.index = image_ids # index with image ids
    display(df)

## Make and load the model

In [None]:
def load_pre_trained_model(model_name):
    """Build the architecture that matches `model_name` and load its trained weights.

    Args:
        model_name: one of `MODEL_NAMES`. The weights are read from
            `../weights/{model_name}.pth`.

    Returns:
        The model with the checkpoint loaded, moved to `device`.

    Raises:
        AssertionError: if `model_name` is not in `MODEL_NAMES`, or if it has
            no architecture branch below.
    """
    # sanity check
    assert model_name in MODEL_NAMES

    num_classes = len(CATEGORIES_OF_INTEREST)

    if model_name == 'gated-attention_dilated-block=True_resnet18_wd=0_2021-06-30':
        # resnet
        headless_resnet18 = get_resnet(arch='resnet18', pretrained=False, include_top=False)
        # attention head
        gated_attention_model = GatedAttentionModule(num_classes=num_classes,
                                                     use_dilated_block=True)
        # combined model
        model_ft = nn.Sequential()
        model_ft.add_module('resnet', headless_resnet18)
        model_ft.add_module('attention_head', gated_attention_model)
    else:
        # Do not download the ImageNet weights: load_state_dict() below is strict
        # by default, so every parameter and buffer is overwritten by the checkpoint.
        model_ft = models.resnet18(pretrained=False)
        num_ftrs = model_ft.fc.in_features

        if model_name in ('baseline-Adam-2021-06-11', 'strong-aug-not-so-long-Adam-2021-06-14'):
            # single linear classification layer
            model_ft.fc = nn.Linear(num_ftrs, num_classes)
        elif model_name == '2-layer-head_dropout=0.8_wd=0.1_2021-06-16':
            # replace with 2 layers like in SimCLR
            interm_num_ftrs = 64
            model_ft.fc = nn.Sequential(
                nn.Linear(num_ftrs, interm_num_ftrs),
                nn.Dropout(p=0.8), # p - dropout probability
                nn.Linear(interm_num_ftrs, num_classes)
            )
        else:
            assert False, "Model name scenario not addressed"

    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine
    model_ft.load_state_dict(torch.load(f'../weights/{model_name}.pth', map_location=torch.device(device)))

    # place it on the device
    model_ft = model_ft.to(device)

    return model_ft

In [None]:
# Load the model selected in the CONFIG section (MODEL_NAME).
model_ft = load_pre_trained_model(model_name=MODEL_NAME)

In [None]:
# Element-wise losses (one value per image and category) for the error analysis.
criterion = nn.BCEWithLogitsLoss(reduction="none")
# Summed loss for the aggregate evaluation; `evaluate` divides it by the number of examples.
criterion_with_sum_reduction = nn.BCEWithLogitsLoss(reduction="sum")

### Define evaluation functions

In [None]:
def evaluate(iterator, model, criterion):
    """Compute the average loss and the per-class accuracy of `model` over `iterator`.

    Args:
        iterator: yields batches that are dicts with an 'image' tensor and a
            'label' list holding one tensor per class.
        model: network that returns one logit per class for every image.
        criterion: loss that returns a single-element tensor (`.item()` is
            called on it). With reduction='sum' the returned loss is the
            average per example.

    Returns:
        (avg_epoch_loss, avg_per_class_acc): the accumulated loss divided by
        the number of examples, and a tensor with one accuracy per class.

    Raises:
        ValueError: if `iterator` yields no examples.
    """
    total = 0
    epoch_loss = 0
    per_class_correct = None  # sized from the first batch instead of a hard-coded 4

    # no need for gradients, batch-norm and dropout in evaluation mode
    model.eval()
    with torch.no_grad():
        for batch in tqdm(iterator, desc='dev'):
            inputs = batch['image'].to(device) # [batch size, channel, h, w]
            # `labels` is a list with num_classes tensors, each tensor with batch_size labels (integers);
            # stack and transpose it to [batch size, num_classes]
            labels = torch.vstack(batch['label']).T.contiguous().to(torch.float32).to(device)
            total += labels.shape[0]

            outputs = model(inputs) # logits

            loss = criterion(outputs, labels)
            epoch_loss += loss.item()

            # The outputs are logits (the loss is BCEWithLogitsLoss), so a
            # probability threshold of 0.5 is a logit threshold of 0:
            # sigmoid(z) > 0.5  <=>  z > 0.
            preds = (outputs > 0).to(torch.int32)
            batch_correct = (preds.cpu() == labels.cpu()).sum(0) # 0-th axis for batch_size
            if per_class_correct is None:
                per_class_correct = batch_correct
            else:
                per_class_correct = per_class_correct + batch_correct

    if total == 0:
        raise ValueError("`iterator` yielded no examples, nothing to evaluate")

    avg_epoch_loss = epoch_loss / total
    avg_per_class_acc = per_class_correct / total

    return avg_epoch_loss, avg_per_class_acc

In [None]:
def evaluate_and_record_losses(iterator, model, criterion):
    """Run `model` over `iterator` and record per-image results for error analysis.

    Args:
        iterator: yields batches that are dicts with 'image', 'label',
            'image_id' and 'image_path' entries.
        model: network that returns one logit per class for every image.
        criterion: element-wise loss (reduction='none'), so that there is one
            loss value per image and class.

    Returns:
        dict with the keys 'image_ids', 'image_paths', 'losses',
        'correct_predictions', 'labels' and 'preds'. Each value is a list with
        one entry per image; the tensor-valued entries are per-image rows.
    """
    losses = []
    correct_predictions = []
    image_ids = []
    image_paths = []
    all_labels = []
    all_preds = []

    # no need for gradients, batch-norm and dropout in evaluation mode
    model.eval()
    with torch.no_grad():
        for batch in tqdm(iterator, desc='dev'):
            inputs = batch['image'].to(device) # [batch size, channel, h, w]
            # `labels` is a list with num_classes tensors, each tensor with batch_size labels (integers);
            # stack and transpose it to [batch size, num_classes]
            labels = torch.vstack(batch['label']).T.contiguous().to(torch.float32).to(device)

            outputs = model(inputs) # logits

            # reduction='none': one loss per image and class
            loss = criterion(outputs, labels)

            # The outputs are logits (the loss is BCEWithLogitsLoss), so a
            # probability threshold of 0.5 is a logit threshold of 0:
            # sigmoid(z) > 0.5  <=>  z > 0.
            preds = (outputs > 0).to(torch.int32)

            labels_cpu = labels.cpu()
            preds_cpu = preds.cpu()

            image_ids.extend(batch['image_id'])
            image_paths.extend(batch['image_path'])
            # extending a list with a 2-D tensor appends its rows, i.e. one entry per image
            losses.extend(loss.cpu())
            correct_predictions.extend(preds_cpu == labels_cpu)
            all_labels.extend(labels_cpu)
            all_preds.extend(preds_cpu)

    out = {
        'image_ids': image_ids,
        'image_paths': image_paths,
        'losses': losses,
        'correct_predictions': correct_predictions,
        'labels': all_labels,
        'preds': all_preds
    }

    return out

## Evaluate the model

In [None]:
# Aggregate metrics on the full dev1 set: average loss, per-class accuracies and their mean.
print(MODEL_NAME)
loss, per_class_accs = evaluate(iterator=dev1_dataloader, model=model_ft, criterion=criterion_with_sum_reduction)
print(loss, per_class_accs, per_class_accs.mean())

In [None]:
# Per-image losses and predictions; uses the unreduced criterion and the dataloader that returns image details.
print(MODEL_NAME)
result = evaluate_and_record_losses(iterator=dev1_error_analysis_dataloader, model=model_ft, criterion=criterion)

In [None]:
image_ids = result['image_ids']
image_paths = result['image_paths']

# Each of these entries is a list of per-image rows; stack every list into
# one 2-D numpy array with one row per image.
labels_np, preds_np, correct_predictions_np, losses_np = (
    torch.vstack(result[key]).numpy()
    for key in ('labels', 'preds', 'correct_predictions', 'losses')
)

In [None]:
# Sanity check: the lengths and first array dimensions should all agree (one entry per image).
len(image_ids), len(image_paths), losses_np.shape, correct_predictions_np.shape, labels_np.shape, preds_np.shape

In [None]:
# Lookup tables keyed by image id; the rows of the arrays are in the same order as `image_ids`.
ids_to_paths = dict(zip(image_ids, image_paths))
ids_to_labels = {img_id: list(row.astype(int)) for img_id, row in zip(image_ids, labels_np)}
ids_to_preds = {img_id: list(row.astype(int)) for img_id, row in zip(image_ids, preds_np)}

In [None]:
# Column-oriented table of per-image results: totals first, then four columns per category.
ids_to_performances = {
    "id": image_ids,
    "total_loss": losses_np.sum(1),              # loss summed over the categories
    "n_correct": correct_predictions_np.sum(1),  # number of categories predicted correctly
}

for col_idx, category in enumerate(CATEGORIES_OF_INTEREST):
    ids_to_performances.update({
        # correct label per category of interest
        f"label_{category}": labels_np[:, col_idx].astype(int),
        # predicted label per category of interest
        f"pred_{category}": preds_np[:, col_idx].astype(int),
        # correct prediction made per category of interest
        f"correct_{category}": correct_predictions_np[:, col_idx],
        # loss per category of interest
        f"loss_{category}": losses_np[:, col_idx],
    })

In [None]:
# Name of the `ids_to_performances` column that `show_extreme_cases` prints for each image.
METRIC = 'total_loss'

In [None]:
# One row per image, indexed by image id.
ids_to_performances_df = pd.DataFrame(ids_to_performances).set_index('id')

ids_to_performances_df

In [None]:
def show_extreme_cases(df, metric, metric_ascending=False, n_extreme=5, worst=True):
    """Display the images at one end of `df` sorted by `metric`.

    For each selected image this shows the picture, a table of predicted and
    true labels per category, the value of `metric` and the image id.

    Args:
        df: DataFrame indexed by image id that contains the column `metric`.
        metric: name of the column to sort by and to print.
        metric_ascending: sort order passed to `sort_values`.
        n_extreme: number of images to show.
        worst: if true, take the first `n_extreme` rows of the sorted frame,
            otherwise the last `n_extreme` rows. With a loss metric and
            `metric_ascending=False` the first rows are the highest losses.

    Returns:
        A list with, for every shown image, the sum of its labels, i.e. the
        number of categories of interest present in the image.
    """
    num_categories = []

    df_sorted = df.sort_values(metric, ascending=metric_ascending)

    # head()/tail() also behave for n_extreme == 0; `iloc[-0:]` would return the whole frame
    df_extreme = df_sorted.head(n_extreme) if worst else df_sorted.tail(n_extreme)
    display(df_extreme)

    for img_id in df_extreme.index:
        print('-'*80)

        img_path = ids_to_paths[img_id]
        image = torchvision.io.read_image(img_path)
        # read_image returns channels first; imshow expects channels last
        image = image.numpy().transpose((1, 2, 0))

        print(img_path)
        plt.imshow(image)
        plt.pause(0.001)

        # separate name so the `df` argument is not shadowed
        per_category_df = pd.DataFrame({'pred': ids_to_preds[img_id],
                                        'label': ids_to_labels[img_id],
                                        'category': CATEGORIES_OF_INTEREST}).set_index('category')
        display(per_category_df)

        # report the metric that was actually requested, not the notebook-level METRIC
        print(str(metric), ':', df_extreme.loc[img_id, metric])
        print("Image id:", img_id)

        num_categories.append(sum(ids_to_labels[img_id]))

    return num_categories


In [None]:
# next image: 150989

### Worst performance

In [None]:
# Sorted by descending total loss, the first 50 rows are the images with the highest losses.
num_obects_in_cat = show_extreme_cases(ids_to_performances_df, metric='total_loss',
                                       metric_ascending=False, n_extreme=50, worst=True)

In [None]:
# Distribution of the number of categories present per image among the worst cases.
plt.hist(num_obects_in_cat)
# An image can contain from 0 up to all categories, hence the +1 on the tick range.
plt.xticks(range(len(CATEGORIES_OF_INTEREST) + 1))
plt.xlabel('number of categories of interest present in the image')
plt.ylabel('number of images')
plt.show()

#### Tricky images

In [None]:
# ids of images that are objectively hard for the model
# top-50 worst losses for the gated-attention model were considered
#
# Structure: {difficulty category: {image id (as a string): free-text note}}.
# The same image id may appear under several categories.

hard_img_ids = {
    'false positives': {
        # to be looked during the feature visualization, e.g. GradCAM
        '525682': 'dog',
        '67126': 'person',
        '110724': "dog",
        '473720': "cat, person",
        '287427': "person, maybe incorrect label",
        '204272': "dog",
        '433896': "dog",
        '150989': "bird", # water
        '334413': "bird", # water
        '495687': "bird",
        '92002': "cat",
        '240755': "dog",
        '341612': "bird, person", # water
        '430302': "bird", # water
        '439865': "cat",
        '500267': "bird", # water
        '49763': "dog, person",
        '349784': "bird", # water
        '351734': "cat, person",
        '6229': "person", # dog in a vest 
    },
    'partial object (might be enough)': {
        '392035': "cat walking out of the image (do not see cat's face, only ears)",
        '38447': "only see dog's face",
        '431241': "part of the dog is behind the wall, but can definitely see most of it",
        '514934': "only see dog's face; definitely enough for a human",
        '124567': "part of the cat is obstructed by a car",
        '396432': "part of the dog is obstructed by Christmass clothing",
        '555909': "both dog and person are partialy obstructed by the paper flags, but definitely enough",
        '92002': "part of the dog's face is not seen behind the Teddy bear",
        '439865': "person is partially obstructed, but definitely seen"
        
    },
    'partial object (almost nothing)': {
        '38447': "person is not really in the image (1 foot only)",
        '498644': "only see part of person's palm, no person on the image",
        '41097': "only person's hand holding a pizza",
        '273885': "only person's leg is in the frame",
        '137016': "only small parts of the dog visible, but enough for a human",
    
    },
    'not alive': {
        '193506': "'person' on TV, very hard to see",
        '525682': "bird on wallpaper",
        '110724': "birds drawn on a pastic blue cow",
        '287427' : "dog is made from playdow, it also might be a rat from the captions",
        '175202': "drawings of pegions on the wall, no real bird",
        '62154': "paper bird, not a real bird, although realistic",
        '129786': "mosaic of a peacock on the window",
        '208535': "chicken/turkey in the oven",
        '224134': "small fluffy toy with Scooby-Doo",
        '210471': "image of a dog in the screen",
        '437947': "stuffed owl at the very top of the image",
        
    },
    'hard to see an object': {
        '193506': "'person' on TV, very hard to see", 
    },
    'dog looks like a cat': {
        '193506': "dog loocs like a cat",
        '92002': "dog loocs like a cat",
    },
    'back to the camera': {
        '150989': "the closest dog has its back to the camera",
        '273885': "cat sits with its back to the camera",
        '467769': "only see the back of the dog",
        '91465': "only see back of the child"
        
    },
    'small percentage of the image': {
        '67126': "cats are pretty far away and small",
        '511506': "both people and a dog are far away - need to squint",
        '150989': "most of the dogs are far away",
        '467386': "birds occupy only a very small percentage of the image",
        '579267': "bird occupies only a very small percentage of the image",
        '6709': "birds occupy only a very small percentage of the image",
        '129026': "dog occupies only a very small percentage of the image",
        '95081': "birds occupy only a very small percentage of the image",
        '459733': "both person and dog are quite far on the paddle board, but definitely enough",
           
    },
    'incorrect label': {
        '287427': "person and a dog are made from playdow - person's label is missing",
        '392035': "the same object is marked both as a cat and as a dog"
        
    },

}

In [None]:
# Persist the manual annotations as JSON so they can be reloaded without re-running the analysis.
with open('../my_splits/dev1_tricky_images.json', 'w') as f:
    json.dump(hard_img_ids, f)

### Best performance

In [None]:
# Same descending sort as for the worst cases; worst=False takes the last 50 rows, i.e. the lowest losses.
num_obects_in_cat = show_extreme_cases(ids_to_performances_df, metric='total_loss',
                                       metric_ascending=False, n_extreme=50, worst=False)

In [None]:
# Distribution of the number of categories present per image among the best cases.
plt.hist(num_obects_in_cat)
# An image can contain from 0 up to all categories, hence the +1 on the tick range.
plt.xticks(range(len(CATEGORIES_OF_INTEREST) + 1))
plt.xlabel('number of categories of interest present in the image')
plt.ylabel('number of images')
plt.show()

The 50 images with the best (lowest) losses contain either 0 or 1 of the categories of interest. This is expected: **most images contain only one category.**

### Evaluate performance without certain categories identified during error analysis

#### Make a file with some ids excluded

In [None]:
# Reload the saved annotations of the tricky images from the JSON file.
with open('../my_splits/dev1_tricky_images.json', 'r') as f:
    hard_img_ids = json.load(f)

In [None]:
# List the available difficulty categories (iterating a dict yields its keys).
for key in hard_img_ids:
    print(key)

In [None]:
# Categories whose images are removed from the "cleaner" dev set.
exclude_categories = [
    "partial object (almost nothing)",
    "not alive",
    "incorrect label",
]

# check that they are all valid keys
assert all(cat in hard_img_ids for cat in exclude_categories)

In [None]:
# Collect the image ids (dict keys, strings) of every excluded category.
ids_to_exclude = []
for category in exclude_categories:
    ids_to_exclude += hard_img_ids[category]  # extending with a dict adds its keys

print('len(ids_to_exclude):', len(ids_to_exclude))
# an image can be listed under several categories, so drop the duplicates
ids_to_exclude = list(set(ids_to_exclude))
print('number of unique elements in `ids_to_exclude`:', len(ids_to_exclude))

In [None]:
# The split file is a Python list of ids (ints) saved as JSON text.
with open('../my_splits/dev1_ids.txt', 'r') as ids_file:
    dev1_all_ids = json.load(ids_file)

len(dev1_all_ids)

In [None]:
# The ids in `ids_to_exclude` are strings (JSON keys) while the split file stores ints.
excluded_int_ids = {int(img_id) for img_id in ids_to_exclude}
# Keep the original order of the split (a plain set difference would discard it);
# dict.fromkeys drops duplicate ids while preserving that order.
ids_left = [img_id for img_id in dict.fromkeys(dev1_all_ids) if img_id not in excluded_int_ids]
len(ids_left)

In [None]:
# Save the remaining ids as a JSON list, the same format as the original split file.
with open('../my_splits/dev1_wo_some_tricky_images.txt', 'w') as out_file:
    json.dump(ids_left, out_file)

In [None]:
# Read the file back to check that the filtered split was written correctly.
with open('../my_splits/dev1_wo_some_tricky_images.txt', 'r') as f:
    ids_left = json.load(f)

len(ids_left)

#### Create the dataset and dataloader objects

In [None]:
# Same configuration as the full dev1 dataset, restricted to the ids left after the exclusion.
dev1_wo_some_tricky_images_dataset = CocoNoCropping(
    img_ids='../my_splits/dev1_wo_some_tricky_images.txt',
    my_annotations_file='../my_annotations/imgIds_to_labels_train2017.json',
    img_dir='../my_images/dev1/',
    transform=dev_transforms,
    divide_by_255=True,
    return_image_details=False,
)
print(f"Examples: {len(dev1_wo_some_tricky_images_dataset)}")

dev1_wo_some_tricky_images_dataloader = DataLoader(
    dev1_wo_some_tricky_images_dataset, batch_size=32, shuffle=False
)

print(f"Batches:  {len(dev1_wo_some_tricky_images_dataloader)}")

#### Evaluate the performance

In [None]:
# Same aggregate evaluation as before, now on dev1 without the excluded tricky images.
print(MODEL_NAME)
loss, per_class_accs = evaluate(iterator=dev1_wo_some_tricky_images_dataloader,
                                model=model_ft, criterion=criterion_with_sum_reduction)
print(loss, per_class_accs, per_class_accs.mean())

## Compare the performances of all models before and after removing some of the tricky images

In [None]:
# Mean per-class accuracy of every model on the full and on the cleaned dev1 set,
# in the order of MODEL_NAMES.
performence_results = {
    'All images acc.': [],
    'Cleaner images acc.': [],
}

# Loop-local names are used on purpose: iterating with MODEL_NAME / model_ft would
# silently overwrite the model selected in CONFIG for any cell that is re-run afterwards.
for model_name in MODEL_NAMES:
    print(model_name)

    model = load_pre_trained_model(model_name=model_name)

    # evaluate() returns (average loss, per-class accuracies); only the accuracies are needed
    accs_all = evaluate(iterator=dev1_dataloader,
                        model=model, criterion=criterion_with_sum_reduction)[1]
    acc = accs_all.mean().item()
    print('\t All images: ', acc)
    performence_results['All images acc.'].append(acc)

    accs_clean = evaluate(iterator=dev1_wo_some_tricky_images_dataloader,
                          model=model, criterion=criterion_with_sum_reduction)[1]
    acc = accs_clean.mean().item()
    print('\t Clean images:', acc)
    performence_results['Cleaner images acc.'].append(acc)

    print('\n', '-'*80, '\n')

In [None]:
# One row per model (in the order of MODEL_NAMES), one column per evaluation set.
performence_results_df = pd.DataFrame(performence_results)
performence_results_df

In [None]:
# Label the rows with the model names; the results were appended in the order of MODEL_NAMES.
performence_results_df.index = MODEL_NAMES

In [None]:
# Save the raw accuracies (fractions, not percentages) together with the model-name index.
performence_results_df.to_csv('../results/best_models_summary.csv')

In [None]:
# Re-read the saved summary and convert the fractions to percentages with one decimal.
# Reading from the file first keeps this cell idempotent when it is re-run.
performence_results_df = pd.read_csv('../results/best_models_summary.csv', index_col=0)
performence_results_df = (performence_results_df * 100).round(1)

In [None]:
# Final comparison table: accuracy in percent per model, on all and on the cleaner images.
performence_results_df