# Metrics analysis

## Utils

In [43]:
#!pip install bert_score

In [44]:
from transformers import BertTokenizer
from configuration import Config
from bert_score import score
from models import caption
from datasets import coco
from PIL import Image
import torch 
import os

# Name of the embedding model handed to bert_score's `score(...)` through its
# `model_type` argument when the generated captions are scored.
model_type = "microsoft/deberta-xlarge-mnli"

In [45]:
def create_caption_and_mask(start_token, max_length):
    """Build the initial decoder inputs for a single caption.

    Args:
        start_token: Token id written into the first caption position.
        max_length: Number of positions in the caption and the mask.

    Returns:
        A ``(caption, mask)`` tuple of tensors, both of shape
        ``(1, max_length)``:

        * ``caption`` is ``torch.long``, all zeros except ``start_token``
          at position 0.
        * ``mask`` is ``torch.bool``, ``True`` everywhere except position 0,
          which is ``False``.
    """
    shape = (1, max_length)

    token_ids = torch.zeros(shape, dtype=torch.long)
    position_mask = torch.ones_like(token_ids, dtype=torch.bool)

    # Only the first slot is populated and unmasked up front.
    token_ids[:, 0] = start_token
    position_mask[:, 0] = False

    return token_ids, position_mask

@torch.no_grad()
def evaluate(end_token_id=102):
    """Greedily decode a caption for the current image.

    This function works on notebook-level globals instead of arguments:
    ``model``, ``image``, ``caption``, ``cap_mask`` and ``config`` must all be
    bound before it is called. ``caption`` and ``cap_mask`` are modified in
    place: each accepted token is written to the next caption position and
    that position is unmasked.

    Args:
        end_token_id: Token id that terminates decoding. Defaults to ``102``,
            the value that used to be hard-coded here.
            NOTE(review): presumably the ``[SEP]`` id of the BERT tokenizer
            used by the notebook -- confirm it matches ``end_token`` there.

    Returns:
        The global ``caption`` tensor. Decoding stops as soon as
        ``end_token_id`` is predicted (the end token itself is not written),
        or after ``config.max_position_embeddings - 1`` steps.
    """
    model.eval()

    num_steps = config.max_position_embeddings - 1
    for i in range(num_steps):
        # The whole caption is re-fed each step; only the logits at the
        # current position `i` decide the token for position `i + 1`.
        logits = model(image, caption, cap_mask)
        predicted_id = torch.argmax(logits[:, i, :], dim=-1)

        if predicted_id[0] == end_token_id:
            break

        caption[:, i + 1] = predicted_id[0]
        cap_mask[:, i + 1] = False

    return caption

## Checkpoints

In [46]:
# Checkpoint files to compare: the same run id with indices 0-4 in the second
# dash-separated field. The third field (here "resnet50") is what the
# evaluation cell assigns to `config.backbone`.
checkpoints = [
    f'1685838940-{index}-resnet50-checkpoint.pth'
    for index in range(5)
]

## Dataset

In [47]:
# Project configuration object. The evaluation cell reads its `dir` and
# `max_position_embeddings` fields and sets `backbone` per checkpoint.
config = Config()
# Dataset built with mode='test'; the evaluation cell iterates `dataset.annot`.
# NOTE(review): images are later opened from the
# 'ImageCLEFmedical_Caption_2023_valid_images/valid' folder -- confirm that
# this is the split 'test' mode is meant to cover.
dataset = coco.build_dataset(config, mode='test')

## Model and evaluation

In [48]:
# Re-import the model factory under its own name. The decoding loop below has
# to bind the global name `caption` to a token tensor (that is the name
# `evaluate()` reads), which hides the `models.caption` module and would make
# `caption.build_model` fail from the second checkpoint onwards.
from models import caption as caption_module

# Number of annotations scored per checkpoint (kept small for a quick check).
NUM_EVAL_SAMPLES = 3

print("Checking for checkpoint.")

# Loop-invariant setup: none of this depends on the checkpoint being scored.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
start_token = tokenizer.cls_token_id
end_token = tokenizer.sep_token_id
annotations = dataset.annot[:NUM_EVAL_SAMPLES]
image_dir = os.path.join(config.dir, 'ImageCLEFmedical_Caption_2023_valid_images', 'valid')

scores = []
for checkpoint_path in checkpoints:
    if checkpoint_path is None:
        raise ValueError('No model to chose from!')
    if not os.path.exists(checkpoint_path):
        raise FileNotFoundError('Give valid checkpoint path')

    # File names look like '<run>-<index>-<backbone>-checkpoint.pth'.
    config.backbone = checkpoint_path.split('-')[2]
    model, _ = caption_module.build_model(config)
    print(f"Loading Checkpoint {checkpoint_path}...")
    # Load onto the CPU: the state dict is copied into the model's own
    # parameters anyway, and a GPU-resident checkpoint would compete for
    # memory with the BERTScore model loaded further down.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    del checkpoint

    reference, predicted = [], []

    for i, (img, cap) in enumerate(annotations):

        print(f'\rAnnotation {1 + i}/{len(annotations)}', end = '')

        # Transform inside the `with` so the file handle is released promptly.
        with Image.open(os.path.join(image_dir, img)) as raw_image:
            image = coco.val_transform(raw_image)
        image = image.unsqueeze(0)

        # `evaluate()` takes no inputs: it reads the globals `model`, `image`,
        # `caption`, `cap_mask` and `config`, so these names must stay as is.
        caption, cap_mask = create_caption_and_mask(start_token, config.max_position_embeddings)

        output = evaluate()

        result = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
        predicted.append(result)
        reference.append(cap)

    print()  # terminate the '\r' progress line

    # Store the result under a new name: assigning to `score` would replace
    # the bert_score function and break the next iteration.
    checkpoint_score = score(predicted, reference, model_type=model_type)
    print(f'{checkpoint_path}\t{checkpoint_score}') # Precision, recall, F1
    scores.append(checkpoint_score)

Checking for checkpoint.




Loading Checkpoint 1685838940-0-resnet50-checkpoint.pth...
Annotation 2/3

Some weights of the model checkpoint at microsoft/deberta-xlarge-mnli were not used when initializing DebertaModel: ['classifier.bias', 'pooler.dense.bias', 'pooler.dense.weight', 'classifier.weight']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 4.00 GiB total capacity; 3.38 GiB already allocated; 0 bytes free; 3.48 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Show the per-checkpoint BERTScore results collected by the evaluation cell.
scores