# Metrics analysis

## Utils

In [54]:
#!pip install bert_score

In [55]:
from transformers import BertTokenizer
from configuration import Config
from bert_score import score
from models import caption
from datasets import coco
from PIL import Image
import torch 
import os

model_type = "microsoft/deberta-xlarge-mnli"

In [56]:
def create_caption_and_mask(start_token, max_length):
    caption_template = torch.zeros((1, max_length), dtype=torch.long)
    mask_template = torch.ones((1, max_length), dtype=torch.bool)

    caption_template[:, 0] = start_token
    mask_template[:, 0] = False

    return caption_template, mask_template

@torch.no_grad()
def evaluate():
    model.eval()
    for i in range(config.max_position_embeddings - 1):
        predictions = model(image, caption, cap_mask)
        predictions = predictions[:, i, :]
        predicted_id = torch.argmax(predictions, axis=-1)

        if predicted_id[0] == 102:
            return caption

        caption[:, i+1] = predicted_id[0]
        cap_mask[:, i+1] = False

    return caption

## Checkpoints

In [57]:
checkpoints = [
    #'1685838940-0-resnet50-checkpoint.pth',
    #'1685838940-1-resnet50-checkpoint.pth',
    #'1685838940-2-resnet50-checkpoint.pth',
    #'1685838940-3-resnet50-checkpoint.pth',
    #'1685838940-4-resnet50-checkpoint.pth',
    '1685898201-2-resnet101-checkpoint.pth',
]

## Dataset

In [58]:
config = Config()
dataset = coco.build_dataset(config, mode='test')

## Model and evaluation

In [59]:
print("Checking for checkpoint.")
scores = []
for checkpoint_path in checkpoints:
    if checkpoint_path is None:
        raise NotImplementedError('No model to chose from!')
    else:
        if not os.path.exists(checkpoint_path):
            raise NotImplementedError('Give valid checkpoint path')
    config.backbone = checkpoint_path.split('-')[2]
    model,_ = caption.build_model(config)
    print(f"Loading Checkpoint {checkpoint_path}...")
    checkpoint = torch.load(checkpoint_path, map_location='cuda')
    model.load_state_dict(checkpoint['model'])

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    start_token = tokenizer.convert_tokens_to_ids(tokenizer._cls_token)
    end_token = tokenizer.convert_tokens_to_ids(tokenizer._sep_token)

    reference, predicted = [], []
    annotations = dataset.annot[:100]

    for i, (img, cap) in enumerate(annotations):

        print(f'\rAnnotation {1 + i}/{len(annotations)}', end = '')

        image = Image.open(os.path.join(config.dir, 'ImageCLEFmedical_Caption_2023_valid_images', 'valid', img))
        image = coco.val_transform(image)
        image = image.unsqueeze(0)

        caption, cap_mask = create_caption_and_mask(start_token, config.max_position_embeddings)

        output = evaluate()

        result = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
        predicted.append(result)
        reference.append(cap)

    score = score(predicted, reference, model_type=model_type, device = 'cpu')
    print(f'{checkpoint_path}\t{score}') # Precision, recall, F1
    scores.append(score)

Checking for checkpoint.




Loading Checkpoint 1685898201-2-resnet101-checkpoint.pth...
Annotation 100/100

Some weights of the model checkpoint at microsoft/deberta-xlarge-mnli were not used when initializing DebertaModel: ['pooler.dense.bias', 'classifier.weight', 'pooler.dense.weight', 'classifier.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


1685898201-2-resnet101-checkpoint.pth	(tensor([0.7111, 0.6570, 0.6919, 0.6808, 0.7740, 0.7756, 0.7556, 0.7106, 0.6957,
        0.7048, 0.6193, 0.6977, 0.5378, 0.3739, 0.6839, 0.6959, 0.5827, 0.5818,
        0.7056, 0.7320, 0.6532, 0.7361, 0.7728, 0.6588, 0.7142, 0.5457, 0.6938,
        0.6832, 0.6539, 0.7571, 0.7838, 0.7319, 0.6955, 0.7205, 0.7165, 0.7239,
        0.6668, 0.6862, 0.5982, 0.7409, 0.6268, 0.6570, 0.7314, 0.7015, 0.6916,
        0.6634, 0.7412, 0.7287, 0.6755, 0.6495, 0.6595, 0.7678, 0.6125, 0.6700,
        0.7084, 0.5975, 0.6127, 0.6956, 0.6343, 0.7493, 0.7450, 0.5167, 0.7191,
        0.7279, 0.6317, 0.6692, 0.7078, 0.7069, 0.4602, 0.5980, 0.6322, 0.6350,
        0.6691, 0.7045, 0.5537, 0.6518, 0.7219, 0.7052, 0.6768, 0.7643, 0.6978,
        0.7146, 0.6616, 0.7379, 0.7353, 0.6402, 0.6477, 0.4913, 0.7004, 0.5655,
        0.5387, 0.7433, 0.8238, 0.6495, 0.7991, 0.8655, 0.7573, 0.7040, 0.6173,
        0.7710]), tensor([0.5358, 0.5931, 0.5934, 0.5564, 0.8689, 0.6572, 0.7219,

In [60]:
scores

[(tensor([0.7111, 0.6570, 0.6919, 0.6808, 0.7740, 0.7756, 0.7556, 0.7106, 0.6957,
          0.7048, 0.6193, 0.6977, 0.5378, 0.3739, 0.6839, 0.6959, 0.5827, 0.5818,
          0.7056, 0.7320, 0.6532, 0.7361, 0.7728, 0.6588, 0.7142, 0.5457, 0.6938,
          0.6832, 0.6539, 0.7571, 0.7838, 0.7319, 0.6955, 0.7205, 0.7165, 0.7239,
          0.6668, 0.6862, 0.5982, 0.7409, 0.6268, 0.6570, 0.7314, 0.7015, 0.6916,
          0.6634, 0.7412, 0.7287, 0.6755, 0.6495, 0.6595, 0.7678, 0.6125, 0.6700,
          0.7084, 0.5975, 0.6127, 0.6956, 0.6343, 0.7493, 0.7450, 0.5167, 0.7191,
          0.7279, 0.6317, 0.6692, 0.7078, 0.7069, 0.4602, 0.5980, 0.6322, 0.6350,
          0.6691, 0.7045, 0.5537, 0.6518, 0.7219, 0.7052, 0.6768, 0.7643, 0.6978,
          0.7146, 0.6616, 0.7379, 0.7353, 0.6402, 0.6477, 0.4913, 0.7004, 0.5655,
          0.5387, 0.7433, 0.8238, 0.6495, 0.7991, 0.8655, 0.7573, 0.7040, 0.6173,
          0.7710]),
  tensor([0.5358, 0.5931, 0.5934, 0.5564, 0.8689, 0.6572, 0.7219, 0.6853, 0.63

In [61]:
for t in scores[0]:
    print(torch.mean(t[0]))

tensor(0.7111)
tensor(0.5358)
tensor(0.6111)


In [62]:
for i, r in enumerate(predicted):
    print(f'{i}\t{annotations[i][0]}\n\t{r}')

0	ImageCLEFmedical_Caption_2023_valid_005219.jpg
	ct scan of the brain showing a large cystic lesion in the right parietal lobe.
1	ImageCLEFmedical_Caption_2023_valid_005220.jpg
	transthoracic echocardiogram showing a large vegetation in the right ventricle.
2	ImageCLEFmedical_Caption_2023_valid_005221.jpg
	angiography of the left coronary artery showing the left coronary artery ( arrow ).
3	ImageCLEFmedical_Caption_2023_valid_005222.jpg
	mri of the abdomen showing a large cystic mass in the right kidney.
4	ImageCLEFmedical_Caption_2023_valid_005223.jpg
	chest x - ray showing a large right - sided pleural effusion.
5	ImageCLEFmedical_Caption_2023_valid_005224.jpg
	ct scan of the abdomen showing a large mass in the right kidney.
6	ImageCLEFmedical_Caption_2023_valid_005225.jpg
	chest x - ray showing a large right - sided pleural effusion.
7	ImageCLEFmedical_Caption_2023_valid_005226.jpg
	mri of the brain showing a large cystic lesion in the right frontal lobe.
8	ImageCLEFmedical_Caption

In [63]:
for i, r in enumerate(reference):
    print(f'{i}\t{annotations[i][0]}\n\t{r}')

0	ImageCLEFmedical_Caption_2023_valid_005219.jpg
	Parenchymal hemorrhage. 43-year-old man with COVID-19 transferred to ICU, developed acute myocardial infarction, and received thrombolytic therapy. He suddenly deteriorated and was found with bilaterally fixed and dilated pupils. Axial non-contrast CT of the brain revealed acute subdural hemorrhage, large occipito-parietal intraparenchymal hematoma with blood-fluid level (arrow)
1	ImageCLEFmedical_Caption_2023_valid_005220.jpg
	Parasternal short axis. Origins of the coronary arteries with dilation of the anterior descendent artery (white arrow).
2	ImageCLEFmedical_Caption_2023_valid_005221.jpg
	Coronary angiography. Multiple coronary aneurysms on the anterior descending artery (stop flow, thrombosis process in progress: red arrow) and on the circumflex artery (white arrows).
3	ImageCLEFmedical_Caption_2023_valid_005222.jpg
	(A) Invasive haemodynamic measurement of right and left ventricular pressure with inconclusive interventricular in