# Metrics analysis

## Utils

In [10]:
#!pip install bert_score

In [11]:
from transformers import BertTokenizer
from configuration import Config
from bert_score import score
from models import caption
from datasets import coco
from PIL import Image
import torch 
import os

model_type = "microsoft/deberta-xlarge-mnli"

In [12]:
def create_caption_and_mask(start_token, max_length):
    caption_template = torch.zeros((1, max_length), dtype=torch.long)
    mask_template = torch.ones((1, max_length), dtype=torch.bool)

    caption_template[:, 0] = start_token
    mask_template[:, 0] = False

    return caption_template, mask_template

@torch.no_grad()
def evaluate():
    model.eval()
    for i in range(config.max_position_embeddings - 1):
        predictions = model(image, caption, cap_mask)
        predictions = predictions[:, i, :]
        predicted_id = torch.argmax(predictions, axis=-1)

        if predicted_id[0] == 102:
            return caption

        caption[:, i+1] = predicted_id[0]
        cap_mask[:, i+1] = False

    return caption

## Checkpoints

In [13]:
checkpoints = [
    #'1685838940-0-resnet50-checkpoint.pth',
    #'1685838940-1-resnet50-checkpoint.pth',
    #'1685838940-2-resnet50-checkpoint.pth',
    #'1685838940-3-resnet50-checkpoint.pth',
    '1685838940-4-resnet50-checkpoint.pth',
]

## Dataset

In [14]:
config = Config()
dataset = coco.build_dataset(config, mode='test')

## Model and evaluation

In [15]:
print("Checking for checkpoint.")
scores = []
for checkpoint_path in checkpoints:
    if checkpoint_path is None:
        raise NotImplementedError('No model to chose from!')
    else:
        if not os.path.exists(checkpoint_path):
            raise NotImplementedError('Give valid checkpoint path')
    config.backbone = checkpoint_path.split('-')[2]
    model,_ = caption.build_model(config)
    print(f"Loading Checkpoint {checkpoint_path}...")
    checkpoint = torch.load(checkpoint_path, map_location='cuda')
    model.load_state_dict(checkpoint['model'])

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

    start_token = tokenizer.convert_tokens_to_ids(tokenizer._cls_token)
    end_token = tokenizer.convert_tokens_to_ids(tokenizer._sep_token)

    reference, predicted = [], []
    annotations = dataset.annot[:100]

    for i, (img, cap) in enumerate(annotations):

        print(f'\rAnnotation {1 + i}/{len(annotations)}', end = '')

        image = Image.open(os.path.join(config.dir, 'ImageCLEFmedical_Caption_2023_valid_images', 'valid', img))
        image = coco.val_transform(image)
        image = image.unsqueeze(0)

        caption, cap_mask = create_caption_and_mask(start_token, config.max_position_embeddings)

        output = evaluate()

        result = tokenizer.decode(output[0].tolist(), skip_special_tokens=True)
        predicted.append(result)
        reference.append(cap)

    score = score(predicted, reference, model_type=model_type, device = 'cpu')
    print(f'{checkpoint_path}\t{score}') # Precision, recall, F1
    scores.append(score)

Checking for checkpoint.




Loading Checkpoint 1685838940-4-resnet50-checkpoint.pth...
Annotation 100/100

Some weights of the model checkpoint at microsoft/deberta-xlarge-mnli were not used when initializing DebertaModel: ['pooler.dense.bias', 'classifier.weight', 'pooler.dense.weight', 'classifier.bias']
- This IS expected if you are initializing DebertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DebertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


1685838940-4-resnet50-checkpoint.pth	(tensor([0.7351, 0.7049, 0.7565, 0.7184, 0.7740, 0.7756, 0.7813, 0.7091, 0.7288,
        0.7051, 0.5995, 0.6786, 0.6270, 0.5454, 0.6839, 0.6825, 0.6345, 0.5843,
        0.6757, 0.7326, 0.6268, 0.7592, 0.7627, 0.7023, 0.7142, 0.5926, 0.7556,
        0.6856, 0.6545, 0.7599, 0.7598, 0.7912, 0.7274, 0.7304, 0.6737, 0.7775,
        0.7307, 0.6862, 0.6592, 0.7437, 0.5946, 0.6167, 0.6886, 0.7021, 0.7195,
        0.6750, 0.7412, 0.7255, 0.7128, 0.6849, 0.6659, 0.7687, 0.6422, 0.4404,
        0.7492, 0.6076, 0.6767, 0.6719, 0.6670, 0.7493, 0.7850, 0.6151, 0.6962,
        0.7623, 0.6606, 0.7101, 0.6899, 0.2799, 0.2552, 0.5766, 0.2617, 0.6712,
        0.6335, 0.7451, 0.5853, 0.6500, 0.6943, 0.7282, 0.6862, 0.7924, 0.6723,
        0.7113, 0.6811, 0.7180, 0.7031, 0.6933, 0.6870, 0.6080, 0.7058, 0.5926,
        0.5605, 0.6431, 0.8238, 0.6893, 0.7982, 0.8130, 0.7573, 0.7974, 0.6000,
        0.7671]), tensor([0.5000, 0.5924, 0.5980, 0.5658, 0.8689, 0.6572, 0.7026, 

In [16]:
scores

[(tensor([0.7351, 0.7049, 0.7565, 0.7184, 0.7740, 0.7756, 0.7813, 0.7091, 0.7288,
          0.7051, 0.5995, 0.6786, 0.6270, 0.5454, 0.6839, 0.6825, 0.6345, 0.5843,
          0.6757, 0.7326, 0.6268, 0.7592, 0.7627, 0.7023, 0.7142, 0.5926, 0.7556,
          0.6856, 0.6545, 0.7599, 0.7598, 0.7912, 0.7274, 0.7304, 0.6737, 0.7775,
          0.7307, 0.6862, 0.6592, 0.7437, 0.5946, 0.6167, 0.6886, 0.7021, 0.7195,
          0.6750, 0.7412, 0.7255, 0.7128, 0.6849, 0.6659, 0.7687, 0.6422, 0.4404,
          0.7492, 0.6076, 0.6767, 0.6719, 0.6670, 0.7493, 0.7850, 0.6151, 0.6962,
          0.7623, 0.6606, 0.7101, 0.6899, 0.2799, 0.2552, 0.5766, 0.2617, 0.6712,
          0.6335, 0.7451, 0.5853, 0.6500, 0.6943, 0.7282, 0.6862, 0.7924, 0.6723,
          0.7113, 0.6811, 0.7180, 0.7031, 0.6933, 0.6870, 0.6080, 0.7058, 0.5926,
          0.5605, 0.6431, 0.8238, 0.6893, 0.7982, 0.8130, 0.7573, 0.7974, 0.6000,
          0.7671]),
  tensor([0.5000, 0.5924, 0.5980, 0.5658, 0.8689, 0.6572, 0.7026, 0.6662, 0.63

In [25]:
for t in scores[0]:
    print(torch.mean(t[0]))

tensor(0.7351)
tensor(0.5000)
tensor(0.5952)


In [17]:
predicted

['ct scan of the brain showing a large mass in the left frontal lobe.',
 'transthoracic echocardiogram showing a large mass in the right atrium.',
 'the left coronary angiogram shows the left coronary artery ( arrow ).',
 'mri of the abdomen showing a large mass in the right atrium.',
 'chest x - ray showing a large right - sided pleural effusion.',
 'ct scan of the abdomen showing a large mass in the right kidney.',
 'chest x - ray showing a large mass in the right lung.',
 'mri brain showing a large mass in the left frontal lobe.',
 'ct scan of the abdomen showing a large mass in the right kidney.',
 'mri of the abdomen showing a large mass in the right kidney.',
 'ultrasound image of the right knee showing the presence of a large cystic lesion in the right breast.',
 'ct scan of the chest showing a large mass in the right upper lobe of the left lung.',
 'postoperative radiograph of the right hip.',
 'ultrasound image of the right shoulder showing a large mass in the left breast.',
 

In [18]:
reference

['Parenchymal hemorrhage. 43-year-old man with COVID-19 transferred to ICU, developed acute myocardial infarction, and received thrombolytic therapy. He suddenly deteriorated and was found with bilaterally fixed and dilated pupils. Axial non-contrast CT of the brain revealed acute subdural hemorrhage, large occipito-parietal intraparenchymal hematoma with blood-fluid level (arrow)',
 'Parasternal short axis. Origins of the coronary arteries with dilation of the anterior descendent artery (white arrow).',
 'Coronary angiography. Multiple coronary aneurysms on the anterior descending artery (stop flow, thrombosis process in progress: red arrow) and on the circumflex artery (white arrows).',
 '(A) Invasive haemodynamic measurement of right and left ventricular pressure with inconclusive interventricular interdependence. (B) Four-chamber cardiac magnetic resonance imaging image demonstrating moderate pericardial effusion and thickening of the pericardium.',
 'Chest X-ray: patchy bilateral 