In [11]:
pip install nltk




In [12]:
!pip install evaluate rouge rouge_score



In [13]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import nltk
import itertools
import re

from evaluate import load
from nltk.corpus import stopwords

Given a sentence of label and a sentence of prediction, compare them such that the metric indicates accuracy of the generated text being accurate to description. Since this is medical image captioning, importance of sentence can be captured in major description words and medical terminology, such as body parts and diagnosis.

1. Filter both label and prediction of stopwords (unneccesary content)
2. Extract important words from the label
3. Compare

In [14]:
nltk.download('stopwords', quiet=True)

class Tokenize:
  def __init__(self, references, predictions):
    self._stopwords = list(itertools.islice(stopwords.words('english'), 100)) #grab first 100 stopwords
    self._medical = [] #load in medical dictionary
    self._rouge = load("rouge", trust_remote_code=True)
    self._ref = [" ".join(self.filter(r)) for r in references]
    self._pred = [" ".join(self.filter(p)) for p in predictions] #this doesn,t work, this needs to be

  def filter(self, caption: str):
    '''Tokenize and filter text into raw data'''
    words = re.findall(r'\b\w+\b', caption.lower())
    return [word for word in words if word not in self._stopwords] #normalize the data to lowercase

  def rogue(self):
    '''Given labels and predictions, calculate rouge scores'''
    rouge_1_total, rouge_2_total, rouge_l_total = 0, 0, 0

    for i, p in enumerate(self._pred):
      calc = self._rouge.compute(predictions=[p], references=[self._ref[i]])
      rouge_1_total += calc["rouge1"]
      rouge_2_total += calc["rouge2"]
      rouge_l_total += calc["rougeL"]

    total_pred = len(self._pred)
    avg_rouge_1 = rouge_1_total / total_pred if total_pred > 0 else 0 #avoid division by 0 errors
    avg_rouge_2 = rouge_2_total / total_pred if total_pred > 0 else 0
    avg_rouge_l = rouge_l_total / total_pred if total_pred > 0 else 0
    return {"rouge_1": avg_rouge_1, "rouge_2": avg_rouge_2, "rouge_l": avg_rouge_l}

  def bleu(self):
    pass

  def param_sweep(weights, scores):
    '''Given a series of weights and scores (of the same length), output the final score'''
    if len(weights) != len(scores):
      raise ValueError()

    total = 0
    for i, (metric, score) in enumerate(scores.items()): #since python dictionaries maintain order, we can use enumerate
      total += score * weights[i]
    return total/len(scores) if len(scores) > 0 else 0 #normalize weighted score

  def default():
    '''Run basic pass through all metrics and average them.'''
    pass

## **Fetching Label Samples**

In [16]:
!pip install kaggle
from google.colab import drive
drive.mount('/content/drive')
! mkdir ~/.kaggle
! cp /content/drive/MyDrive/Florence_2/kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download virajbagal/roco-dataset
! unzip roco-dataset.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: all_data/validation/radiology/images/PMC3870636_CRIM.OTOLARYNGOLOGY2013-650428.002.jpg  
  inflating: all_data/validation/radiology/images/PMC3870648_CRIM.DENTISTRY2013-378062.012.jpg  
  inflating: all_data/validation/radiology/images/PMC3871037_enm-28-326-g001.jpg  
  inflating: all_data/validation/radiology/images/PMC3872022_CRIM.MEDICINE2013-653925.001.jpg  
  inflating: all_data/validation/radiology/images/PMC3872161_CRIM.SURGERY2013-209494.001.jpg  
  inflating: all_data/validation/radiology/images/PMC3872390_CRIM.OBGYN2013-906351.001.jpg  
  inflating: all_data/validation/radiology/images/PMC3872571_OJO-6-193-g005.jpg  
  inflating: all_data/validation/radiology/images/PMC3872571_OJO-6-193-g007.jpg  
  inflating: all_data/validation/radiology/images/PMC3872649_SNI-4-150-g001.jpg  
  inflating: all_data/validation/radiology/images/PMC3872649_SNI-4-150-g014.jpg  
  inflating: all_data/validation/radiolog

In [17]:
df_train = pd.read_csv('/content/all_data/train/radiologytraindata.csv', delimiter=',')
df_train.dataframeName = 'radiologytraindata.csv'
rows, cols = df_train.shape
print(f'There are {rows} rows and {cols} columns')

There are 65450 rows and 3 columns


In [18]:
mask = df_train['caption'].str.contains('chest x-ray', case=False)
captions = df_train[mask]['caption'].tolist()
print(captions)

# tokenize ^

[' Chest X-ray, which confirmed the position of guidewire, extending from the right internal jugular vein up to inferior vena cava\n', ' Chest X-ray findings. Chest radiograph revealed an engorged pulmonary trunk with an abrupt cutoff of pulmonary vascularity in the distal portions bilaterally, indicative of the "Westermark sign" (arrows).\n', ' Chest X-ray, PA, showing the position of the gun nails\n', ' Chest x-ray showing right-sided pneumothorax.\n', ' Chest X-ray on the day of admission showing diffuse bilateral haziness and air bronchogram.\n', ' Chest X-ray of the patient. A chest X-ray showed a faint patchy opacity over the periphery of the right upper lung zone (black arrow).\n', '  Chest X-ray 17 days after admission.\n', ' Chest X-ray, posterior-anterior view after the surgical removal of the intermediate lobe of the right lung. Drain in the right pleural cavity. The postoperative chest radiograph revealed no pneumothorax.\n', ' Chest X-ray postero-anterior view shows bilate

In [21]:
predictions = ['chest x-ray demonstrating normal lung fields with clear airway, no visible abnormalities in the heart or bony structures' for i in range(len(captions))]

In [27]:
eval = Tokenize(captions[:100], predictions[:100])

In [26]:
print(eval._ref)
print(eval._pred)
print(eval.rogue())

['chest x ray confirmed position guidewire extending right internal jugular vein inferior vena cava', 'chest x ray findings chest radiograph revealed engorged pulmonary trunk abrupt cutoff pulmonary vascularity distal portions bilaterally indicative westermark sign arrows', 'chest x ray pa showing position gun nails', 'chest x ray showing right sided pneumothorax', 'chest x ray day admission showing diffuse bilateral haziness air bronchogram', 'chest x ray patient chest x ray showed faint patchy opacity periphery right upper lung zone black arrow', 'chest x ray 17 days admission', 'chest x ray posterior anterior view surgical removal intermediate lobe right lung drain right pleural cavity postoperative chest radiograph revealed no pneumothorax', 'chest x ray postero anterior view shows bilateral lower zone consolidation bilateral pleural effusion', 'chest x ray pa view showing bilateral reticulo nodular infiltrates']
['chest x ray demonstrating normal lung fields clear airway no visibl


---

### TODO:

develop better metric/research other metrics

medical dictionary, ngrams, synonyms, ordering

ensure better runtime of retrieval

## 0 1 loss baseline

In [15]:
#loss methods
def loss(Filter, original, prediction):
  og = set(Filter.filter(original))
  pred = set(Filter.filter(prediction))

  common_words = og.intersection(pred)

  return len(common_words) / len(og)

In [None]:
filter = Tokenize(captions, predictions)
label = "computed tomography scan of the chest showing a large mass in the right lower lobe of the left lower lobe."
prediction = "xray shows a hairline fracture in lower extremity"

print(loss(filter, label, prediction))

#information retrieval for data parsing in order to create accurate evaluation metric


0.09090909090909091
