In [2]:
#!git clone https://github.com/YIKUAN8/Transformers-VQA.git
#%cd Transformers-VQA/
#!pip install -r requirements.txt


####**Download pre-trained models and place them to data/pretrained/, you could choose from [VisualBERT](https://github.com/uclanlp/visualbert), [LXMERT](https://github.com/airsplay/lxmert), [UNITER](https://github.com/ChenRocks/UNITER).**

In [3]:
#line 1: UNITER; line 2:LXMERT, line 3: VisualBERT. Comment out selected lines if you don't want to use this model
#if the pre-trained VisualBERT cannot be downloaded succesfully, rerun one more time or refer to this link: https://drive.google.com/file/d/1kuPr187zWxSJbtCbVW87XzInXltM-i9Y/view?usp=sharing
#!wget https://convaisharables.blob.core.windows.net/uniter/pretrained/uniter-base.pt -P models/pretrained/
#!wget --no-check-certificate https://nlp1.cs.unc.edu/data/model_LXRT.pth -P models/pretrained/
#!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1kuPr187zWxSJbtCbVW87XzInXltM-i9Y' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1kuPr187zWxSJbtCbVW87XzInXltM-i9Y" -O models/pretrained/visualbert.th && rm -rf /tmp/cookies.txt


In [5]:
# laod a pickle file of openI dataframe
import pickle
with open("/home/amisha/Transformers-VQA/data/openI.pkl", "rb") as file:
    openI = pickle.load(file)

***Load visual features***

In [9]:
import pickle
openI_v_f = pickle.load( open("/home/amisha/Transformers-VQA/data/openI_v_f.pkl", "rb" ) )

In [10]:
assert set(list(openI_v_f.keys())) == set(openI.id.values), "Visual Features are inconsistent with openI dataset"

In [11]:
openI

Unnamed: 0,id,abstract,affiliation,author,date,journal,title,split,text
0,1,1,0,0,0,0,0,train,Abstract. The article is about the specifics o...
1,2,0,0,0,0,1,0,test,2017|05
2,3,0,0,0,0,0,1,train,Eingabenkultur und Diktaturerfahrungen in der DDR
3,4,0,0,1,0,0,0,test,"Cornelia Neuert, Patricia Hadler, Timo Lenzner..."
4,5,0,0,0,0,0,1,train,Palliative care and bioethics: study with assi...
...,...,...,...,...,...,...,...,...,...
80687,80688,0,1,0,0,0,0,train,la implementacio6n del Programa de Apoyo pleme...
80688,80689,1,0,0,0,0,0,train,*“* Profesor investigador en la Universidad Au...
80689,80690,0,0,0,0,0,1,train,"Griindung der ""Association of the Sociologists..."
80690,80691,0,0,1,0,0,0,train,Heine von Alemann


In [12]:
feature_example, bbox_example, (img_w_example, img_h_example) = openI_v_f[openI.id.iloc[0]]

In [13]:
openI_v_f[openI.id.iloc[0]]

[array([[ 142,  951, 1479, 1097]], dtype=int32),
 array([[0.1515294 , 0.16800034, 0.09502802, ..., 0.16190083, 0.17268997,
         0.15456162]], dtype=float32),
 (1654, 2339)]

In [14]:
feature_example.shape, bbox_example.shape, (img_w_example, img_h_example)

((1, 4), (1, 2048), (1654, 2339))

####**Now We have download all data, models, and dependencies. We are good to go!!!**
**1. Change default arguments**

First, let's check it out!

In [15]:
from param import args

In [16]:
args.__dict__

{'model': 'lxmert',
 'train': 'train,nominival',
 'valid': 'minival',
 'test': None,
 'batch_size': 32,
 'optim': 'bert',
 'lr': 0.0001,
 'epochs': 2,
 'dropout': 0.1,
 'seed': 9595,
 'max_seq_length': 20,
 'output': 'models/trained/',
 'fast': False,
 'tiny': False,
 'tqdm': True,
 'load_trained': None,
 'load_pretrained': None,
 'from_scratch': False,
 'mce_loss': False,
 'multiGPU': False,
 'num_workers': 0,
 'optimizer': 'bert'}

***1.1*** Let's overwrite some arguments***

In [17]:
args.batch_size = 18
args.epochs = 2
args.model = 'visualbert' # use visualbert
args.load_pretrained = '/home/amisha/Transformers-VQA/models/pretrained/visualbert.th' #load pretrained visualbert model
args.max_seq_length = 128 #truncate or pad report lengths to 128 subwords

####**2. Create customized dataloader**

In [18]:
findings = list(openI.columns[1:-2])
findings

['abstract', 'affiliation', 'author', 'date', 'journal', 'title']

In [19]:
from torch.utils.data import Dataset
from torch.utils.data.dataloader import DataLoader
import numpy as np
class OpenIDataset(Dataset):
  def __init__(self, df, vf, split, model = 'lxmert'):
    # train_test_split and prepare labels
    self.dataset = df[df['split'] == split]
    self.visual_features = vf
    self.id_list = self.dataset.id.tolist()
    self.report_list = self.dataset.text.tolist()
    self.findings_list = self.dataset.columns[1:-2]
    self.target_list = self.dataset[self.findings_list].to_numpy().astype(np.float32)
    self.model = model

  def __len__(self):
    return len(self.id_list)

  def __getitem__(self, item):
    cxr_id = self.id_list[item]
    target = self.target_list[item]
    boxes, feats, (img_w, img_h) = self.visual_features[cxr_id]
    report = self.report_list[item]
    if self.model == 'uniter':
      boxes = self._uniterBoxes(boxes)
    if self.model == 'lxmert':
      boxes[:, (0, 2)] /= img_w
      boxes[:, (1, 3)] /= img_h
    return cxr_id, feats, boxes, report, target

  def _uniterBoxes(self, boxes):#uniter requires a 7-dimensiom beside the regular 4-d bbox
    new_boxes = np.zeros((boxes.shape[0],7),dtype='float32')
    new_boxes = np.zeros((boxes.shape[0],7),dtype='float32')
    new_boxes[:,1] = boxes[:,0]
    new_boxes[:,0] = boxes[:,1]
    new_boxes[:,3] = boxes[:,2]
    new_boxes[:,2] = boxes[:,3]
    new_boxes[:,4] = new_boxes[:,3]-new_boxes[:,1] #w
    new_boxes[:,5] = new_boxes[:,2]-new_boxes[:,0] #h
    new_boxes[:,6]=new_boxes[:,4]*new_boxes[:,5] #area
    return new_boxes  

In [20]:
training = OpenIDataset(df = openI, vf = openI_v_f,  split='train', model = args.model)
testing = OpenIDataset(df = openI, vf = openI_v_f,  split='test', model = args.model)

In [21]:
train_loader = DataLoader(training, batch_size=1,shuffle=True, num_workers=0,drop_last=True, pin_memory=True)
test_loader = DataLoader(testing, batch_size=1,shuffle=False, num_workers=0,drop_last=False, pin_memory=True)

In [None]:
len(test_loader)

####**3. Model, Optimizer, Loss Function, and Evaluation Function**

In [23]:
from vqa_model import VQAModel
#init model
model = VQAModel(num_answers = len(findings), model = args.model)

In [24]:
#load pretrained weights
model.encoder.load(args.load_pretrained)

Load VISUALBERT PreTrained Model from /home/amisha/Transformers-VQA/models/pretrained/visualbert.th

Weights in loaded but not in model:
cls.predictions.bias
cls.predictions.decoder.weight
cls.predictions.transform.LayerNorm.bias
cls.predictions.transform.LayerNorm.weight
cls.predictions.transform.dense.bias
cls.predictions.transform.dense.weight
cls.seq_relationship.bias
cls.seq_relationship.weight

Weights in model but not in loaded:



In [25]:
#send to GPU
model = model.cuda()

In [26]:
import torch
loss = torch.nn.BCEWithLogitsLoss()

In [27]:
from src.optimization import BertAdam
optim = BertAdam(list(model.parameters()),lr=args.lr,warmup=0.1,t_total=len(train_loader)*args.epochs)

In [28]:
"""
# Evaluation function, we will report the AUC and accuray of each finding
def eval(target, pred):
    acc_list = []
    for i, d in enumerate(findings[:]): #normal is excluded
        acc = np.mean(target[:,i] == (pred[:,i]>=0.5))
        print(i,d,acc)
        acc_list.append(acc)
    print('Averaged: '+str(np.average(acc_list)))
"""

In [30]:
#Importing the required libraries
from sklearn import datasets
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import plot_precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
import matplotlib.pyplot as plt

In [None]:
# Evaluation function, we will report the precision and recall of each finding

def eval(target, pred):
    metadata = []
    precision=[]
    recall=[]
    for i, d in enumerate(findings[:]):
        tar=[]
        pre = []
        for j in target:
            t = np.argmax(j)
            tar.append(t)
            
        for k in pred:
            p = np.argmax(k)
            pre.append(p)
        precisi = precision_score(tar,p)
        precision.append(precisi)
        recal = recall_score(t,p)
        recall.append(recal)
        metadata.append(d)
        print(metadata,precisi,recal)
    return metadata,precision,recall  

In [31]:
sgmd = torch.nn.Sigmoid()

####**4. HIT and RUN**

In [None]:
from tqdm.notebook import tqdm

iter_wrapper = (lambda x: tqdm(x, total=len(train_loader))) if args.tqdm else (lambda x: x)
best_valid = 0
for epoch in range(args.epochs):
    epoch_loss = 0
    for i, (cxr_id, feats, boxes, report, target) in iter_wrapper(enumerate(train_loader)):
        model.train()
        optim.zero_grad()
        feats, boxes, target = feats.cuda(), boxes.cuda(), target.cuda()
        logit = model(feats, boxes, report)
        running_loss = loss(logit, target)
        running_loss = running_loss * logit.size(1)
        epoch_loss += running_loss
        running_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
        optim.step()
    print("Epoch "+str(epoch)+": Training Loss: "+str(epoch_loss/len(train_loader)))
    print('Evaluation: ')
    model.eval()
    logit_list, target_list = [], []
    iter_wrapper = (lambda x: tqdm(x, total=len(test_loader)))
    for i, (cxr_id, feats, boxes, report, target) in iter_wrapper(enumerate(test_loader)):
        target_list.append(target)
        with torch.no_grad():
            feats, boxes = feats.cuda(), boxes.cuda()
            logit = model(feats, boxes, report)
            logit_list.append(sgmd(logit).cpu().numpy())

    eval(np.concatenate(target_list,axis = 0), np.concatenate(logit_list,axis = 0))
   #matrix(np.concatenate(target_list,axis = 0), np.concatenate(logit_list,axis = 0))
    


In [None]:
label,pre,reca =eval(np.concatenate(target_list,axis = 0), np.concatenate(logit_list,axis = 0))
res1=pd.DataFrame(label,columns=["class"])
res2=pd.DataFrame(pre,columns=["precision"])
res3 =pd.DataFrame(reca,columns=["recall"])
result_df = pd.concat([res1,res2,res3],axis=1)
result_df.to_csv("Result_VB.csv")