In [None]:
import os
import sys  
import json
import torch 
import argparse
import numpy as np
from PIL import Image  
from tqdm import tqdm
from utils import evaluate_relaxed_accuracy, model_gen
from transformers import AutoModelForCausalLM, AutoTokenizer  
 

# Hub identifier of the checkpoint; used for both the tokenizer and the model.
ckpt_path = 'internlm/internlm-xcomposer2-vl-7b'

# Both loaders are called with trust_remote_code=True, i.e. the checkpoint's
# own Python code is allowed to run at load time.
tokenizer = AutoTokenizer.from_pretrained(
    ckpt_path,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    ckpt_path,
    device_map="cuda",
    trust_remote_code=True,
)

# Post-load preparation, one step per line: switch to inference mode,
# move to the GPU, then cast the weights to half precision.
model = model.eval()
model = model.cuda()
model = model.half()

# Attach the tokenizer to the model object.
# NOTE(review): presumably read by model_gen or by the checkpoint's remote
# code -- confirm against utils.model_gen.
model.tokenizer = tokenizer

In [None]:
# Run the model over the human-written ChartQA test questions and score it.
#
# The annotation file is read inside a context manager so the file handle is
# closed as soon as parsing finishes instead of being left open for the
# lifetime of the kernel.
with open('data/chartqa/ChartQA Dataset/test/test_human.json') as f:
    samples = json.load(f)

human_part = []
for q in tqdm(samples):
    # Every record names its chart image relative to the shared png folder.
    im_path = 'data/chartqa/ChartQA Dataset/test/png/'+q['imgname']
    # Embed the question in the user/assistant prompt template, prefixed with
    # the instruction to answer with a single word or phrase.
    text = '[UNUSED_TOKEN_146]user\nAnswer the question using a single word or phrase.{}[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n'.format(q['query'])
    # Generation runs under CUDA autocast (mixed precision).
    with torch.cuda.amp.autocast():
        response = model_gen(model, text, im_path)
    # Keep the model output next to the reference label for scoring.
    human_part.append({
        'answer': response,
        'annotation': q['label']
    })

# Score the collected answer/annotation pairs with the helper from utils.
human_part_acc = evaluate_relaxed_accuracy(human_part)

In [None]:
# Run the model over the augmented ChartQA test questions and score it.
#
# The annotation file is read inside a context manager so the file handle is
# closed as soon as parsing finishes instead of being left open for the
# lifetime of the kernel.
with open('data/chartqa/ChartQA Dataset/test/test_augmented.json') as f:
    samples = json.load(f)

augmented_part = []
for q in tqdm(samples):
    # Every record names its chart image relative to the shared png folder.
    im_path = 'data/chartqa/ChartQA Dataset/test/png/'+q['imgname']
    # Embed the question in the user/assistant prompt template, prefixed with
    # the instruction to answer with a single word or phrase.
    text = '[UNUSED_TOKEN_146]user\nAnswer the question using a single word or phrase.{}[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n'.format(q['query'])
    # Generation runs under CUDA autocast (mixed precision).
    with torch.cuda.amp.autocast():
        response = model_gen(model, text, im_path)
    # Keep the model output next to the reference label for scoring.
    augmented_part.append({
        'answer': response,
        'annotation': q['label']
    })

# Score the collected answer/annotation pairs with the helper from utils.
augmented_part_acc = evaluate_relaxed_accuracy(augmented_part)

In [None]:
# Overall score: unweighted mean of the human and augmented split results.
overall_acc = (human_part_acc + augmented_part_acc) / 2
print(overall_acc)