In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers.generation import GenerationConfig
from hip.models.qwen.modeling_qwen import QWenLMHeadModel
import torch
# Seed PyTorch's global RNG so any randomized operations in later cells are repeatable.
torch.manual_seed(1234)

In [None]:
# Load the tokenizer for the "Qwen/Qwen-VL" checkpoint. trust_remote_code=True lets
# transformers use the tokenizer implementation shipped with that checkpoint
# (later cells call its from_list_format helper).
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-VL", trust_remote_code=True)

In [None]:
# Load Qwen-VL through the project's QWenLMHeadModel with 4-bit bitsandbytes weights.
# Every quantization setting lives in BitsAndBytesConfig; a separate `load_in_4bit=True`
# keyword is deliberately NOT passed, because recent transformers releases raise
# ValueError when it is combined with `quantization_config` (and on older releases it
# was redundant, since the config object already carries the flag).
model = QWenLMHeadModel.from_pretrained(
    "Qwen/Qwen-VL",
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        # Keep the 'visual' submodule out of quantization.
        llm_int8_skip_modules=['visual'],
    ),
    # NOTE(review): forwarded as an extra kwarg — presumably the Qwen config's
    # half-precision switch; confirm against the model's configuration class.
    fp16=True,
).eval()  # evaluation mode: turns off train-time behavior such as dropout


In [None]:
# Build an image-only prompt, run greedy generation, and print the decoded result.
query = tokenizer.from_list_format([
    {'image': 'notebook/mmmu_image.png'},
    # Alternative inputs kept for quick experiments:
    # {'image': 'https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg'},
    # {'text': 'Each of the following situations relates to a different company. For company B, find the missing amounts. \n\n A: $63,020 \n\n B: $58,410 \n\n C: $71,320 \n\n D: $77,490 \n\n Answer:'},
])
print(query)

# Tokenize and move the tensors to the model's device in one step, so re-running
# this cell never depends on a previously moved `inputs`.
inputs = tokenizer(query, return_tensors='pt').to(model.device)

# Pass the tensor itself to print(); nesting a print(...) call as an argument would
# emit the ids first and then show a stray `None` at the end of this line.
print(inputs.keys(), inputs.input_ids.shape, inputs.input_ids)

# do_sample=False selects deterministic (non-sampling) decoding.
with torch.no_grad():
    pred = model.generate(**inputs, do_sample=False)

print(pred.shape)

# Decode the first (only) sequence, keeping special tokens visible for inspection.
response = tokenizer.decode(pred.cpu()[0], skip_special_tokens=False)
print(response)

In [6]:
# Single forward pass over the tokenized prompt without tracking gradients;
# `output` is kept as a notebook variable for inspection.
with torch.no_grad():
    output = model(**inputs)

In [None]:
# Display the shape of the logits returned by the forward pass.
output.logits.shape

In [5]:
import datasets, json

In [3]:
# Load the 'Accounting' configuration of the 'MMMU/MMMU' dataset.
ds = datasets.load_dataset('MMMU/MMMU', 'Accounting')

In [None]:
# Parse the answer options of the first validation example.
# The field holds a Python-style list literal with single-quoted strings (which is why
# a quote swap was needed to get it through json.loads). Swapping every ' for " breaks
# any option that contains an apostrophe (e.g. "don't" becomes `don"t`, or the JSON
# fails to parse), so parse the literal directly with ast.literal_eval instead.
import ast

ast.literal_eval(ds['validation'][0]['options'])

In [None]:
# Build an image-only prompt from the first validation example and display it.
# NOTE(review): `image_1` may be a decoded image object rather than a path/URL string;
# confirm that from_list_format accepts that type.
first_example = ds['validation'][0]
query = tokenizer.from_list_format([
    {'image': first_example['image_1']},
    # {'image': 'https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg'},
    # {'text': 'Each of the following situations relates to a different company. For company B, find the missing amounts. \n\n A: $63,020 \n\n B: $58,410 \n\n C: $71,320 \n\n D: $77,490 \n\n Answer:'},
])
query

In [None]:
# Try loading a dataset from the local './cache/MMMU/Accounting' path.
# NOTE(review): the second positional argument of load_dataset is the configuration
# name, so 'dev' is treated as a config here — if a split was intended, confirm and
# pass split='dev' instead.
datasets.load_dataset('./cache/MMMU/Accounting', 'dev')

In [None]:
# Scratch check of the quote-swapping approach: in this JSON text the apostrophes of
# "don't" have become escaped double quotes (\"), so it parses, but the decoded
# strings read `don"t` rather than the original wording.
import json
json.loads('["One pathogen", "Two pathogens", "Three pathogens", "There is no pathogen involved", "I don\\"t know and I don\\"t want to guess"]')

In [None]:
# Scratch check: a single-quoted list literal parses as a Python literal.
# ast.literal_eval is used instead of eval so only literals are accepted and no
# arbitrary code can run if this pattern is later applied to dataset strings.
import ast

ast.literal_eval("['a']")