In [1]:
import json
from tqdm import tqdm
import pickle

import torch
from PIL import Image
from moellava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
from moellava.conversation import conv_templates, SeparatorStyle
from moellava.model.builder import load_pretrained_model
from moellava.utils import disable_torch_init
from moellava.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
import json
from tqdm import tqdm
import os
import pickle

  from .autonotebook import tqdm as notebook_tqdm


[2024-04-18 17:06:54,754] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


### Inference Function

In [2]:
def eval_gp(gp_id, cls=False):
    """Run the checkpoint trained for group `gp_id` over that group's validation split.

    The split is read from ``json_folder/okvqa_train_jsons/`` (the ``_val_cls_10``
    file when `cls` is true, the ``_val_10`` file otherwise). Each entry is
    answered with greedy decoding and the ``{question id: answer}`` mapping is
    pickled in chunks to ``okvqa_eval/gp{gp_id}_outputs/saved_outputs_{k}.pkl``.

    Gotcha: the checkpoint directory does not depend on `cls`, so a run with
    ``cls=True`` overwrites the files of a ``cls=False`` run for the same group
    (and may leave higher-numbered files from the other run behind).

    Args:
        gp_id: Group index; selects both the validation JSON and the model checkpoint.
        cls: Selects which validation JSON file is read (see above).

    Returns:
        None. Results are only written to disk.
    """
    import gc  # local import: only needed for the cleanup step below

    # Input data
    if cls:
        gp_eval_path = f"json_folder/okvqa_train_jsons/gp{gp_id}_val_cls_10.json"
    else:
        gp_eval_path = f"json_folder/okvqa_train_jsons/gp{gp_id}_val_10.json"
    with open(gp_eval_path) as json_file:
        gp_eval_data = json.load(json_file)
    num_data = len(gp_eval_data)

    # Create the checkpoint directory up front so a missing folder is not
    # discovered only when the first chunk is written, ~100 inferences in.
    output_dir = f"okvqa_eval/gp{gp_id}_outputs"
    os.makedirs(output_dir, exist_ok=True)

    # Load model
    disable_torch_init()
    model_path = f'/home/ubuntu/workspace/MoE-LLaVA/output_trained/MoE-LLaVA-StableLM-1.6B-4e-okvqa_gp{gp_id}'
    device = 'cuda'
    load_4bit, load_8bit = False, False
    model_name = get_model_name_from_path(model_path)
    tokenizer, model, processor, context_len = load_pretrained_model(model_path, None, model_name, load_8bit, load_4bit, device=device)

    def moeLLavaInf(image, inp):
        """Answer the question `inp` about the image file at path `image`.

        Returns the decoded, whitespace-stripped text generated after the prompt.
        """
        conv_mode = "stablelm"  # phi or qwen or stablelm
        conv = conv_templates[conv_mode].copy()
        image_processor = processor['image']
        # The context manager closes the file handle once the RGB copy exists.
        with Image.open(image) as raw_image:
            rgb_image = raw_image.convert('RGB')
        image_tensor = image_processor.preprocess(rgb_image, return_tensors='pt')['pixel_values'].to(model.device, dtype=torch.float16)

        inp = DEFAULT_IMAGE_TOKEN + '\n' + inp
        conv.append_message(conv.roles[0], inp)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()
        # Same device as the image tensor, rather than a hard-coded .cuda().
        input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
        stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
        keywords = [stop_str]
        stopping_criteria = KeywordsStoppingCriteria(keywords, tokenizer, input_ids)

        with torch.inference_mode():
            # Greedy decoding: no `temperature` is passed because it only
            # applies when do_sample=True.
            output_ids = model.generate(
                input_ids,
                images=image_tensor,
                do_sample=False,
                max_new_tokens=128,
                use_cache=True,
                stopping_criteria=[stopping_criteria])

        # Drop the prompt tokens and decode only the newly generated part.
        outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:], skip_special_tokens=True).strip()
        return outputs

    try:
        # Evaluate
        questionId_to_answer = {}
        save_file_id = 0
        for i in tqdm(range(num_data)):
            entry = gp_eval_data[i]
            questionId = entry['id']
            image_path = os.path.join("image_folder", entry['image'])
            question = entry['conversations'][0]['value'][8:]  # Strip "<image>\n"
            questionId_to_answer[questionId] = moeLLavaInf(image_path, question)
            # Checkpoint output: flush at i = 100, 200, ... and on the last entry,
            # then start a fresh dict so each file holds a disjoint chunk.
            if (i % 100 == 0 and i != 0) or i == num_data - 1:
                with open(f"{output_dir}/saved_outputs_{save_file_id}.pkl", 'wb') as f:
                    pickle.dump(questionId_to_answer, f)
                questionId_to_answer = {}
                save_file_id += 1
    finally:
        # Delete to prepare for the next gp. Done in `finally` so a failed run
        # does not keep the model referenced; then release cached CUDA blocks.
        del tokenizer, model, processor
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

### Evaluation Function

In [3]:
def evaluate_results(gp_id, cls=False):
    """Score the merged answers of group `gp_id` against the OK-VQA validation annotations.

    Loads ``qid_to_output_gp_{gp_id}.pkl`` from ``okvqa_eval/classifier_grouped``
    (when `cls` is true) or ``okvqa_eval/ground_truth_grouped`` (otherwise) and
    compares every answer, lower-cased, with the annotated answers of the same
    question id. A question scores ``min(matches / 3, 1)``.

    Prints a sample of the loaded answers, the summed score, the number of
    scored questions and the resulting accuracy. Accuracy is printed as ``nan``
    when no question was scored, instead of dividing by zero.

    Args:
        gp_id: Group index used in the pickle file name.
        cls: Selects which of the two result folders is read.

    Returns:
        None. Results are only printed.

    Raises:
        AssertionError: If some loaded question id has no matching annotation
            (or the annotation file repeats a question id).
    """
    if cls:
        outputs_path = f"okvqa_eval/classifier_grouped/qid_to_output_gp_{gp_id}.pkl"
    else:
        outputs_path = f"okvqa_eval/ground_truth_grouped/qid_to_output_gp_{gp_id}.pkl"
    # NOTE: pickle.load can execute arbitrary code; only read files produced by this pipeline.
    with open(outputs_path, "rb") as f:
        questions_to_outputs = pickle.load(f)

    print(f"Sample outputs: {list(questions_to_outputs.items())[:5]}")

    # okvqa_annotations
    val_annotations_path = "dataset/okvqa_annotations/mscoco_val2014_annotations.json"
    with open(val_annotations_path) as json_file:
        val_annotations = json.load(json_file)

    def post_process_output(output):
        # Lower-case the model output before the exact-match comparison.
        return output.lower()

    total_scores = 0
    total_tests = 0
    for annotation in val_annotations["annotations"]:
        question_id = annotation["question_id"]
        if question_id not in questions_to_outputs:
            continue
        llava_output = post_process_output(questions_to_outputs[question_id])
        # Count how many annotated answers match exactly.
        matches = sum(1 for answer in annotation['answers'] if llava_output == answer['answer'])
        total_scores += min(matches / 3.0, 1)
        total_tests += 1
    assert total_tests == len(questions_to_outputs), (
        f"scored {total_tests} questions but loaded {len(questions_to_outputs)} answers"
    )

    # Guard the empty case: nothing scored means accuracy is undefined.
    acc = total_scores / total_tests if total_tests else float("nan")

    print(f"total_scores: {total_scores}")
    print(f"total_tests: {total_tests}")
    print(f"acc: {acc}")

In [13]:
def save_ground_truth_grouped(gp_id, cls=False, num_files=None):
    """Merge the chunked answer pickles of group `gp_id` into one pickle.

    Reads ``okvqa_eval/gp{gp_id}_outputs/saved_outputs_{i}.pkl`` for
    ``i in range(num_files)``, merges the dicts (later files win on duplicate
    keys) and writes the result to ``okvqa_eval/classifier_grouped`` (when `cls`
    is true) or ``okvqa_eval/ground_truth_grouped`` (otherwise) as
    ``qid_to_output_gp_{gp_id}.pkl``. The destination folder is created if needed.

    The number of chunk files is taken from a fixed table rather than discovered
    on disk: the input directory does not depend on `cls`, so it may also hold
    chunk files from another run.

    Args:
        gp_id: Group index; must be 0-3 unless `num_files` is given.
        cls: Selects the chunk-count table and the destination folder.
        num_files: Optional explicit number of chunk files to merge, overriding
            the built-in table.

    Raises:
        ValueError: If `num_files` is None and `gp_id` is not in the table
            (previously this surfaced as an UnboundLocalError).
    """
    if num_files is None:
        # Chunk counts for the validation splits evaluated in this notebook.
        known_counts = {
            True: {0: 22, 1: 23, 2: 3, 3: 4},
            False: {0: 18, 1: 21, 2: 8, 3: 4},
        }
        try:
            num_files = known_counts[bool(cls)][gp_id]
        except KeyError:
            raise ValueError(
                f"no known checkpoint count for gp_id={gp_id!r} (cls={bool(cls)}); pass num_files explicitly"
            ) from None

    questions_to_outputs = {}
    for i in range(num_files):
        # NOTE: pickle.load can execute arbitrary code; only read files produced by this pipeline.
        with open(f"okvqa_eval/gp{gp_id}_outputs/saved_outputs_{i}.pkl", "rb") as f:
            questions_to_outputs.update(pickle.load(f))

    if cls:
        output_path = f"okvqa_eval/classifier_grouped/qid_to_output_gp_{gp_id}.pkl"
    else:
        output_path = f"okvqa_eval/ground_truth_grouped/qid_to_output_gp_{gp_id}.pkl"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'wb') as f:
        pickle.dump(questions_to_outputs, f)

## Ground-truth grouped

### GP0

In [None]:
# eval_gp(0)
# save_ground_truth_grouped(0)

In [22]:
evaluate_results(0)

Sample outputs: [(2076115, 'wallet'), (5723996, 'man'), (5759705, 'island'), (3045575, 'shop'), (2863135, 'hit ball')]
total_scores: 1079.333333333331
total_tests: 1800
acc: 0.5996296296296283


### GP1

In [16]:
# eval_gp(1)
# save_ground_truth_grouped(1)

In [23]:
evaluate_results(1)

Sample outputs: [(2971475, 'racing'), (3397615, 'ivy'), (949225, 'paw'), (2183655, 'flower'), (217115, 'dirt bike')]
total_scores: 1204.6666666666663
total_tests: 2101
acc: 0.5733777566238297


### GP2

In [20]:
eval_gp(2)
save_ground_truth_grouped(2)

  return self.fget.__get__(instance, owner)()


[2024-04-18 15:26:15,318] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:26:15,325] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:26:15,328] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:26:15,332] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:26:15,335] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:26:15,338] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:26:15,341] [INFO] [logging.py:96:log_dist] [Rank 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 1/748 [00:03<39:14,  3.15s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 2/748 [00:03<18:54,  1.52s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 3/748 [00:03<11:31,  1.08it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected beha

In [24]:
evaluate_results(2)

Sample outputs: [(3575865, 'teddy bear'), (115115, 'henri matisse'), (5169165, 'work'), (2357845, 'cross country'), (3190735, 'wetsuit')]
total_scores: 440.6666666666675
total_tests: 748
acc: 0.5891265597147961


### GP3

In [7]:
eval_gp(3)
# save_ground_truth_grouped(3)

  return self.fget.__get__(instance, owner)()


[2024-04-18 15:32:19,134] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:32:19,137] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:32:19,141] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:32:19,144] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:32:19,147] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:32:19,151] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 15:32:19,154] [INFO] [logging.py:96:log_dist] [Rank 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 1/397 [00:03<23:41,  3.59s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  1%|          | 2/397 [00:03<10:36,  1.61s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  1%|          | 3/397 [00:04<06:52,  1.05s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected beha

In [25]:
evaluate_results(3)

Sample outputs: [(1833195, 'charging'), (5069455, 'zoom'), (2162545, '1886'), (505145, 'coconut'), (2980515, 'concert')]
total_scores: 219.33333333333326
total_tests: 397
acc: 0.5524769101595296


## Classifier grouped

### gp 0

In [5]:
eval_gp(0, cls=True)

  return self.fget.__get__(instance, owner)()


[2024-04-18 16:29:15,775] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:29:15,779] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:29:15,782] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:29:15,785] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:29:15,788] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:29:15,791] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:29:15,794] [INFO] [logging.py:96:log_dist] [Rank 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 1/2175 [00:03<1:53:58,  3.15s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 2/2175 [00:03<51:42,  1.43s/it]  The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 3/2175 [00:03<31:45,  1.14it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpect

In [14]:
save_ground_truth_grouped(0, cls=True)

In [15]:
evaluate_results(0, cls=True)

Sample outputs: [(2076115, 'money'), (5723996, 'man'), (3045575, 'shop'), (2863135, 'hit ball'), (115115, 'artist')]
total_scores: 1310.6666666666677
total_tests: 2175
acc: 0.6026053639846748


### gp 1

In [5]:
eval_gp(1, cls=True)

  return self.fget.__get__(instance, owner)()


[2024-04-18 16:44:38,329] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:44:38,333] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:44:38,337] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:44:38,340] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:44:38,344] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:44:38,348] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 16:44:38,352] [INFO] [logging.py:96:log_dist] [Rank 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 1/2236 [00:03<2:14:03,  3.60s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 2/2236 [00:03<1:00:57,  1.64s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 3/2236 [00:04<36:40,  1.01it/s]  The attention mask and the pad token id were not set. As a consequence, you may observe unexpe

In [16]:
save_ground_truth_grouped(1, cls=True)

In [17]:
evaluate_results(1, cls=True)

Sample outputs: [(2971475, 'racing'), (3397615, 'ivy'), (949225, 'paw'), (2183655, 'flower'), (217115, 'dirt bike')]
total_scores: 1264.0000000000005
total_tests: 2236
acc: 0.5652951699463329


### gp 2

In [5]:
eval_gp(2, cls=True)

  return self.fget.__get__(instance, owner)()


[2024-04-18 17:02:40,734] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:02:40,737] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:02:40,741] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:02:40,743] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:02:40,747] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:02:40,751] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:02:40,754] [INFO] [logging.py:96:log_dist] [Rank 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 1/239 [00:03<14:09,  3.57s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  1%|          | 2/239 [00:03<06:19,  1.60s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  1%|▏         | 3/239 [00:04<03:51,  1.02it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected beha

In [18]:
save_ground_truth_grouped(2, cls=True)

In [19]:
evaluate_results(2, cls=True)

Sample outputs: [(3575865, 'teddy bear'), (5759705, 'island'), (299845, 'salt water'), (1121605, 'wallpaper'), (2352215, 'anxiety')]
total_scores: 141.00000000000003
total_tests: 239
acc: 0.589958158995816


### gp 3

In [5]:
eval_gp(3, cls=True)

  return self.fget.__get__(instance, owner)()


[2024-04-18 17:07:53,930] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:07:53,933] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:07:53,936] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:07:53,938] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:07:53,941] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:07:53,944] [INFO] [logging.py:96:log_dist] [Rank -1] Creating MoE layer with num_experts: 4 | num_local_experts: 4 | expert_parallel_size: 1
[2024-04-18 17:07:53,947] [INFO] [logging.py:96:log_dist] [Rank 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  0%|          | 1/396 [00:03<20:26,  3.10s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  1%|          | 2/396 [00:03<09:15,  1.41s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:100257 for open-end generation.
  1%|          | 3/396 [00:03<05:45,  1.14it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected beha

In [20]:
save_ground_truth_grouped(3, cls=True)

In [21]:
evaluate_results(3, cls=True)

Sample outputs: [(5169165, 'work'), (1833195, 'charging'), (3156685, 'england'), (5069455, 'zoom'), (2162545, '1886')]
total_scores: 222.66666666666654
total_tests: 396
acc: 0.562289562289562


In [22]:
396 + 239 + 2236 + 2175

5046

In [12]:
# Sanity check: print the number of entries in each group's classifier-grouped
# validation split (gp0..gp3), one count per line.
for i in range(4):
    gp_eval_path = f"json_folder/okvqa_train_jsons/gp{i}_val_cls_10.json"
    with open(gp_eval_path) as json_file:
        gp_eval_data = json.load(json_file)
    print(len(gp_eval_data))

2175
2236
239
396
