In [None]:
import sys
import jacinle
import jactorch
import numpy as np
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader



from experiments.desc_clevr_nesycoco import *

In [None]:
import jacinle.io as io
from os import path as osp

# Parsed programs for the transfer questions; every listed file is merged into one dict.
data_parses = ["data/clevr-parsings/transfer-questions-ncprogram-gt.json"]

all_parses = {}
for filename in data_parses:
    # '.p' files go through the pickle loader, anything else through the generic loader.
    loader = io.load_pkl if filename.endswith('.p') else io.load
    content = loader(filename)
    all_parses.update(content)

# All dataset artefacts live under a single directory.
data_dir = "data/clevr-mini"
data_scenes_json, data_image_root, data_vocab_json, data_output_vocab_json = (
    osp.join(data_dir, leaf)
    for leaf in ('scenes.json', 'images', 'vocab.json', 'output-vocab.json')
)

from left.domain import create_domain_from_parsing

# Hook for limiting the number of parses; as written it keeps every entry
# (the result is just a shallow copy of the merged dict).
all_parses = dict(all_parses.items())
domain = create_domain_from_parsing(all_parses)

# Which custom transfer task to evaluate, and the question file that goes with it.
evaluate_custom = "puzzle"
data_questions_json = "data/clevr-transfer/puzzle-20230513.json"

from left.data.clevr_custom_transfer import make_dataset

dataset = make_dataset(
    evaluate_custom,
    data_scenes_json,
    data_questions_json,
    data_image_root,
    data_output_vocab_json,
)
# The same dataset object is used under both names.
validation_dataset = dataset
train_dataset = validation_dataset


In [None]:
import os

# One example per batch, so a batch index doubles as an example index.
batch_size = 1
# Ask for at most 96 loader workers, but never more than the CPUs this machine reports;
# requesting far more workers than cores only adds process start-up cost and contention.
# NOTE(review): `nr_workers` is presumably forwarded to torch's DataLoader `num_workers` — confirm.
num_workers = min(96, os.cpu_count() or 1)
# Neither loader shuffles; only the "train" loader passes drop_last=True.
train_dataloader = train_dataset.make_dataloader(batch_size, shuffle=False, drop_last=True, nr_workers=num_workers)
validation_dataloader = validation_dataset.make_dataloader(batch_size, shuffle=False, drop_last=False, nr_workers=num_workers)


In [None]:

from jacinle.config.g import g
# Set `concept_mapping` to None on the shared `g` object imported from jacinle.config.g.
# NOTE(review): what reads `g.concept_mapping` is not visible in this notebook —
# confirm that None is the intended setting for this evaluation.
g.concept_mapping = None
# Call the domain's own summary printer for the domain created from the parsed programs.
domain.print_summary()

In [None]:
# Display the size of `dataset` as this cell's output.
len(dataset)

In [None]:
from jactorch.io import state_dict, load_state_dict
from left.data.referit3d.vocabulary import Vocabulary
from jactorch.train import TrainerEnv
from jactorch.optim import AdamW
from jactorch.cuda.copy import async_copy_to

vocab = Vocabulary()

# Read the output vocabulary from the dataset itself when it exposes one,
# otherwise from the object behind its `unwrapped` attribute.
if hasattr(train_dataset, 'output_vocab'):
    output_vocab = train_dataset.output_vocab
else:
    output_vocab = train_dataset.unwrapped.output_vocab

model = make_model(None, domain, all_parses, output_vocab, custom_transfer=None)
model.cuda()

# Only parameters that require gradients are handed to the optimizer.
trainable_parameters = (param for param in model.parameters() if param.requires_grad)
optimizer = AdamW(trainable_parameters, 0.001, weight_decay=1e-2)
trainer = TrainerEnv(model, optimizer)

# Placeholder: replace with the path of a trained checkpoint before running.
# NOTE(review): confirm how load_checkpoint reacts to a missing file, so that a wrong
# path cannot lead to silently evaluating untrained weights.
filename = "[your model path]"
trainer.load_checkpoint(filename)

In [None]:

# Run the loaded model over every batch of `train_dataloader` and sort the batch
# indices into parse failures, wrong predictions and correct predictions.
model.eval()
group_meters = jacinle.GroupMeters()
group_meters.reset()
from jacinle.utils.tqdm import tqdm_pbar

# A boolean execution result counts as a "yes" prediction when its value exceeds this.
bool_threshold = 0.4

# The type name and batch rank a well-formed execution result must have depend only on
# the task, so resolve them once; an unsupported task now fails before any evaluation.
if evaluate_custom == 'ref':
    expected_typename, expected_batch_dims = 'Object', 1
elif evaluate_custom in ('puzzle', 'rpm'):
    expected_typename, expected_batch_dims = 'bool', 0
else:
    raise NotImplementedError()

wrong_indices = []
correct_indices = []
parse_fails = []
all_accuracies = []
with tqdm_pbar(total=len(train_dataloader)) as pbar:
    for index, feed_dict in enumerate(train_dataloader):
        feed_dict = async_copy_to(feed_dict, 0)
        try:
            output_dict, extra_info = trainer.evaluate(feed_dict)
        except Exception as e:
            # Best effort: remember the failing batch and carry on. Failed batches are
            # not part of the running accuracy.
            print(e)
            parse_fails.append(index)
            # Advance the bar on this path too, otherwise it never reaches `total`.
            pbar.update()
            continue

        for result, groundtruth in zip(output_dict['executions'], feed_dict['answer']):
            malformed = (
                result is None
                or result.dtype.typename != expected_typename
                or result.total_batch_dims != expected_batch_dims
            )
            if malformed:
                # Missing result, or a result of the wrong type/rank, is scored as wrong.
                this_accuracy = 0
            elif evaluate_custom == 'ref':
                # Referring task: the highest-scoring entry is the prediction.
                this_accuracy = int(result.tensor.argmax().item() == groundtruth)
            else:
                # Puzzle / RPM task: threshold the scalar into a yes/no prediction.
                score = result.tensor.item()
                pred = (score > bool_threshold)
                this_accuracy = int(pred == groundtruth)
                if this_accuracy == 0:
                    print(score, pred, groundtruth)

            if this_accuracy == 0:
                wrong_indices.append(index)
            else:
                correct_indices.append(index)
            all_accuracies.append(this_accuracy)

        acc = np.mean(np.array(all_accuracies))
        pbar.set_description(group_meters.format_simple(
            f'Validation Acc: {acc}',
            {k: v for k, v in group_meters.val.items() if k.startswith('validation') and k.count('/') <= 2},
            compressed=True
        ), refresh=False)
        pbar.update()


In [None]:
# Display how many batch indices ended up in each of the three outcome lists.
len(parse_fails), len(wrong_indices), len(correct_indices)

In [None]:
from itertools import islice

# Cache up to the first 100 batches for interactive inspection. islice simply stops
# when the loader runs out, whereas calling next() 100 times raised StopIteration
# for loaders with fewer than 100 batches.
iterator = iter(train_dataloader)
data_all = list(islice(iterator, 100))

In [None]:
%matplotlib inline
import sys
## Open a real image from address draw bouding boxes for objects
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
import json
# plt.close()
from copy import deepcopy
from reascan import get_image
from pprint import pprint

# Re-run one misclassified example and draw every traced execution step on its image.
# `wrong_indices` must hold at least four entries and the chosen index must be smaller
# than len(data_all). To inspect an arbitrary cached batch, index data_all directly
# instead (e.g. data_all[61]).
feed_dict = data_all[wrong_indices[3]]
feed_dict = async_copy_to(feed_dict, 0)
print(feed_dict)
q = feed_dict["question_raw"][0]
print("program:", all_parses[q])


output_dict, extra_info = trainer.evaluate(feed_dict)

# convert() returns a new image, so the file handle can be released straight away.
with Image.open(osp.join(data_image_root, feed_dict["image_filename"][0])) as image_file:
    image = image_file.convert('RGB')
print(feed_dict["question_raw"])

# Reset first so the summary print at the bottom can never report an array left over
# from an earlier run of this cell.
tensor_values = None
for step in output_dict["execution_traces"][0]:
    step_label = str(step[0])
    # Steps whose printed form is 6 characters or shorter are not drawn.
    # NOTE(review): presumably this skips trivial leaf steps — confirm.
    if len(step_label) <= 6:
        continue

    tensor_values = np.array(step[1].tensor.detach().cpu().numpy())
    fig, ax = plt.subplots()
    ax.imshow(image)
    for obj_index, obj in enumerate(feed_dict["objects_raw"][0]):
        # Outline the object; obj[0..3] are used as the box corners (x0, y0, x1, y1).
        rect = patches.Rectangle((obj[0], obj[1]), obj[2] - obj[0], obj[3] - obj[1], linewidth=1, edgecolor='r', facecolor="none")
        ax.add_patch(rect)
        try:
            if len(tensor_values.shape) > 1:
                # Multi-dimensional values get no per-object score, only the object index.
                text = f"{obj_index}"
            else:
                text = f"{obj_index} {round(float(tensor_values[obj_index]),4)} "
        except (IndexError, TypeError, ValueError):
            # e.g. a 0-d value, or fewer scores than objects: show the raw value instead.
            print("##"*100)
            text = f"{obj_index} {tensor_values} "
        ax.text(obj[0], obj[1], text, color='white', fontsize=10, bbox=dict(facecolor='red', alpha=0.5))
    ax.set_title(step_label)
    plt.show()

    if len(tensor_values.shape) == 2:
        # Print 2-d values row by row, rounded for readability.
        for row_index in range(tensor_values.shape[0]):
            print(row_index, [round(x,3) for x in tensor_values[row_index]])
    else:
        pprint(tensor_values)

# Arg-max of the last drawn step, if any step was drawn at all.
if tensor_values is not None:
    print(tensor_values.argmax().item())
print(feed_dict["answer"])


In [None]:
# List the question and ground-truth answer of every misclassified puzzle.
address = "./data/clevr-transfer/puzzle-20230513.json"
with open(address, "r") as f:
    data = json.load(f)

for puzzle_index in wrong_indices:
    print(puzzle_index)
    puzzle = data["puzzles"][puzzle_index]
    print(puzzle["question"])
    print(puzzle["answer"])

In [None]:
# Load the LLaVA-NeXT processor and model that `answer()` below relies on.
import requests
import os
from PIL import Image

# HF_HOME is set before `transformers` is imported; keep this order.
# NOTE(review): presumably so the Hugging Face libraries pick up the cache location
# at import time — confirm. The same directory is also passed explicitly as cache_dir.
hf_cache = "./cache"
os.environ["HF_HOME"] = hf_cache

import torch
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration

processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf",cache_dir=hf_cache)

# Weights are loaded in float16 and moved to cuda:0.
# NOTE: this rebinds the global name `model`, which earlier in the notebook referred to
# the model built by make_model(); `trainer` was constructed with that earlier object.
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True,cache_dir=hf_cache).to(torch.device("cuda:0"))



In [None]:

def answer(question, image_address):
    """Ask the loaded vision-language model one question about one image.

    Uses the module-level ``processor`` and ``model`` objects.

    Args:
        question: The text of the user turn.
        image_address: Path of the image file to attach to the question.

    Returns:
        The decoded generation output with its first two token ids dropped.
        The caller splits this string on ``"ASSISTANT: "``, so it presumably
        still contains the prompt text — confirm before relying on it.
    """
    # Build a single-turn chat history and let `apply_chat_template` produce the
    # correctly formatted prompt. Each "content" value has to be a list of dicts
    # whose type is "text" or "image".
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": question},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # convert() returns a new image, so the file handle can be closed immediately.
    with Image.open(image_address) as image_file:
        raw_image = image_file.convert("RGB")

    # Pass text and image by keyword: the positional order of these two arguments
    # is not the same in every transformers release.
    inputs = processor(text=prompt, images=raw_image, return_tensors='pt').to(0, torch.float16)

    # Greedy decoding, capped at 200 new tokens.
    output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
    return processor.decode(output[0][2:])
    


In [None]:
import sys
import jacinle
import jactorch
import numpy as np
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader


# from experiments.desc_neuro_codex_clevr_vec import *

# with set_configs():
#     configs.model.embedding_type = 'fasttex'
import jacinle.io as io
from os import path as osp

# Dataset artefacts for the second evaluation all live under one directory.
data_dir = "data/clevr-mini"
data_scenes_json, data_image_root, data_vocab_json, data_output_vocab_json = (
    osp.join(data_dir, leaf)
    for leaf in ('scenes.json', 'images', 'vocab.json', 'output-vocab.json')
)

# This run evaluates the "rpm" transfer task.
evaluate_custom = "rpm"
data_questions_json = "data/clevr-transfer/rpm-20230513.json"

from left.data.clevr_custom_transfer import make_dataset

dataset = make_dataset(
    evaluate_custom,
    data_scenes_json,
    data_questions_json,
    data_image_root,
    data_output_vocab_json,
)
validation_dataset = dataset

# One example per batch, in file order, keeping the final batch.
batch_size = 1
num_workers = 4
validation_dataloader = validation_dataset.make_dataloader(
    batch_size,
    shuffle=False,
    drop_last=False,
    nr_workers=num_workers,
)



In [None]:
import re

# Score the vision-language model on the yes/no transfer questions.
# Only the yes/no tasks are handled here; the 'ref' task has to be done manually.
# Checking once up front means an unsupported task fails before any generation is run.
if evaluate_custom not in ('puzzle', 'rpm'):
    raise NotImplementedError()

group_meters = jacinle.GroupMeters()
group_meters.reset()
from jacinle.utils.tqdm import tqdm_pbar

wrong_indices = []
correct_indices = []
parse_fails = []
all_accuracies = []
with tqdm_pbar(total=len(validation_dataloader)) as pbar:
    for index, feed_dict in enumerate(validation_dataloader):
        question = feed_dict["question_raw"][0] + " \n Yes or No?"
        # Resolve the image through the configured image root instead of repeating the path.
        image_address = osp.join(data_image_root, feed_dict['image_filename'][0])
        response = answer(question, image_address)
        gt_answer = "Yes" if feed_dict['answer'][0] else "No"

        # Keep only the model's reply and compare case-insensitively.
        response = response.split("ASSISTANT: ")[-1].lower()
        gt_answer = gt_answer.lower()

        # Word boundaries stop "not", "know", "cannot" or "eyes" from being read as an
        # answer; the first standalone yes/no in the reply is taken as the prediction.
        response_extracted = re.findall(r"\b(?:yes|no)\b", response)
        if response_extracted:
            response_extracted = response_extracted[0]
            this_accuracy = 1 if response_extracted == gt_answer else 0
        else:
            # No recognisable answer: count it as wrong and show the raw reply.
            this_accuracy = 0
            print(response, response_extracted, gt_answer, this_accuracy)
            print("#####################################")

        # Record the outcome per batch index so the failures can be inspected afterwards.
        if this_accuracy == 0:
            wrong_indices.append(index)
        else:
            correct_indices.append(index)
        all_accuracies.append(this_accuracy)

        acc = np.mean(np.array(all_accuracies))
        pbar.set_description(group_meters.format_simple(
            f'Validation Acc: {acc}',
            {k: v for k, v in group_meters.val.items() if k.startswith('validation') and k.count('/') <= 2},
            compressed=True
        ), refresh=False)
        pbar.update()
