In [1]:
import os
import torch
import json
import argparse
from tqdm import tqdm
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from torchvision.transforms import functional
from torchvision import transforms
from PIL import Image
from datasets import load_dataset

# from training_models import MyClipEnsemble, MyInternEnsemble

from my_datasets import collate_fn_image, ImageDataset, EnsembleImageDataset, AdvDiffVLMImageDataset
# from poison_utils import L2_norm


# Image Training

In [2]:
dataset_name = "mini_MathVision_grid"
method = "i2i_AdvDiffVLMAttack"
args = {
    "model": "clip",
    "poison_save_pth": f"./data/poisons/{dataset_name}+{method}",
    "iter_attack": 200,
    "lr_attack": 1,
    # "base_data_pth": f"data/{dataset_name}/base",
    "base_data_pth": f"data/{dataset_name}/base_512",
    "target_data_pth": f"data/target",
    "questions_pth": f"data/{dataset_name}/questions.json",
    "temperature": 0,
    "max_new_tokens": 200,
    "eps": 16,
    "ddim_steps": 200, 
    "ddim_eta": 0.05,
    "scale": 5.0,  
}


# def read_json(path):
#     with open(path, 'r', encoding='utf-8') as f:
#         return json.load(f)


In [3]:
# encoder_model = MyClip()
# encoder_model = MyClipEnsemble()
# encoder_model = MyInternEnsemble()
# device = encoder_model.device
device = "cuda"

def read_json(path):
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

target_caps = read_json(os.path.join(args["target_data_pth"], "caps.json"))

In [4]:
ds = load_dataset("MathLLMs/MathVision", split="testmini")

In [5]:
target_dataset = AdvDiffVLMImageDataset(target_caps["annotations"])

target_dataloader = torch.utils.data.DataLoader(
    target_dataset, batch_size=1, 
    shuffle=False, collate_fn=collate_fn_image
)

In [6]:
class PadToSquare:
    def __call__(self, img):
        w, h = img.size
        max_wh = max(w, h)
        hp = (max_wh - w) // 2
        vp = (max_wh - h) // 2
        padding = (hp, vp, max_wh - w - hp, max_wh - h - vp)  # (left, top, right, bottom)
        img = functional.pad(img, padding, fill=255, padding_mode='constant')  # fill=0 for black
        return img

squareTransform = transforms.Compose([
    transforms.Lambda(lambda img: img.convert("RGB")),
    PadToSquare(),  
    transforms.Resize(512),
    transforms.ToTensor()
])

def squareData(example):
    image = example['decoded_image'][0]
    image = squareTransform(image)
    example['image_tensor'] = [image]
    return example

ds = ds.with_transform(squareData)

In [7]:
def collate_fn(batch):
    images = [example['image_tensor'] for example in batch]
    return torch.stack(images), {
        'name': batch[0]['id']
    } 

base_dataloader = DataLoader(ds, batch_size=1, shuffle=False, collate_fn=collate_fn)


In [8]:
import matplotlib.pyplot as plt
def show_img(img, caption):
    print(caption)
    plt.imshow(img.squeeze(0).permute(1,2,0))
    plt.axis("off")
    plt.show()

In [9]:
def i2i_attack(model, image_base, image_target, iters=100, lr=1/255, eps=8):
      '''
      optimizing x_adv to minimize emb_dist( img_embed of x_adv, img_embed of image_target ) within Lp constraint using PGD

      model: model class with image embedding functionality (e.g. CLIP, EVA)
      image_base, image_target: images between [0,1] 
      emb_dist: the distance metrics for vision embedding (such as L2): take a batch of bs image pairs as input, \
            and output EACH of pair-wise distances of the whole batch (size = [bs])

      eps: for Lp constraint
      lr: the step size. The update is grad.sign * lr
      diff_aug: using differentiable augmentation, e.g. RandomResizeCrop

      return: X_adv between [0,1]
      '''

      bs = image_base.size(0)
      device = image_base.device

      with torch.no_grad():
            embedding_targets = model.encode_image(image_target)
            embedding_targets = embedding_targets / embedding_targets.norm(dim=-1, keepdim=True)

      X_adv = image_base.clone().detach()
      X_adv.requires_grad_(True) 

      loss_best = 1e8
      X_adv_best = X_adv.clone().detach()

      # for i in tqdm(range(iters), leave=False):
      for i in range(iters):
            embs = model.encode_image(X_adv)
            embs = embs / embs.norm(dim=-1, keepdim=True)  # Normalize embeddings

            loss = L2_norm(embs, embedding_targets)
            # loss = torch.mean(torch.sum(embs * embedding_targets, dim=1))

            if loss < loss_best:
                  loss_best = loss.clone().detach()
                  X_adv_best = X_adv.clone().detach()

            # optimizer.zero_grad()
            loss.backward()

            # print losses at increments
            if i% max(int(iters/20),1) == 0:
                  # print('Iter :{} loss:{:.4f}, lr * 255:{:.4f}'.format(i,loss.item()/bs, scheduler.get_last_lr()[0]*255))
                  print('Iter :{} loss:{:.4f}, lr * 255:{:.4f}'.format(i,loss.item()/bs, lr*255))

            grad = X_adv.grad
            assert grad != None
            grad = grad / torch.mean(torch.abs(grad), dim=(1,2,3), keepdim=True)           
            
            perturbation = lr * grad.sign()
            # print(perturbation.shape)
            X_adv.data = X_adv.data.detach() + perturbation
            X_adv.data = torch.minimum(torch.maximum(X_adv, image_base - eps), image_base + eps) 
            X_adv.data = X_adv.data.clamp(0,255)
            X_adv.grad = None  

            if torch.isnan(loss):
                  print('Encounter nan loss at iteration {}'.format(i))
                  break                 

      print('Best Total loss:{:.4f}'.format(loss.item()))

      return X_adv_best, loss_best




In [10]:
def SGAImageAttack(model, image, cap, iters=100, lr=1/255, eps=8):
      '''
      optimizing x_adv to maximize emb_dist( img_embed of x_adv, text_embed of captions ) within Lp constraint using PGD

      model: model class with image and text embedding functionality (e.g. CLIP, EVA)
      image, cap: image between [0,255] (float) caption dict for that image
      emb_dist: the distance metrics for vision embedding (such as L2): take a batch of bs image pairs as input, \
            and output EACH of pair-wise distances of the whole batch (size = [bs])

      eps: for Lp constraint
      lr: the step size. The update is grad.sign * lr

      return: X_adv between [0,255]
      '''

      bs = image.size(0)
      device = image.device

      with torch.no_grad():
            embedding_targets = model.encode_text(cap['caption'])
            embedding_targets = embedding_targets / embedding_targets.norm(dim=-1, keepdim=True)

      X_adv = image.clone().detach()
      X_adv.requires_grad_(True) 

      loss_best = 1e8
      X_adv_best = X_adv.clone().detach()

      # for i in tqdm(range(iters), leave=False):
      for i in range(iters):
            embs = model.encode_image(X_adv)
            embs = embs / embs.norm(dim=-1, keepdim=True)  # Normalize embeddings

            loss = -L2_norm(embs, embedding_targets)

            if loss < loss_best:
                  loss_best = loss.clone().detach()
                  X_adv_best = X_adv.clone().detach()

            loss.backward()

            # print losses at increments
            if i% max(int(iters/20),1) == 0:
                  print('Iter :{} loss:{:.4f}, lr * 255:{:.4f}'.format(i,loss.item()/bs, lr*255))

            grad = X_adv.grad
            assert grad != None
            grad = grad / torch.mean(torch.abs(grad), dim=(1,2,3), keepdim=True)           
            
            perturbation = lr * grad.sign()
            # print(perturbation.shape)
            X_adv.data = X_adv.data.detach() - perturbation
            X_adv.data = torch.minimum(torch.maximum(X_adv, image_base - eps), image_base + eps) 
            X_adv.data = X_adv.data.clamp(0,255)
            X_adv.grad = None  

            if torch.isnan(loss):
                  print('Encounter nan loss at iteration {}'.format(i))
                  break                 

      print('Best Total loss:{:.4f}'.format(loss.item()))

      return X_adv_best, loss_best

In [11]:
def i2i_EnsembleAttack(model, image, target, iters=100, lr=1/255, eps=8):
      '''
      optimizing x_adv to maximize emb_dist( img_embed of x_adv, text_embed of captions ) within Lp constraint using PGD

      model: model class with image and text embedding functionality (e.g. CLIP, EVA)
      image, cap: image between [0,255] (float) caption dict for that image
      emb_dist: the distance metrics for vision embedding (such as L2): take a batch of bs image pairs as input, \
            and output EACH of pair-wise distances of the whole batch (size = [bs])

      eps: for Lp constraint
      lr: the step size. The update is grad.sign * lr

      return: X_adv between [0,255]
      '''

      bs = image.size(0)
      device = image.device

      with torch.no_grad():
            embedding_targets = model.encode_image(target)

      X_adv = image.clone().detach()
      X_adv.requires_grad_(True) 

      loss_best = -1e8
      X_adv_best = X_adv.clone().detach()

      # for i in tqdm(range(iters), leave=False):
      for i in range(iters):
            embs = model.encode_image(X_adv, use_grad=True)

            # loss = L2_norm(embs, embedding_targets)
            grad, loss = model.get_gradients(embs, embedding_targets, X_adv) 

            torch.cuda.empty_cache()
            if loss > loss_best:
                  loss_best = loss.clone().detach()
                  X_adv_best = X_adv.clone().detach()

            # print losses at increments
            if i% max(int(iters/20),1) == 0:
                  print('Iter :{} loss:{:.4f}, lr:{:.4f}'.format(i,loss.item()/bs, lr))

            
            perturbation = lr * grad.sign()
            # print(perturbation.shape)
            with torch.no_grad():
                  X_adv = X_adv + perturbation
                  X_adv = torch.min(torch.max(X_adv, image_base - eps), image_base + eps)
                  X_adv = torch.clamp(X_adv, 0, 255)
            X_adv.requires_grad_(True)

            if torch.isnan(loss):
                  print('Encounter nan loss at iteration {}'.format(i))
                  break                 

      print('Best Total loss:{:.4f}'.format(loss.item()))

      # return X_adv.clone().detach(), loss_best
      return X_adv_best, loss_best

### SGA Image Attack

In [12]:
# import matplotlib.pyplot as plt

# ###### Running SGA Image Attack ######
# original_sizes_list = []
# X_adv_list = []
# all_losses = []
# loss_attack_list = []

# target_path = os.path.join(args['poison_save_pth'], "SGA")

# if not os.path.exists(target_path):
#     os.makedirs(target_path)
# saved_images = set(os.listdir(target_path))
# saved_image_ids = {int(fname.split('.')[0]) for fname in saved_images if fname.endswith(('.png', '.jpg', '.jpeg'))}


# for i, (image_base, base_cap) in  tqdm(enumerate(base_dataloader), desc=f"Processing all images", total=len(base_dataloader)):
#     if base_cap['name'] in saved_image_ids:
#             print(f"{base_cap['name']} already processed for {target_cap['name']}, skipping...")
#             continue

#     print('image name = ', base_cap['name'])
#     image_base = image_base.to(device)

#     X_adv, loss_attack = SGAImageAttack(
#             model=encoder_model,
#             image=image_base,
#             cap=base_cap,
#             iters=args['iter_attack'],
#             lr=args['lr_attack'],
#             eps=args['eps']
#     )


#     ###### Save poisoned images after each batch ######
#     img_pth = os.path.join(target_path, f"{base_cap['name']}.png")
#     image_to_save = X_adv/255.0
#     print()
#     if base_cap['name'] not in saved_image_ids:  # Only save if it doesn't already exist
#         print("Max Pixel Difference between Adversarial Image and Base *255:", round(torch.max(torch.abs(X_adv-image_base)).item(), 4))
#         save_image(image_to_save.cpu(), img_pth)
#         print(f"Saved poisoned image {base_cap['name']} to {img_pth}") # mathvista

#     # show_img(((X_adv-image_base).to(torch.uint8).cpu()*10), ("Adversarial Noise", img_pth))
#     # show_img(image_to_save.cpu(), ("Adversarial Image", img_pth))

        

# print("Done.")

### i2i attack

In [13]:
# ##### Running i2i attack ######
# original_sizes_list = []
# X_adv_list = []
# all_losses = []
# loss_attack_list = []
# start_idx = 0
# end_idx = 0
# # for j, (image_target, target_cap) in  tqdm(enumerate(target_dataloader)):
# for j, (image_target, target_cap) in  tqdm(enumerate(target_dataloader), desc="Target Dataloader", position=0, leave=True):
#     target_name = target_cap["name"]
#     target_path = os.path.join(args["poison_save_pth"], target_cap["name"])
#     if not os.path.exists(target_path):
#         os.makedirs(target_path)

#     ###### Resume by checking already saved images ######
#     saved_images = set(os.listdir(target_path))
#     saved_image_ids = {int(fname.split('.')[0]) for fname in saved_images if fname.endswith(('.png', '.jpg', '.jpeg'))}

#     # show_img(image_target, ("Working on target:", target_name))

#     for i, (image_base, base_cap) in  tqdm(enumerate(base_dataloader), desc=f"Processing {target_name}", total=len(base_dataloader), position=1, leave=False):
#         if base_cap['name'] in saved_image_ids:
#                 print(f"{base_cap['name']} already processed for {target_cap['name']}, skipping...")
#                 continue

#         print('image name = ', base_cap['name'])
#         image_base, image_target = image_base.to(device), image_target.to(device)

#         X_adv, loss_attack = i2i_attack(
#                 model=encoder_model,
#                 image_base=image_base,
#                 image_target=image_target,
#                 iters=args['iter_attack'],
#                 lr=args['lr_attack'],
#                 eps=args['eps']
#         )


#         ###### Save poisoned images after each batch ######
#         img_pth = os.path.join(target_path, f"{base_cap['name']}.png")
#         image_to_save = X_adv/255.0
#         print()
#         if base_cap['name'] not in saved_image_ids:  # Only save if it doesn't already exist
#             print("Max Pixel Difference between Adversarial Image and Base *255:", round(torch.max(torch.abs(X_adv-image_base)).item(), 4))
#             save_image(image_to_save.cpu(), img_pth)
#             print(f"Saved poisoned image {base_cap['name']} to {img_pth}") # mathvista

#         # show_img(((X_adv-image_base).to(torch.uint8).cpu()*10), ("Adversarial Noise", img_pth))
#         # show_img(image_to_save.cpu(), ("Adversarial Image", img_pth))

        

# print("Done.")

## i2i Ensemble attack

In [14]:
# ##### Running i2i attack ######
# original_sizes_list = []
# X_adv_list = []
# all_losses = []
# loss_attack_list = []
# start_idx = 0
# end_idx = 0
# # for j, (image_target, target_cap) in  tqdm(enumerate(target_dataloader)):
# for j, (image_target, target_cap) in  tqdm(enumerate(target_dataloader), desc="Target Dataloader", position=0, leave=True):
#     target_name = target_cap["name"]
#     target_path = os.path.join(args["poison_save_pth"], target_cap["name"])
#     if not os.path.exists(target_path):
#         os.makedirs(target_path)

#     ###### Resume by checking already saved images ######
#     saved_images = set(os.listdir(target_path))
#     saved_image_ids = {fname.split('.')[0] for fname in saved_images if fname.endswith(('.png', '.jpg', '.jpeg'))}
#     if all(name in saved_image_ids for name in [base_cap['name'] for (_, base_cap) in base_dataloader]):
#         print(f"{target_cap['name']} entirely complete")
#         continue

#     # show_img(image_target, ("Working on target:", target_name))

#     for i, (image_base, base_cap) in  tqdm(enumerate(base_dataloader), desc=f"Processing {target_name}", total=len(base_dataloader), position=1, leave=False):
#         if base_cap['name'] in saved_image_ids:
#                 print(f"{base_cap['name']} already processed for {target_cap['name']}, skipping...")
#                 continue

#         print('image name = ', base_cap['name'])
#         image_base, image_target = image_base.to(device), image_target.to(device)

#         X_adv, loss_attack = i2i_EnsembleAttack(
#                 model=encoder_model,
#                 image=image_base,
#                 target=image_target,
#                 iters=args['iter_attack'],
#                 lr=args['lr_attack'],
#                 eps=args['eps']
#         )


#         ###### Save poisoned images after each batch ######
#         img_pth = os.path.join(target_path, f"{base_cap['name']}.png")
#         image_to_save = X_adv / 255.0
#         print()
#         if base_cap['name'] not in saved_image_ids:  # Only save if it doesn't already exist
#             print("Max Pixel Difference between Adversarial Image and Base *255:", torch.max(torch.abs(X_adv-image_base)).item())
#             save_image(image_to_save.cpu(), img_pth)
#             print(f"Saved poisoned image {base_cap['name']} to {img_pth}") # mathvista

#         # show_img(((X_adv-image_base).to(torch.uint8).cpu()*10), ("Adversarial Noise", img_pth))
#         # show_img(image_to_save.cpu(), ("Adversarial Image", img_pth))

        

# print("Done.")

## AdvDiffVLM i2i grid attack

In [15]:
# import os
# import torch
# import torch.nn.functional as F
# import numpy as np
# import cv2
# import torchvision
# from torchvision import models, transforms
# from PIL import Image

# # ----------------- GradCAM++ Implementation -----------------
# class GradCAMPlusPlus:
#     def __init__(self, model, target_layer):
#         self.model = model
#         self.model.eval()
#         self.target_layer = target_layer
#         self.gradients = None
#         self.activations = None

#         # Register hooks to save gradients and activations
#         target_layer.register_forward_hook(self.save_activation)
#         target_layer.register_backward_hook(self.save_gradient)

#     def save_activation(self, module, input, output):
#         self.activations = output

#     def save_gradient(self, module, grad_in, grad_out):
#         self.gradients = grad_out[0]

#     def forward(self, x):
#         return self.model(x)

#     def generate_cam(self, target_class):
#         # Compute weights based on Grad-CAM++ formulation
#         gradients = self.gradients[0].cpu().data.numpy()
#         activations = self.activations[0].cpu().data.numpy()
        
#         alpha_num = gradients ** 2
#         alpha_denom = gradients ** 2 * 2 + np.sum(activations * (gradients ** 3), axis=(1, 2), keepdims=True)
#         # Avoid division by zero
#         alpha_denom = np.where(alpha_denom != 0, alpha_denom, np.ones(alpha_denom.shape))
#         alphas = alpha_num / alpha_denom
#         weights = np.maximum(gradients, 0) * alphas
#         weights = np.sum(weights, axis=(1, 2))
#         cam = np.sum(weights[:, np.newaxis, np.newaxis] * activations, axis=0)
#         cam = np.maximum(cam, 0)  # ReLU
#         # Resize CAM to 224x224 (same as input image size)
#         cam = cv2.resize(cam, (224, 224))
#         # Normalize CAM to [0, 1]
#         cam = cam - np.min(cam)
#         if np.max(cam) != 0:
#             cam = cam / np.max(cam)
#         return cam

#     def __call__(self, input_tensor, target_class):
#         # Forward pass
#         output = self.forward(input_tensor)
#         # Zero gradients and backward pass with respect to target class
#         self.model.zero_grad()
#         class_loss = output[0, target_class]
#         class_loss.backward()
#         # Generate CAM
#         cam = self.generate_cam(target_class)
#         return cam

# # ----------------- Preprocessing function -----------------
# preprocess = torchvision.transforms.Compose(
#     [
#         torchvision.transforms.Resize(224, interpolation=torchvision.transforms.InterpolationMode.BICUBIC, antialias=True),
#         # torchvision.transforms.Lambda(lambda img: torch.clamp(img, 0.0, 255.0) / 255.0),
#         torchvision.transforms.CenterCrop(224),
#         torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), # CLIP imgs mean and std.
#     ]
# )

# # ----------------- Main Script -----------------

# # Define paths
# mask_output_dir = os.path.join(args['poison_save_pth']+"_masked")    # directory where binary masks will be saved

# # Create output directory if it doesn't exist
# if not os.path.exists(mask_output_dir):
#     os.makedirs(mask_output_dir)

# # Load pretrained model (ResNet50) and specify the target convolutional layer
# model = models.resnet50(pretrained=True)
# target_layer = model.layer4[-1]  # last conv layer of ResNet50
# grad_cam_pp = GradCAMPlusPlus(model, target_layer)

# # Process each image file in this subdirectory (assume common image extensions)
# for image, caps in tqdm(base_dataloader, desc="ResNet Masks"):
#     try:
#         input_tensor = preprocess(image)
#     except Exception as e:
#         print(f"Error loading image {img_path}: {e}")
#         continue
    
#     # Forward pass through the model to get prediction
#     with torch.no_grad():
#         output = model(input_tensor)
#     target_class = output.argmax().item()  # or you can specify a target class if needed
    
#     # Generate GradCAM++ heatmap for the target class
#     cam = grad_cam_pp(input_tensor, target_class)
    
#     # Convert heatmap to binary mask using thresholding (e.g., threshold at 0.5)
#     cam = torch.tensor(cam).abs()
#     cam = cam.pow(4)
#     binary_mask = (((cam - cam.min()) / (cam.max() - cam.min()))*255).numpy().astype(np.uint8)  # 255 for white regions    
    
#     # Optionally, convert to PIL Image (if you want to save in common formats)
#     mask_img = Image.fromarray(binary_mask)
    
#     # Save binary mask image with a modified filename
#     mask_filename = f"{caps['name']}_mask.png"
#     mask_save_path = os.path.join(mask_output_dir, mask_filename)
#     mask_img.save(mask_save_path)
# print(f"Saved binary masks for all images")


In [16]:
# # ---- clip_cam_mask.py ---------------------------------------------------
# import os, cv2, torch, numpy as np
# from PIL import Image
# from tqdm import tqdm
# import clip
# import torchvision.transforms as T

# device          = "cuda" if torch.cuda.is_available() else "cpu"
# clip_model, _   = clip.load("ViT-B/32", device=device)
# clip_model.eval()

# clip_preprocess = T.Compose([
#     T.Resize(224, interpolation=T.InterpolationMode.BICUBIC, antialias=True),
#     T.CenterCrop(224),
#     T.ToTensor(),                # -> float32 [0,1]
#     T.Normalize(                 # CLIP mean/std
#         (0.48145466,0.4578275,0.40821073),
#         (0.26862954,0.26130258,0.27577711)
#     )
# ])

# def clip_binary_mask(pil_img,
#                      size            = 128,
#                      top_percent     = 30,      # keep this % most-salient
#                      dilation_pixels = 2):      # 0 = no dilation
#     """
#     Returns a uint8 array of shape [size,size] where
#         0   => *protected* (keep fidelity)
#         255 => *editable*
#     """
#     with torch.enable_grad():
#         img = clip_preprocess(pil_img).unsqueeze(0).to(device)
#         img.requires_grad_(True)

#         score = clip_model.encode_image(img).norm()     # scalar
#         clip_model.zero_grad()
#         score.backward()

#         sal = img.grad.abs().sum(1)[0]                  # [H,W] float
#         sal = torch.nn.functional.interpolate(
#             sal[None,None], (size,size),
#             mode="bilinear", align_corners=False
#         )[0,0]
#         sal = (sal - sal.min()) / (sal.max() - sal.min() + 1e-8)

#     # ---------- percentile threshold -> binary --------------------------
#     thresh = torch.quantile(sal.flatten(), 1-top_percent/100.)
#     bin_mask = (sal >= thresh).cpu().numpy().astype("uint8")  # 1/0
#     if dilation_pixels > 0:
#         k = dilation_pixels
#         kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*k+1, 2*k+1))
#         bin_mask = cv2.dilate(bin_mask, kernel, iterations=1)

#     # --- NEW: morphological opening to clean artifacts ---------------
#     bin_mask = cv2.morphologyEx(bin_mask, cv2.MORPH_OPEN,
#                                 np.ones((3,3), np.uint8))

#     bin_mask = (1 - bin_mask) * 255
#     return bin_mask.astype("uint8")

# import pytesseract, cv2, numpy as np
# from PIL import Image

# def ocr_mask(pil_img, size=128, pad=4):
#     """Return uint8 mask: 0=text boxes, 255=editable."""
#     # pytesseract works on BGR np array
#     np_img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
#     h0, w0 = np_img.shape[:2]

#     boxes = pytesseract.image_to_boxes(np_img, config="--psm 6")
#     mask = np.ones((h0, w0), np.uint8) * 255      # start editable (white)

#     for b in boxes.splitlines():
#         _, x1, y1, x2, y2, _ = b.split()          # char, x1,y1,x2,y2
#         x1,y1,x2,y2 = map(int, (x1,y1,x2,y2))
#         # pytesseract's origin is bottom-left
#         y1 = h0 - y1;  y2 = h0 - y2
#         cv2.rectangle(mask,
#                       (max(0,x1-pad), max(0,y2-pad)),
#                       (min(w0,x2+pad), min(h0,y1+pad)),
#                       0, -1)                      # 0 = protect

#     # down- or up-sample to latent size
#     mask = cv2.resize(mask, (size, size), interpolation=cv2.INTER_NEAREST)
#     return mask.astype("uint8")


# # ------------------------------------------------------------------------
# # 3.  Batch over a folder
# # ------------------------------------------------------------------------
# mask_dir   = args['poison_save_pth'] + "_masked"         # new folder

# os.makedirs(mask_dir, exist_ok=True)

# for image, caps in tqdm(base_dataloader, desc="Generating OCR masks"):
#     img_pil = transforms.Compose([
#         transforms.ToPILImage(),
#         transforms.Resize(512)               # keeps aspect ratio ⇢ shorter side = 512
#     ])(image.squeeze(0))

#     mask = 255-clip_binary_mask(img_pil,
#                         size=224,          # latent grid
#                         top_percent=50,    # tighten/relax here
#                         dilation_pixels=2) # halo around glyphs
#     # mask = 255-ocr_mask(img_pil, size=128, pad=4)


#     out_name  = os.path.splitext(caps['name'])[0] + "_mask.png"
#     Image.fromarray(mask).save(os.path.join(mask_dir, out_name))
# # ------------------------------------------------------------------------


In [17]:
import sys
import os 
import random

os.environ["CUDA_VISIBLE_DEVICES"]="0"

sys.path.append(".")
sys.path.append('./taming-transformers')
from taming.models import vqgan 

import torch
from omegaconf import OmegaConf

from ldm.util import instantiate_from_config

from ldm.models.diffusion.ddim_main import DDIMSampler
from torchvision.models import resnet50, ResNet50_Weights

from torchvision.utils import save_image
from torch.backends import cudnn
import numpy as np
import time



def to_tensor(pic):
    mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32}
    img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
    img = img.view(pic.size[1], pic.size[0], len(pic.getbands()))
    img = img.permute((2, 0, 1)).contiguous()
    return img.to(dtype=torch.get_default_dtype())

def load_model_from_config(config, ckpt):
    print(f"Loading model from {ckpt}")
    pl_sd = torch.load(ckpt)#, map_location="cpu")
    sd = pl_sd["state_dict"]
    model = instantiate_from_config(config.model)
    m, u = model.load_state_dict(sd, strict=False)
    model.cuda()
    model.eval()
    return model

# def get_model(): 
#     config = OmegaConf.load("ldm.yaml")  # configs/latent-diffusion/cin256-v2.yaml
#     model = load_model_from_config(config, "model.ckpt")  # https://ommer-lab.com/files/latent-diffusion/nitro/cin/model.ckpt
def get_model():  # 下载地址
    config = OmegaConf.load("configs/latent-diffusion/cin256-v2.yaml")  
    model = load_model_from_config(config, "models/ldm/cin256-v2/model.ckpt")
    return model

## 设置随机种子

seed = 0
cudnn.benchmark = False
cudnn.deterministic = True
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
# seed for everything
# credit: https://www.kaggle.com/code/rhythmcam/random-seed-everything
DEFAULT_RANDOM_SEED = 0
device = "cuda" if torch.cuda.is_available() else "cpu"

# basic random seed
def seedBasic(seed=DEFAULT_RANDOM_SEED):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

# torch random seed
def seedTorch(seed=DEFAULT_RANDOM_SEED):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# combine
def seedEverything(seed=DEFAULT_RANDOM_SEED):
    seedBasic(seed)
    seedTorch(seed)

seedEverything()
#------------------------------------------------------------------ #  

model = get_model()  # 扩散模型

### CLIP模型
import clip 
import torchvision
device = model.device

clip_rn_50,_ = clip.load('RN50', device=device)
clip_rn_101,_ = clip.load('RN101', device=device)
clip_vit_b_16,_ = clip.load('ViT-B/16', device=device)
clip_vit_b_32,_ = clip.load('ViT-B/32', device=device)
clip_vit_l_14,_ = clip.load('ViT-L/14', device=device)
models = [clip_rn_50, clip_rn_101, clip_vit_b_16, clip_vit_b_32]
# models = [clip_vit_b_32]
clip_preprocess = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(clip_vit_b_32.visual.input_resolution, interpolation=torchvision.transforms.InterpolationMode.BICUBIC, antialias=True),
        # torchvision.transforms.Lambda(lambda img: torch.clamp(img, 0.0, 255.0) / 255.0),
        torchvision.transforms.CenterCrop(clip_vit_b_32.visual.input_resolution),
        torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)), # CLIP imgs mean and std.
    ]
)
final_preprocess = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(clip_vit_b_32.visual.input_resolution, interpolation=torchvision.transforms.InterpolationMode.BICUBIC, antialias=True),
        # torchvision.transforms.Lambda(lambda img: torch.clamp(img, 0.0, 255.0) / 255.0),
        torchvision.transforms.CenterCrop(clip_vit_b_32.visual.input_resolution),
    ]
)


class ImageFolderWithPaths(torchvision.datasets.ImageFolder):
    def __getitem__(self, index: int):
        original_tuple = super().__getitem__(index)
        path, _ = self.samples[index]
        return original_tuple + (path,)

 # 读取数据
input_res = 224
# 建立映射关系


batch_size = 1
transform_fn = torchvision.transforms.Compose(
        [
            torchvision.transforms.Resize(224, interpolation=torchvision.transforms.InterpolationMode.BICUBIC),
            torchvision.transforms.CenterCrop(224),
            torchvision.transforms.Lambda(lambda img: img.convert("RGB")),
            torchvision.transforms.Lambda(lambda img: to_tensor(img)),
            torchvision.transforms.Lambda(lambda img: torch.clamp(img, 0.0, 255.0) / 255.0),
        ]
    )
transform_fn_org = torchvision.transforms.Compose(
    [
        torchvision.transforms.Resize(256, interpolation=torchvision.transforms.InterpolationMode.BICUBIC),
        torchvision.transforms.CenterCrop(256),
        torchvision.transforms.ToTensor(), # [0, 1]
        torchvision.transforms.Lambda(lambda img: (img * 2 - 1)),
        # torchvision.transforms.Lambda(lambda img: img.convert("RGB")),
        # torchvision.transforms.Lambda(lambda img: to_tensor(img)),
    ]
)

sampler = DDIMSampler(model, models=models, preprocess=clip_preprocess)  # 使用ddim采样器


import numpy as np 
from PIL import Image
from einops import rearrange  # 用于重新排列Tensor的维度
import cv2

classes = [2]  # 定义要生成的类
n_samples_per_class = 1
# 读取类别
import pandas as pd

ddim_steps = args["ddim_steps"] # 200
ddim_eta = args["ddim_eta"]
scale = args["scale"]   

img_transformed_list = []
cam_root = args['poison_save_pth']+"_masked"

for j, (image_target, target_cap) in  tqdm(enumerate(target_dataloader), desc="Target Dataloader", position=0, leave=True):
    target_name = target_cap["name"]
    target_path = os.path.join(args["poison_save_pth"], target_cap["name"])
    if not os.path.exists(target_path):
        os.makedirs(target_path)

    ###### Resume by checking already saved images ######
    saved_images = set(os.listdir(target_path))
    saved_image_ids = {fname.split('.')[0] for fname in saved_images if fname.endswith(('.png', '.jpg', '.jpeg'))}
    if all(name in saved_image_ids for name in [base_cap['name'] for (_, base_cap) in base_dataloader]):
        print(f"{target_cap['name']} entirely complete")
        continue
    for i, (image_base, base_cap) in  tqdm(enumerate(base_dataloader), desc=f"Processing {target_name}", total=len(base_dataloader), position=1, leave=False):
        if base_cap['name'] in saved_image_ids:
                print(f"{base_cap['name']} already processed for {target_cap['name']}, skipping...")
                continue

        print('image name = ', base_cap['name'])
        image_org, image_tgt = image_base.to(device), image_target.to(device)

        start = time.time()

        # # get tgt featutres
        with torch.no_grad():
            tgt_image_features_list=[]
            image_tgt = clip_preprocess(image_tgt)
            for clip_model in models:
                tgt_image_features = clip_model.encode_image(image_tgt)  # [bs, 512]
                tgt_image_features = tgt_image_features / tgt_image_features.norm(dim=1, keepdim=True)
                tgt_image_features_list.append(tgt_image_features)

        adv_image_feature_list = []
        for clip_model in models:
            adv_image_features = clip_model.encode_image(clip_preprocess(image_base).to(device))
            adv_image_features = adv_image_features / adv_image_features.norm(dim=1, keepdim=True)
            adv_image_feature_list.append(adv_image_features)
        crit_list = []
        for pred_i, target_i in zip(adv_image_feature_list, tgt_image_features_list):
            crit =  torch.mean(torch.sum(pred_i * target_i, dim=1))  # 有目标攻击
            crit_list.append(crit.data.detach().cpu().numpy())
        print("base loss (before generation):", crit_list)
        
        with torch.no_grad():
            with model.ema_scope():  # 指数平滑
                uc = model.get_learned_conditioning(
                    {model.cond_stage_key: torch.tensor(n_samples_per_class*[1000]).to(model.device)}
                    ) # 类别为0-999，因此无条件用1000表示
                # label_id = path[0].split('/')[-2]
                # label_id = name_key[label_id] # 用于映射
                # class_label = labeles[label_id]
                # print("class_label:", class_label)
                # for class_label in classes:
                all_samples = list()
                all_labels = list() 
                # print(f"rendering {n_samples_per_class} examples of class '{class_label}' in {ddim_steps} steps and using s={scale:.2f}.")
                xc = torch.tensor(n_samples_per_class*[0]) #### IDK WHAT THIS DOES BE WARNED AND PROBABLY EDIT LATER AHHHHHH
                c = model.get_learned_conditioning({model.cond_stage_key: xc.to(model.device)})
                encoder_posterior = model.encode_first_stage(image_org)
                z = model.get_first_stage_encoding(encoder_posterior).detach()
                cam = cv2.imread(os.path.join(cam_root, base_cap['name']+'_mask.png'), 0) / 255.
                cam = cv2.resize(cam, (128, 128)) # CHANGED TO 128,128 FROM 64,64. WORKS NOT SURE WHY. NOT SURE HOW IT TRANSFORMS IT
                cam = torch.tensor(cam).float()

                # print("size of mask:", cam.size())
                # print("size of tgt image feats:", tgt_image_features_list[0].size())
                
                samples_ddim, _ = sampler.sample(S=ddim_steps,
                                                conditioning=c,
                                                # conditioning=None,
                                                x_T=z,
                                                batch_size=n_samples_per_class,
                                                shape=[3, 128, 128],
                                                verbose=False,
                                                unconditional_guidance_scale=scale,
                                                unconditional_conditioning=uc, 
                                                eta=ddim_eta,
                                                label=xc.to(model.device),
                                                # label=None,
                                                tgt_image_features_list=tgt_image_features_list,
                                                org_image_features_list=None,
                                                cam=cam,
                                                K=1,s=30,a=5)
                
                for _ in range(1):
                    samples_ddim, _ = sampler.sample(S=ddim_steps,
                                                conditioning=c,
                                                x_T=samples_ddim,
                                                batch_size=n_samples_per_class,
                                                shape=[3, 128, 128],
                                                verbose=False,
                                                unconditional_guidance_scale=scale,
                                                unconditional_conditioning=uc, 
                                                eta=ddim_eta,
                                                label=xc.to(model.device),
                                                tgt_image_features_list=tgt_image_features_list,
                                                org_image_features_list=None,
                                                cam=cam,
                                                K=1,s=30,a=5)
                
                x_samples_ddim = model.decode_first_stage(samples_ddim)
                x_samples_ddim = torch.clamp((x_samples_ddim+1.0)/2.0, 
                                            min=0.0, max=1.0)


        img_transformed = clip_preprocess(x_samples_ddim).to(device) # image transformation to model input
        img_transformed_list.append(x_samples_ddim)
        adv_image_feature_list = []
        for clip_model in models:
            adv_image_features = clip_model.encode_image(img_transformed)
            adv_image_features = adv_image_features / adv_image_features.norm(dim=1, keepdim=True)
            adv_image_feature_list.append(adv_image_features)
        crit_list = []
        for pred_i, target_i in zip(adv_image_feature_list, tgt_image_features_list):
            crit =  torch.mean(torch.sum(pred_i * target_i, dim=1))  # 有目标攻击
            crit_list.append(crit.data.detach().cpu().numpy())
        print("loss:", crit_list)

        ###### Save poisoned images after each batch ######
        img_pth = os.path.join(target_path, f"{base_cap['name']}.png")
        # image_to_save = X_adv / 255.0
        # print()
        if base_cap['name'] not in saved_image_ids:  # Only save if it doesn't already exist
            # print("Max Pixel Difference between Adversarial Image and Base *255:", torch.max(torch.abs(X_adv-image_base)).item())
            save_image(x_samples_ddim, img_pth)
            print(f"Saved poisoned image {base_cap['name']} to {img_pth}") # mathvista

        end = time.time()
        print("Time:", end-start)
        
        # for path_idx in range(len(path)):
        #     folder, name = path[path_idx].split("/")[-2], path[path_idx].split("/")[-1]
        #     # folder_to_save = os.path.join('../_output_img', , folder)
        #     folder_to_save = "/data/gq/result/adversarial_attack/dda/result/second_paper/samples"
        #     if not os.path.exists(folder_to_save):
        #         os.makedirs(folder_to_save, exist_ok=True)
        #     if 'JPEG' in name:
        #         torchvision.utils.save_image(final_preprocess(x_samples_ddim)[path_idx], os.path.join(folder_to_save, name[:-4]) + 'png')
        #     elif 'png' in name:
        #         torchvision.utils.save_image(final_preprocess(x_samples_ddim)[path_idx], os.path.join(folder_to_save, name))

Loading model from models/ldm/cin256-v2/model.ckpt


  from torch.distributed._sharded_tensor import pre_load_state_dict_hook, state_dict_hook


LatentDiffusion: Running in eps-prediction mode
DiffusionWrapper has 400.92 M params.
making attention of type 'vanilla' with 512 in_channels
Working with z of shape (1, 3, 64, 64) = 12288 dimensions.
making attention of type 'vanilla' with 512 in_channels


Target Dataloader: 1it [00:05,  5.48s/it]

apple entirely complete


Target Dataloader: 2it [00:10,  5.41s/it]

bar entirely complete


Target Dataloader: 3it [00:16,  5.40s/it]

docs entirely complete


Target Dataloader: 4it [00:21,  5.23s/it]

tbl entirely complete


Target Dataloader: 5it [00:26,  5.30s/it]

synth entirely complete


Target Dataloader: 6it [00:31,  5.31s/it]

nat entirely complete


Target Dataloader: 7it [00:37,  5.34s/it]

pie entirely complete


Target Dataloader: 8it [00:42,  5.34s/it]

sci entirely complete


Target Dataloader: 9it [00:48,  5.38s/it]

vlin entirely complete


Target Dataloader: 10it [00:53,  5.37s/it]

rdar entirely complete


Target Dataloader: 11it [00:58,  5.36s/it]

pzle entirely complete


Target Dataloader: 12it [01:03,  5.28s/it]

med entirely complete


Target Dataloader: 13it [01:09,  5.33s/it]

sctr entirely complete


Target Dataloader: 14it [01:14,  5.36s/it]

func entirely complete


Target Dataloader: 15it [01:20,  5.36s/it]

line entirely complete


Target Dataloader: 16it [01:25,  5.36s/it]

map entirely complete


Target Dataloader: 17it [01:30,  5.39s/it]

abst entirely complete


Target Dataloader: 18it [01:36,  5.40s/it]

heat entirely complete


Target Dataloader: 19it [01:41,  5.39s/it]

word entirely complete




4 already processed for geo, skipping...
5 already processed for geo, skipping...
6 already processed for geo, skipping...
7 already processed for geo, skipping...
8 already processed for geo, skipping...
10 already processed for geo, skipping...
11 already processed for geo, skipping...
16 already processed for geo, skipping...
20 already processed for geo, skipping...
23 already processed for geo, skipping...




26 already processed for geo, skipping...
27 already processed for geo, skipping...
28 already processed for geo, skipping...
29 already processed for geo, skipping...
32 already processed for geo, skipping...
33 already processed for geo, skipping...
34 already processed for geo, skipping...
35 already processed for geo, skipping...
38 already processed for geo, skipping...
39 already processed for geo, skipping...
41 already processed for geo, skipping...
43 already processed for geo, skipping...




44 already processed for geo, skipping...
45 already processed for geo, skipping...
46 already processed for geo, skipping...
49 already processed for geo, skipping...
50 already processed for geo, skipping...
52 already processed for geo, skipping...
53 already processed for geo, skipping...
55 already processed for geo, skipping...




58 already processed for geo, skipping...
59 already processed for geo, skipping...
60 already processed for geo, skipping...
61 already processed for geo, skipping...
62 already processed for geo, skipping...
63 already processed for geo, skipping...
64 already processed for geo, skipping...




65 already processed for geo, skipping...
66 already processed for geo, skipping...
71 already processed for geo, skipping...
75 already processed for geo, skipping...
78 already processed for geo, skipping...
79 already processed for geo, skipping...
80 already processed for geo, skipping...




86 already processed for geo, skipping...
89 already processed for geo, skipping...
90 already processed for geo, skipping...
91 already processed for geo, skipping...
92 already processed for geo, skipping...
93 already processed for geo, skipping...
95 already processed for geo, skipping...
99 already processed for geo, skipping...
100 already processed for geo, skipping...
104 already processed for geo, skipping...




105 already processed for geo, skipping...
107 already processed for geo, skipping...
110 already processed for geo, skipping...
114 already processed for geo, skipping...
115 already processed for geo, skipping...
117 already processed for geo, skipping...
118 already processed for geo, skipping...
120 already processed for geo, skipping...
123 already processed for geo, skipping...
124 already processed for geo, skipping...
125 already processed for geo, skipping...
126 already processed for geo, skipping...
130 already processed for geo, skipping...




131 already processed for geo, skipping...
133 already processed for geo, skipping...
149 already processed for geo, skipping...
152 already processed for geo, skipping...
157 already processed for geo, skipping...
159 already processed for geo, skipping...
162 already processed for geo, skipping...
164 already processed for geo, skipping...
167 already processed for geo, skipping...
168 already processed for geo, skipping...
173 already processed for geo, skipping...
175 already processed for geo, skipping...
176 already processed for geo, skipping...
180 already processed for geo, skipping...




181 already processed for geo, skipping...
183 already processed for geo, skipping...
187 already processed for geo, skipping...
190 already processed for geo, skipping...
193 already processed for geo, skipping...
195 already processed for geo, skipping...
201 already processed for geo, skipping...
203 already processed for geo, skipping...
206 already processed for geo, skipping...
210 already processed for geo, skipping...
211 already processed for geo, skipping...
213 already processed for geo, skipping...
214 already processed for geo, skipping...
215 already processed for geo, skipping...
216 already processed for geo, skipping...
217 already processed for geo, skipping...
218 already processed for geo, skipping...
219 already processed for geo, skipping...
222 already processed for geo, skipping...
223 already processed for geo, skipping...
224 already processed for geo, skipping...




230 already processed for geo, skipping...
231 already processed for geo, skipping...
233 already processed for geo, skipping...
234 already processed for geo, skipping...
242 already processed for geo, skipping...
246 already processed for geo, skipping...
250 already processed for geo, skipping...
253 already processed for geo, skipping...
254 already processed for geo, skipping...
255 already processed for geo, skipping...
259 already processed for geo, skipping...
261 already processed for geo, skipping...
263 already processed for geo, skipping...
269 already processed for geo, skipping...
270 already processed for geo, skipping...
272 already processed for geo, skipping...




273 already processed for geo, skipping...
277 already processed for geo, skipping...
279 already processed for geo, skipping...
284 already processed for geo, skipping...
285 already processed for geo, skipping...
286 already processed for geo, skipping...
290 already processed for geo, skipping...
291 already processed for geo, skipping...
292 already processed for geo, skipping...
293 already processed for geo, skipping...
295 already processed for geo, skipping...
296 already processed for geo, skipping...
297 already processed for geo, skipping...
300 already processed for geo, skipping...
307 already processed for geo, skipping...
318 already processed for geo, skipping...
319 already processed for geo, skipping...




325 already processed for geo, skipping...
330 already processed for geo, skipping...
333 already processed for geo, skipping...
336 already processed for geo, skipping...
351 already processed for geo, skipping...
354 already processed for geo, skipping...
357 already processed for geo, skipping...
364 already processed for geo, skipping...
366 already processed for geo, skipping...
373 already processed for geo, skipping...
386 already processed for geo, skipping...
390 already processed for geo, skipping...
403 already processed for geo, skipping...
414 already processed for geo, skipping...
415 already processed for geo, skipping...




416 already processed for geo, skipping...
417 already processed for geo, skipping...
418 already processed for geo, skipping...
423 already processed for geo, skipping...
427 already processed for geo, skipping...
437 already processed for geo, skipping...
439 already processed for geo, skipping...
441 already processed for geo, skipping...
442 already processed for geo, skipping...
455 already processed for geo, skipping...
463 already processed for geo, skipping...
467 already processed for geo, skipping...
472 already processed for geo, skipping...
473 already processed for geo, skipping...




474 already processed for geo, skipping...
514 already processed for geo, skipping...
521 already processed for geo, skipping...
522 already processed for geo, skipping...
524 already processed for geo, skipping...
525 already processed for geo, skipping...
526 already processed for geo, skipping...
534 already processed for geo, skipping...
537 already processed for geo, skipping...
544 already processed for geo, skipping...
545 already processed for geo, skipping...




549 already processed for geo, skipping...
550 already processed for geo, skipping...
556 already processed for geo, skipping...
559 already processed for geo, skipping...
568 already processed for geo, skipping...
589 already processed for geo, skipping...
608 already processed for geo, skipping...
621 already processed for geo, skipping...
628 already processed for geo, skipping...
641 already processed for geo, skipping...




648 already processed for geo, skipping...
654 already processed for geo, skipping...
662 already processed for geo, skipping...
675 already processed for geo, skipping...
701 already processed for geo, skipping...
706 already processed for geo, skipping...
730 already processed for geo, skipping...
739 already processed for geo, skipping...
742 already processed for geo, skipping...
748 already processed for geo, skipping...
761 already processed for geo, skipping...
764 already processed for geo, skipping...
766 already processed for geo, skipping...
767 already processed for geo, skipping...
771 already processed for geo, skipping...
773 already processed for geo, skipping...




785 already processed for geo, skipping...
811 already processed for geo, skipping...
812 already processed for geo, skipping...
813 already processed for geo, skipping...
819 already processed for geo, skipping...
823 already processed for geo, skipping...
850 already processed for geo, skipping...
855 already processed for geo, skipping...
861 already processed for geo, skipping...
870 already processed for geo, skipping...
873 already processed for geo, skipping...




893 already processed for geo, skipping...
913 already processed for geo, skipping...
923 already processed for geo, skipping...
934 already processed for geo, skipping...
946 already processed for geo, skipping...
951 already processed for geo, skipping...
961 already processed for geo, skipping...
965 already processed for geo, skipping...
1008 already processed for geo, skipping...
1011 already processed for geo, skipping...
1039 already processed for geo, skipping...
1050 already processed for geo, skipping...
1060 already processed for geo, skipping...
1064 already processed for geo, skipping...
1101 already processed for geo, skipping...
1113 already processed for geo, skipping...




1137 already processed for geo, skipping...
1167 already processed for geo, skipping...
1168 already processed for geo, skipping...
1174 already processed for geo, skipping...
1203 already processed for geo, skipping...
1211 already processed for geo, skipping...
1215 already processed for geo, skipping...
1222 already processed for geo, skipping...
1226 already processed for geo, skipping...
1246 already processed for geo, skipping...
1250 already processed for geo, skipping...
1255 already processed for geo, skipping...
1284 already processed for geo, skipping...
1301 already processed for geo, skipping...




1335 already processed for geo, skipping...
1336 already processed for geo, skipping...
1343 already processed for geo, skipping...
1355 already processed for geo, skipping...
1388 already processed for geo, skipping...
1389 already processed for geo, skipping...
1394 already processed for geo, skipping...
1399 already processed for geo, skipping...
1414 already processed for geo, skipping...
1426 already processed for geo, skipping...
1451 already processed for geo, skipping...
1512 already processed for geo, skipping...
1546 already processed for geo, skipping...




1547 already processed for geo, skipping...
1559 already processed for geo, skipping...
1564 already processed for geo, skipping...
1634 already processed for geo, skipping...
1668 already processed for geo, skipping...
1680 already processed for geo, skipping...
1696 already processed for geo, skipping...
1709 already processed for geo, skipping...
1727 already processed for geo, skipping...
1781 already processed for geo, skipping...




1787 already processed for geo, skipping...
1813 already processed for geo, skipping...
1814 already processed for geo, skipping...
1831 already processed for geo, skipping...
1861 already processed for geo, skipping...
1864 already processed for geo, skipping...
1889 already processed for geo, skipping...
1893 already processed for geo, skipping...
1915 already processed for geo, skipping...
1920 already processed for geo, skipping...
1969 already processed for geo, skipping...
2280 already processed for geo, skipping...
image name =  2288
base loss (before generation): [array(0.6943, dtype=float16), array(0.85, dtype=float16), array(0.7812, dtype=float16), array(0.7354, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.58it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.59it/s]


loss: [array(0.816, dtype=float16), array(0.8696, dtype=float16), array(0.841, dtype=float16), array(0.8125, dtype=float16)]
Saved poisoned image 2288 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2288.png
Time: 47.83651375770569
image name =  2293
base loss (before generation): [array(0.747, dtype=float16), array(0.8833, dtype=float16), array(0.833, dtype=float16), array(0.845, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.54it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.54it/s]


loss: [array(0.89, dtype=float16), array(0.9224, dtype=float16), array(0.909, dtype=float16), array(0.912, dtype=float16)]
Saved poisoned image 2293 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2293.png
Time: 47.581368923187256
image name =  2316
base loss (before generation): [array(0.761, dtype=float16), array(0.8936, dtype=float16), array(0.81, dtype=float16), array(0.8247, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.52it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.58it/s]


loss: [array(0.8574, dtype=float16), array(0.927, dtype=float16), array(0.858, dtype=float16), array(0.8774, dtype=float16)]
Saved poisoned image 2316 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2316.png
Time: 47.52842617034912
image name =  2398
base loss (before generation): [array(0.7925, dtype=float16), array(0.877, dtype=float16), array(0.777, dtype=float16), array(0.7803, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.61it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.52it/s]


loss: [array(0.8604, dtype=float16), array(0.909, dtype=float16), array(0.8237, dtype=float16), array(0.863, dtype=float16)]
Saved poisoned image 2398 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2398.png
Time: 47.4705970287323
image name =  2443
base loss (before generation): [array(0.955, dtype=float16), array(0.969, dtype=float16), array(0.933, dtype=float16), array(0.919, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.64it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.49it/s]


loss: [array(0.9688, dtype=float16), array(0.9717, dtype=float16), array(0.9326, dtype=float16), array(0.9243, dtype=float16)]
Saved poisoned image 2443 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2443.png
Time: 47.4814076423645
image name =  2474
base loss (before generation): [array(0.805, dtype=float16), array(0.9, dtype=float16), array(0.864, dtype=float16), array(0.847, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.68it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.48it/s]


loss: [array(0.8735, dtype=float16), array(0.9263, dtype=float16), array(0.9263, dtype=float16), array(0.9233, dtype=float16)]
Saved poisoned image 2474 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2474.png
Time: 47.40573000907898
image name =  2513
base loss (before generation): [array(0.557, dtype=float16), array(0.687, dtype=float16), array(0.6235, dtype=float16), array(0.6133, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.50it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.50it/s]


loss: [array(0.7617, dtype=float16), array(0.834, dtype=float16), array(0.825, dtype=float16), array(0.797, dtype=float16)]
Saved poisoned image 2513 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2513.png
Time: 47.85052943229675
image name =  2555
base loss (before generation): [array(0.6826, dtype=float16), array(0.7915, dtype=float16), array(0.6772, dtype=float16), array(0.64, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.55it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.49it/s]


loss: [array(0.7896, dtype=float16), array(0.8447, dtype=float16), array(0.8228, dtype=float16), array(0.814, dtype=float16)]
Saved poisoned image 2555 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2555.png
Time: 47.69851636886597
image name =  2565
base loss (before generation): [array(0.6284, dtype=float16), array(0.723, dtype=float16), array(0.608, dtype=float16), array(0.585, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.58it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.50it/s]


loss: [array(0.892, dtype=float16), array(0.9062, dtype=float16), array(0.8896, dtype=float16), array(0.8516, dtype=float16)]
Saved poisoned image 2565 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2565.png
Time: 47.60448431968689
image name =  2588
base loss (before generation): [array(0.439, dtype=float16), array(0.6533, dtype=float16), array(0.4744, dtype=float16), array(0.5522, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.50it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.62it/s]


loss: [array(0.7944, dtype=float16), array(0.869, dtype=float16), array(0.7993, dtype=float16), array(0.8354, dtype=float16)]
Saved poisoned image 2588 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2588.png
Time: 47.48912763595581
image name =  2589
base loss (before generation): [array(0.778, dtype=float16), array(0.8477, dtype=float16), array(0.8125, dtype=float16), array(0.802, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:22<00:00,  8.73it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.63it/s]


loss: [array(0.8647, dtype=float16), array(0.8853, dtype=float16), array(0.8477, dtype=float16), array(0.8647, dtype=float16)]
Saved poisoned image 2589 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2589.png
Time: 46.84327220916748
image name =  2634
base loss (before generation): [array(0.5693, dtype=float16), array(0.706, dtype=float16), array(0.6216, dtype=float16), array(0.568, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.53it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.50it/s]


loss: [array(0.8164, dtype=float16), array(0.8833, dtype=float16), array(0.8687, dtype=float16), array(0.8423, dtype=float16)]
Saved poisoned image 2634 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2634.png
Time: 47.77852749824524
image name =  2636
base loss (before generation): [array(0.5054, dtype=float16), array(0.6357, dtype=float16), array(0.4592, dtype=float16), array(0.4731, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.56it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.48it/s]


loss: [array(0.792, dtype=float16), array(0.848, dtype=float16), array(0.8477, dtype=float16), array(0.8247, dtype=float16)]
Saved poisoned image 2636 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2636.png
Time: 47.724663496017456
image name =  2648
base loss (before generation): [array(0.9033, dtype=float16), array(0.895, dtype=float16), array(0.869, dtype=float16), array(0.8066, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.62it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.48it/s]


loss: [array(0.9165, dtype=float16), array(0.9014, dtype=float16), array(0.8716, dtype=float16), array(0.862, dtype=float16)]
Saved poisoned image 2648 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2648.png
Time: 47.550102949142456
image name =  2659
base loss (before generation): [array(0.7563, dtype=float16), array(0.782, dtype=float16), array(0.7075, dtype=float16), array(0.698, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.51it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.65it/s]


loss: [array(0.8525, dtype=float16), array(0.904, dtype=float16), array(0.8716, dtype=float16), array(0.879, dtype=float16)]
Saved poisoned image 2659 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2659.png
Time: 47.354485750198364
image name =  2665
base loss (before generation): [array(0.6587, dtype=float16), array(0.7954, dtype=float16), array(0.6553, dtype=float16), array(0.7134, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.48it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.58it/s]


loss: [array(0.84, dtype=float16), array(0.8555, dtype=float16), array(0.8057, dtype=float16), array(0.8276, dtype=float16)]
Saved poisoned image 2665 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2665.png
Time: 47.655097246170044
image name =  2673
base loss (before generation): [array(0.68, dtype=float16), array(0.7896, dtype=float16), array(0.712, dtype=float16), array(0.7456, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.57it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.56it/s]


loss: [array(0.738, dtype=float16), array(0.8643, dtype=float16), array(0.794, dtype=float16), array(0.8174, dtype=float16)]
Saved poisoned image 2673 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2673.png
Time: 47.45662569999695
image name =  2688
base loss (before generation): [array(0.6143, dtype=float16), array(0.6836, dtype=float16), array(0.581, dtype=float16), array(0.4844, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.57it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.51it/s]


loss: [array(0.8257, dtype=float16), array(0.872, dtype=float16), array(0.847, dtype=float16), array(0.8433, dtype=float16)]
Saved poisoned image 2688 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2688.png
Time: 47.627983808517456
image name =  2741
base loss (before generation): [array(0.5083, dtype=float16), array(0.637, dtype=float16), array(0.4434, dtype=float16), array(0.528, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.66it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.62it/s]


loss: [array(0.8413, dtype=float16), array(0.919, dtype=float16), array(0.888, dtype=float16), array(0.876, dtype=float16)]
Saved poisoned image 2741 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2741.png
Time: 47.06367492675781
image name =  2743
base loss (before generation): [array(0.7207, dtype=float16), array(0.846, dtype=float16), array(0.8115, dtype=float16), array(0.8125, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.61it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:22<00:00,  8.74it/s]


loss: [array(0.831, dtype=float16), array(0.906, dtype=float16), array(0.885, dtype=float16), array(0.896, dtype=float16)]
Saved poisoned image 2743 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2743.png
Time: 46.87690544128418
image name =  2757
base loss (before generation): [array(0.7305, dtype=float16), array(0.78, dtype=float16), array(0.7065, dtype=float16), array(0.6626, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.61it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.49it/s]


loss: [array(0.897, dtype=float16), array(0.9043, dtype=float16), array(0.901, dtype=float16), array(0.886, dtype=float16)]
Saved poisoned image 2757 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2757.png
Time: 47.54756307601929
image name =  2773
base loss (before generation): [array(0.736, dtype=float16), array(0.8354, dtype=float16), array(0.7954, dtype=float16), array(0.7495, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.53it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.57it/s]


loss: [array(0.853, dtype=float16), array(0.899, dtype=float16), array(0.8535, dtype=float16), array(0.8604, dtype=float16)]
Saved poisoned image 2773 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2773.png
Time: 47.553611278533936
image name =  2774
base loss (before generation): [array(0.601, dtype=float16), array(0.728, dtype=float16), array(0.7085, dtype=float16), array(0.646, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.56it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:22<00:00,  8.71it/s]


loss: [array(0.8516, dtype=float16), array(0.901, dtype=float16), array(0.829, dtype=float16), array(0.8604, dtype=float16)]
Saved poisoned image 2774 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2774.png
Time: 47.18687033653259
image name =  2786
base loss (before generation): [array(0.7134, dtype=float16), array(0.79, dtype=float16), array(0.722, dtype=float16), array(0.662, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.49it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.55it/s]


loss: [array(0.8545, dtype=float16), array(0.8857, dtype=float16), array(0.843, dtype=float16), array(0.824, dtype=float16)]
Saved poisoned image 2786 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2786.png
Time: 47.855570793151855
image name =  2955
base loss (before generation): [array(0.79, dtype=float16), array(0.856, dtype=float16), array(0.8413, dtype=float16), array(0.8, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:22<00:00,  8.72it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.51it/s]


loss: [array(0.849, dtype=float16), array(0.892, dtype=float16), array(0.878, dtype=float16), array(0.8545, dtype=float16)]
Saved poisoned image 2955 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2955.png
Time: 47.21005392074585
image name =  2984
base loss (before generation): [array(0.7617, dtype=float16), array(0.86, dtype=float16), array(0.817, dtype=float16), array(0.813, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.53it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.52it/s]


loss: [array(0.9004, dtype=float16), array(0.9277, dtype=float16), array(0.8945, dtype=float16), array(0.892, dtype=float16)]
Saved poisoned image 2984 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/2984.png
Time: 47.68133783340454
image name =  3033
base loss (before generation): [array(0.793, dtype=float16), array(0.8975, dtype=float16), array(0.8477, dtype=float16), array(0.8594, dtype=float16)]
Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.50it/s]


Data shape for DDIM sampling is (1, 3, 128, 128), eta 0.05
Running Adversarial Sampling at 0 step
Running DDIM Sampling with 200 timesteps



[A
[A
[A
[A
DDIM Sampler: 100%|██████████| 200/200 [00:23<00:00,  8.64it/s]
Target Dataloader: 20it [23:15, 69.77s/it] 

loss: [array(0.8726, dtype=float16), array(0.9395, dtype=float16), array(0.906, dtype=float16), array(0.9214, dtype=float16)]
Saved poisoned image 3033 to ./data/poisons/mini_MathVision_grid+i2i_AdvDiffVLMAttack/geo/3033.png
Time: 47.43624424934387





In [18]:
raise Exception("Here to stop execution for this section")

Exception: Here to stop execution for this section

# Mathvista Evaluation Pipeline

### Generate Response

In [None]:
attacking_model="internvl"
captioning_model = "gpt-3.5-turbo"
method = "i2i_EnsembleAttack"

In [None]:
import os
import io
import time
import argparse

from tqdm import tqdm

import sys
from poison_utils import *

from eval_models import internlm, internvl
from eval_models import gpt

from build_query import create_query_data



In [None]:


def verify_response(response):
    if isinstance(response, str):
        response = response.strip() 
    if response == "" or response == None:
        return False
    if "Response Error" in response:
        return False
    return True


def evaluate_code(code_string):
    # execute_code_and_capture_output
    # Backup the original stdout
    old_stdout = sys.stdout
    
    # Redirect stdout to capture the output
    new_stdout = io.StringIO()
    sys.stdout = new_stdout
    
    # Try executing the code and capture any exception
    error = None
    try:
        exec(code_string)
    except Exception as e:
        error = e
    
    # Restore the original stdout
    sys.stdout = old_stdout
    
    # Get the captured output
    captured_output = new_stdout.getvalue()
    if isinstance(captured_output, str):
        captured_output = captured_output.strip()
    
    # Return the captured output or error
    return captured_output, error


: 

In [None]:
task_name = "mini_MathVista_grid"
class Args:
    def __init__(self, task_name, attacking_model, captioning_model):
        # Input
        self.poison_data_dir = f'data/poisons/{task_name}+{method}'
        self.input_file = 'questions.json'
        self.task_data_pth = f'data/{task_name}'

        # Output
        self.output_dir = f'results/{attacking_model}'
        self.output_file = f'output_{attacking_model}.json'

        # Model
        self.model = attacking_model
        self.key = ""

        # Query
        self.query_file = None
        # self.caption_file = '../data/texts/captions_bard.json'
        # self.ocr_file = '../data/texts/ocrs_easyocr.json'
        self.shot_type = 'solution'
        self.shot_num = 0
        self.use_caption = False
        self.use_ocr = False

        # Other settings
        self.rerun = False
        self.debug = False

    def __repr__(self):
        return f"Args({self.__dict__})"

args = Args(task_name, attacking_model, captioning_model)

In [None]:

    # load data
    input_file = os.path.join(args.task_data_pth, args.input_file)
    print(f"Reading {input_file}...")
    data = read_json(input_file)
    # load or create query data
    if args.query_file:
        query_file = os.path.join(args.task_data_pth, args.query_file)
        if os.path.exists(query_file):
            print(f"Loading existing {query_file}...")
            query_data = read_json(query_file)
    else:
        print("\nCreating new query...")
        # load caption
        caption_data = {}
        if args.use_caption:
            caption_file = args.caption_file
            if os.path.exists(caption_file):
                print(f"Reading {caption_file}...")
                try:
                    caption_data = read_json(caption_file)["texts"]
                    print("Caption data loaded.")
                except:
                    print("Caption data not found!! Please Check.")                    
        # load ocr
        ocr_data = {}
        if args.use_ocr:
            ocr_file = args.ocr_file
            if os.path.exists(ocr_file):
                print(f"Reading {ocr_file}...")
                try:
                    ocr_data = read_json(ocr_file)["texts"]
                    print("OCR data loaded.")
                except:
                    print("OCR data not found!! Please Check.")
        # create query
        query_data = create_query_data(data, caption_data, ocr_data, args)

    # output file
    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(args.output_dir, args.output_file)
    
    # load results
    if os.path.exists(output_file):
        print("\nResults already exist.")
        print(f"Reading {output_file}...")
        results = read_json(output_file)
    else:
        results = {}

    # load model
    print(f"\nLoading {args.model}...")
    if args.model == 'bard':
        if args.key == '':
            print("Loading key from environment variable")
            key = os.environ['_BARD_API_KEY']
        else:
            key = args.key
        model = bard.Bard_Model(key)
    
    elif "gpt" in args.model:
        if args.key == '':
            print("Loading token from environment variable")
            key = os.getenv("OPENAI_API_KEY")
        else:
            key = args.key
        model = gpt.GPT_Model(args.model, key)
    
    elif "claude" in args.model:
        if args.key == '':
            print("Loading token from environment variable")
            key = os.environ.get("ANTHROPIC_API_KEY")
        else:
            key = args.key
        model = claude.Claude_Model(args.model, key)
    elif "internlm" in args.model:
        model = internlm.InternLM_Model()
    elif "llava_one_v" in args.model:
        model = llava_one_v.Llava_One_V()
    elif "internvl" in args.model:
        model = internvl.InternVL_Model()
            
    
    print(f"Model loaded.")
    
    # build final test pid list
    test_pids = list(data.keys())
    print("\nNumber of test problems in total:", len(test_pids))

    available_directories = [d for d in os.listdir(args.poison_data_dir) if os.path.isdir(os.path.join(args.poison_data_dir, d))]
    target_names = read_json(os.path.join(args.task_data_pth, "target/caps.json"))
    if not all([name in available_directories for name in target_names]):
        print("Not all targets have directories. Working with:", available_directories)
    target_names = [item["name"] for item in target_names["annotations"] if item["name"] in available_directories]

    skip_pids = []
    if not args.rerun:
        print("\nRemoving problems with existing valid response...")
        for i, name in enumerate(target_names):
            skip_pids.append([])
            for pid in test_pids:
                # print(f"Checking {pid}...")
                if pid in results and 'response' in results[pid]:
                    response = results[pid][name]['response']
                    if verify_response(response):
                        # print(f"Valid response found for {pid}.")
                        skip_pids[i].append(pid)
    else:
        print("\nRerun answer extraction for all problems...")

    test_pids = [[pid for pid in test_pids if pid not in target_skip_pids] for target_skip_pids in skip_pids]
    print("Number of test problems to run for each target:", {target_names[i]: len(target_pids) for i, target_pids in enumerate(test_pids)})
    # print(test_pids)

    

    # tqdm, enumerate results
    for i, target_name in enumerate(target_names):
        for _, pid in enumerate(tqdm(test_pids[i])):
            problem = data[pid]
            query = query_data[pid]
            image = problem['image']
            image_path = os.path.join(args.poison_data_dir, target_name, f'{int(image.split(".")[0])+1}.png')

            if args.debug:
                print("--------------------------------------------------------------")
            print(f"\nGenerating response for {pid}...")
            try:
                response = model.get_response(image_path, query)
                new_caption = model.get_response(image_path, "describe what is in this image")
                # print(f"Response: {response}")
                if pid not in results:
                    results[pid] = problem
                if "targets" not in results[pid]:
                    results[pid]["targets"] = {}
                if target_name not in results[pid]["targets"]:
                    results[pid]["targets"][target_name] = {}

                results[pid]["targets"][target_name]['query'] = query
                results[pid]["targets"][target_name]['model_description'] = new_caption
                if args.shot_type == 'solution':
                    results[pid]["targets"][target_name]['response'] = response
                else:
                    output, error = evaluate_code(response)
                    results[pid]["targets"][target_name]['response'] = response
                    results[pid]["targets"][target_name]['execution'] = output
                    results[pid]["targets"][target_name]['error'] = str(error)
                if args.debug:
                    print(f"\n#Query: \n{query}")
                    print(f"\n#Response: \n{response}")
            except Exception as e:
                print(e)
                print(f"Error in extracting answer for {pid}")
                results[pid][target_name]['error'] = e
        
            try:
                print(f"Saving results to {output_file}...")
                save_json(results, output_file)
                print(f"Results saved.")
            except Exception as e:
                print(e)
                print(f"Error in saving {output_file}")

Reading data/mini_MathVista_grid/questions.json...

Creating new query...

Results already exist.
Reading results/internvl/output_internvl.json...

Loading internvl...
FlashAttention2 is not installed.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Model loaded.

Number of test problems in total: 100
Not all targets have directories. Working with: ['bar', 'docs', 'tbl', 'synth', 'nat', 'pie', 'sci', 'vlin', 'rdar', 'pzle', 'med', 'sctr', 'func', 'line', 'map', 'abst', 'heat']

Removing problems with existing valid response...
Number of test problems to run for each target: {'bar': 100, 'docs': 100, 'tbl': 100, 'synth': 100, 'nat': 100, 'pie': 100, 'sci': 100, 'vlin': 100, 'rdar': 100, 'pzle': 100, 'med': 100, 'sctr': 100, 'func': 100, 'line': 100, 'map': 100, 'abst': 100, 'heat': 100}


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:20<34:07, 20.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:22<15:53,  9.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:45<25:39, 15.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:47<16:37, 10.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [01:02<18:48, 11.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:10<16:48, 10.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:13<12:43,  8.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:20<11:52,  7.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:30<12:46,  8.42s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:37<11:43,  7.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [01:39<08:56,  6.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [01:42<07:40,  5.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [01:50<08:51,  6.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [01:59<09:48,  6.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:07<10:12,  7.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:12<09:15,  6.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:22<10:33,  7.64s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:26<09:02,  6.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:29<07:28,  5.54s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:35<07:28,  5.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:36<05:38,  4.29s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:39<05:10,  3.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [02:54<09:18,  7.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [02:59<08:11,  6.47s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:06<08:20,  6.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:11<07:43,  6.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:14<06:06,  5.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:24<08:06,  6.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:28<06:47,  5.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [03:41<09:18,  7.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [03:47<08:35,  7.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [03:59<10:02,  8.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:11<10:45,  9.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [04:30<13:55, 12.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:00<19:09, 17.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:02<13:47, 12.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:17<14:11, 13.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:19<10:40, 10.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [05:33<11:24, 11.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [05:34<08:21,  8.36s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [05:37<06:24,  6.51s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [05:39<05:02,  5.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [05:52<07:04,  7.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [06:00<07:19,  7.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:08<07:10,  7.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:13<06:16,  6.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:20<06:07,  6.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [06:35<08:00,  9.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [06:38<06:19,  7.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [06:44<05:55,  7.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [06:49<05:21,  6.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [06:52<04:16,  5.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [06:55<03:33,  4.54s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:04<04:34,  5.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [07:19<06:36,  8.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [07:25<05:47,  7.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [07:28<04:30,  6.29s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [07:34<04:27,  6.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [07:47<05:35,  8.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [07:51<04:37,  6.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [07:57<04:26,  6.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [08:14<06:17,  9.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [08:21<05:33,  9.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [08:34<06:04, 10.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [08:48<06:32, 11.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [08:52<05:15,  9.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [08:56<04:05,  7.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [09:07<04:35,  8.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [09:10<03:31,  6.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [09:13<02:51,  5.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [09:18<02:41,  5.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [09:23<02:35,  5.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [09:56<06:10, 13.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [10:02<04:57, 11.42s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [10:09<04:06,  9.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [10:16<03:42,  9.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [10:22<03:10,  8.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [10:29<02:50,  7.75s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [10:48<03:55, 11.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [10:56<03:25, 10.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [11:07<03:15, 10.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [11:20<03:19, 11.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [11:27<02:52, 10.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [11:45<03:16, 12.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [11:52<02:43, 10.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [12:07<02:46, 11.91s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [12:13<02:14, 10.32s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [12:25<02:08, 10.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [12:28<01:33,  8.51s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [12:31<01:08,  6.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [12:35<00:54,  6.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [12:40<00:43,  5.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [12:48<00:44,  6.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [12:59<00:46,  7.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [13:17<00:54, 10.99s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [13:22<00:36,  9.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [13:26<00:22,  7.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [13:30<00:12,  6.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [13:33<00:05,  5.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [13:40<00:00,  8.21s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:23<38:18, 23.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:25<17:59, 11.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:47<25:33, 15.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:49<16:36, 10.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [01:02<18:18, 11.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:12<16:55, 10.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:14<12:34,  8.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:22<12:01,  7.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:33<13:28,  8.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:41<13:08,  8.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [01:44<10:08,  6.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [01:47<08:30,  5.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [01:51<07:36,  5.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [01:59<08:49,  6.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:09<10:04,  7.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:15<09:32,  6.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:25<10:52,  7.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:29<09:14,  6.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:32<07:20,  5.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:40<08:30,  6.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:41<06:19,  4.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:45<05:35,  4.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [02:55<07:43,  6.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [02:59<07:03,  5.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:10<09:08,  7.32s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:17<08:39,  7.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:19<06:46,  5.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:33<09:37,  8.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:37<08:06,  6.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [03:51<10:32,  9.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [03:56<08:58,  7.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:09<10:37,  9.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:18<10:25,  9.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [04:37<13:22, 12.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [04:57<15:37, 14.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [04:59<11:25, 10.71s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:12<12:04, 11.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:15<09:11,  8.89s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [05:29<10:35, 10.41s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [05:31<07:54,  7.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [05:33<06:08,  6.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [05:35<04:50,  5.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [05:47<06:42,  7.05s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [05:55<06:51,  7.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:04<07:12,  7.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:09<06:21,  7.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:18<06:39,  7.53s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [06:28<07:02,  8.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [06:31<05:46,  6.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [06:40<06:09,  7.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [06:45<05:21,  6.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [06:47<04:17,  5.36s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [06:50<03:34,  4.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:00<04:40,  6.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [07:18<07:14,  9.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [07:24<06:20,  8.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [07:26<04:51,  6.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [07:34<04:50,  6.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [07:55<07:39, 11.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:01<06:26,  9.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:06<05:28,  8.42s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [08:21<06:35, 10.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [08:32<06:24, 10.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [08:44<06:34, 10.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [08:59<07:10, 12.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:06<06:00, 10.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:09<04:34,  8.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [09:23<05:18,  9.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [09:25<04:00,  7.75s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [09:30<03:22,  6.75s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [09:36<03:05,  6.41s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [09:40<02:45,  5.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [09:51<03:16,  7.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [09:57<03:04,  7.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [10:00<02:23,  5.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [10:08<02:35,  6.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [10:13<02:20,  6.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [10:20<02:18,  6.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [10:33<02:55,  8.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [10:43<02:55,  8.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [10:53<02:54,  9.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [11:09<03:20, 11.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [11:18<02:57, 10.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [11:38<03:32, 13.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [11:45<02:50, 11.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [12:01<03:02, 13.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [12:13<02:42, 12.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [12:29<02:44, 13.70s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [12:33<01:57, 10.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [12:39<01:32,  9.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [12:43<01:08,  7.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [12:47<00:52,  6.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [12:54<00:46,  6.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [13:08<00:53,  8.95s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [13:23<00:54, 10.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [13:28<00:36,  9.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [13:31<00:21,  7.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [13:34<00:11,  5.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [13:38<00:05,  5.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [13:41<00:00,  8.21s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:19<32:16, 19.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:22<16:06,  9.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:48<27:33, 17.05s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:50<18:14, 11.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [01:04<19:29, 12.32s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:14<17:49, 11.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:16<13:05,  8.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:24<12:23,  8.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:38<15:10, 10.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [02:10<25:05, 16.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [02:12<18:03, 12.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [02:14<13:41,  9.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [02:19<11:25,  7.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [02:27<11:22,  7.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:35<11:05,  7.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:41<10:15,  7.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:52<11:49,  8.55s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:57<10:10,  7.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:59<07:54,  5.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [03:06<08:04,  6.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [03:07<06:12,  4.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [03:11<05:46,  4.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [03:33<12:33,  9.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [03:38<10:24,  8.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:45<09:47,  7.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:49<08:28,  6.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:52<06:38,  5.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [04:03<08:45,  7.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [04:07<07:15,  6.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [04:26<11:45, 10.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [04:31<09:53,  8.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:45<11:28, 10.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:54<11:04,  9.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [05:28<18:39, 16.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:54<21:35, 19.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:57<15:37, 14.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [06:09<14:27, 13.77s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [06:12<11:02, 10.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [06:27<12:08, 11.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [06:33<10:12, 10.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [06:35<07:42,  7.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [06:37<05:55,  6.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [06:50<07:45,  8.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [06:58<07:35,  8.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [07:08<07:44,  8.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [07:14<06:57,  7.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [07:17<05:32,  6.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [07:26<06:10,  7.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [07:29<05:09,  6.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [07:37<05:24,  6.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [07:42<04:53,  5.99s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [07:44<03:54,  4.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [07:47<03:21,  4.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:57<04:37,  6.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [08:14<06:57,  9.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [08:19<05:57,  8.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [08:21<04:27,  6.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [08:27<04:21,  6.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [08:45<06:40,  9.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:50<05:29,  8.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:59<05:35,  8.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [09:14<06:39, 10.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [09:21<05:48,  9.42s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [09:35<06:22, 10.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [09:43<05:46,  9.91s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:46<04:28,  7.91s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:50<03:36,  6.55s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [10:12<06:02, 11.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [10:15<04:30,  8.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [10:18<03:29,  6.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [10:23<03:07,  6.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [10:27<02:45,  5.91s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [11:01<06:23, 14.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [11:08<05:16, 12.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [11:11<03:52,  9.32s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [11:19<03:34,  8.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [11:24<02:54,  7.58s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [11:32<02:50,  7.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [11:52<03:59, 11.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [12:00<03:30, 10.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [12:12<03:26, 10.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [12:25<03:25, 11.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [12:32<02:54, 10.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [12:51<03:23, 12.71s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [12:56<02:36, 10.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [13:12<02:50, 12.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [13:24<02:38, 12.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [13:37<02:29, 12.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [13:40<01:44,  9.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [13:41<01:11,  7.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [13:45<00:54,  6.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [13:49<00:42,  5.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [13:53<00:36,  5.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [14:10<00:52,  8.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [14:33<01:05, 13.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [14:39<00:42, 10.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [14:42<00:25,  8.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [14:44<00:13,  6.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [14:48<00:05,  5.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [14:57<00:00,  8.98s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:18<31:16, 18.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:21<15:26,  9.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:37<19:57, 12.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:40<13:55,  8.70s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [00:52<15:37,  9.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [00:59<13:54,  8.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:03<11:02,  7.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:09<10:32,  6.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:18<11:30,  7.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:43<19:14, 12.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [01:45<14:29,  9.77s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [01:50<11:57,  8.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [01:56<10:50,  7.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [02:04<11:12,  7.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:13<11:35,  8.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:20<10:33,  7.54s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:29<11:11,  8.09s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:32<09:12,  6.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:35<07:12,  5.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:42<07:50,  5.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:43<05:57,  4.53s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:47<05:34,  4.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [02:59<08:27,  6.60s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [03:02<07:10,  5.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:13<08:50,  7.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:20<08:48,  7.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:22<06:42,  5.51s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:33<08:36,  7.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:37<07:28,  6.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [03:50<09:32,  8.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [03:55<08:30,  7.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:07<09:59,  8.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:17<10:05,  9.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [04:45<16:19, 14.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:02<16:40, 15.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:05<12:28, 11.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:14<11:31, 10.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:17<08:53,  8.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [05:31<10:18, 10.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [05:33<07:47,  7.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [05:36<06:04,  6.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [05:38<04:54,  5.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [05:51<07:03,  7.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [05:59<07:07,  7.64s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:07<07:08,  7.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:13<06:30,  7.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:20<06:15,  7.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [06:31<07:03,  8.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [06:34<05:37,  6.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [06:49<07:47,  9.34s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [06:58<07:25,  9.09s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [07:00<05:37,  7.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [07:03<04:34,  5.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:13<05:26,  7.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [07:31<07:45, 10.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [07:37<06:34,  8.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [07:42<05:35,  7.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [07:48<05:04,  7.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [08:07<07:23, 10.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:11<05:54,  8.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:17<05:02,  7.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [08:31<06:12,  9.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [08:34<04:39,  7.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [08:47<05:32,  9.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [08:55<05:14,  8.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:01<04:33,  8.05s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:04<03:37,  6.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [09:19<04:51,  9.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [09:21<03:40,  7.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [09:24<02:54,  5.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [09:30<02:46,  5.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [09:37<02:53,  6.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [09:48<03:27,  7.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [09:54<03:03,  7.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [09:57<02:30,  6.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [10:06<02:40,  6.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [10:12<02:30,  6.55s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [10:22<02:45,  7.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [10:34<03:09,  9.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [10:44<03:06,  9.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [10:55<03:03,  9.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [11:08<03:14, 10.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [11:13<02:31,  8.89s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [11:28<02:53, 10.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [11:33<02:16,  9.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [11:53<02:53, 12.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [12:04<02:35, 12.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [12:17<02:26, 12.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [12:19<01:41,  9.22s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [12:21<01:11,  7.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [12:26<00:58,  6.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [12:30<00:45,  5.64s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [12:35<00:38,  5.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [12:52<00:53,  8.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [13:07<00:53, 10.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [13:12<00:35,  8.95s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [13:15<00:21,  7.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [13:18<00:12,  6.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [13:21<00:05,  5.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [13:28<00:00,  8.09s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:28<46:31, 28.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:30<21:31, 13.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:53<28:26, 17.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:56<18:39, 11.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [01:15<22:36, 14.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:27<21:08, 13.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:30<15:43, 10.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:38<14:36,  9.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:50<15:47, 10.41s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [02:24<26:08, 17.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [02:25<18:47, 12.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [02:28<14:07,  9.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [02:32<11:30,  7.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [02:41<11:38,  8.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:50<12:01,  8.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:56<10:48,  7.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [03:09<12:41,  9.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [03:12<10:14,  7.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [03:14<07:56,  5.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [03:20<07:51,  5.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [03:21<05:53,  4.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [03:24<05:12,  4.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [03:36<08:07,  6.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [03:40<07:13,  5.71s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:46<07:02,  5.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:53<07:29,  6.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:55<06:02,  4.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [04:07<08:34,  7.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [04:11<07:11,  6.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [04:24<09:22,  8.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [04:30<08:46,  7.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:41<09:41,  8.55s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:51<10:07,  9.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [05:20<16:26, 14.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:41<18:02, 16.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:42<13:02, 12.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:50<11:28, 10.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:53<08:48,  8.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [06:09<10:47, 10.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [06:11<08:06,  8.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [06:13<06:03,  6.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [06:15<04:44,  4.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [06:29<07:14,  7.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [06:37<07:26,  7.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:46<07:31,  8.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:51<06:32,  7.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:53<05:01,  5.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [07:14<08:43, 10.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [07:18<07:02,  8.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [07:26<06:57,  8.36s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [07:32<06:11,  7.58s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [07:34<04:39,  5.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [07:37<04:01,  5.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:47<04:54,  6.41s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [08:04<07:15,  9.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [08:11<06:27,  8.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [08:13<04:57,  6.91s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [08:26<06:05,  8.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [08:43<07:41, 11.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:47<06:06,  9.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:54<05:27,  8.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [09:13<07:15, 11.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [09:15<05:24,  8.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [09:29<06:12, 10.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [09:51<08:02, 13.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:54<06:01, 10.64s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:58<04:40,  8.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [10:17<06:18, 11.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [10:21<04:52,  9.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [10:24<03:44,  7.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [10:30<03:20,  6.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [10:36<03:09,  6.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [10:45<03:17,  7.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [10:51<02:58,  6.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [10:54<02:23,  5.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [11:02<02:35,  6.47s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [11:07<02:18,  6.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [11:12<02:09,  5.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [11:29<03:09,  9.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [11:42<03:24, 10.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [11:53<03:20, 10.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [12:06<03:22, 11.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [12:13<02:47,  9.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [12:29<03:09, 11.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [12:34<02:25,  9.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [12:48<02:35, 11.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [13:01<02:31, 11.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [13:10<02:10, 10.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [13:13<01:33,  8.47s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [13:15<01:05,  6.53s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [13:18<00:50,  5.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [13:22<00:40,  5.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [13:29<00:38,  5.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [13:45<00:52,  8.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [14:02<00:55, 11.05s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [14:07<00:37,  9.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [14:10<00:22,  7.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [14:13<00:11,  6.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [14:16<00:05,  5.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [14:24<00:00,  8.65s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:19<31:40, 19.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:23<16:37, 10.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:38<20:22, 12.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:40<13:10,  8.24s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [00:54<16:50, 10.64s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:02<15:15,  9.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:05<11:28,  7.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:13<11:31,  7.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:24<13:08,  8.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:30<11:34,  7.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [01:32<08:52,  5.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [01:34<07:00,  4.77s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [01:43<08:59,  6.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [01:51<09:24,  6.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:00<10:27,  7.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:05<09:20,  6.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:16<11:04,  8.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:20<09:19,  6.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:22<07:17,  5.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:29<07:53,  5.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:31<06:01,  4.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:34<05:23,  4.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [02:44<07:30,  5.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [02:49<07:09,  5.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:02<09:44,  7.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:08<09:06,  7.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:10<07:03,  5.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:21<08:49,  7.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:26<07:37,  6.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [03:41<10:42,  9.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [03:47<09:25,  8.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:00<11:05,  9.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:10<10:48,  9.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [04:42<18:00, 16.36s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:03<19:12, 17.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:05<14:04, 13.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:14<12:27, 11.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:17<09:26,  9.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [05:34<11:41, 11.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [05:37<09:01,  9.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [05:40<07:00,  7.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [05:42<05:20,  5.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [05:54<07:08,  7.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [06:02<07:10,  7.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:10<07:07,  7.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:15<06:17,  7.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:18<05:02,  5.70s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [06:37<08:29,  9.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [06:42<07:10,  8.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [06:48<06:16,  7.53s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [06:56<06:22,  7.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [06:58<04:53,  6.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [07:01<03:58,  5.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:10<04:51,  6.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [07:28<07:11,  9.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [07:33<06:08,  8.37s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [07:35<04:36,  6.42s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [07:44<04:59,  7.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [07:58<06:25,  9.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:02<05:05,  7.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:11<05:09,  7.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [08:25<06:14,  9.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [08:34<05:54,  9.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [08:48<06:35, 10.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [08:56<05:53, 10.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:00<04:40,  8.24s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:03<03:39,  6.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [09:19<05:06,  9.58s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [09:22<03:48,  7.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [09:25<03:03,  6.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [09:31<02:53,  5.99s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [09:36<02:40,  5.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [09:45<03:06,  6.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [09:50<02:44,  6.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [09:56<02:30,  6.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [10:03<02:37,  6.55s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [10:08<02:18,  6.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [10:13<02:07,  5.77s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [10:25<02:40,  7.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [10:34<02:38,  7.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [10:44<02:44,  8.63s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [10:58<03:01, 10.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [11:05<02:37,  9.27s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [11:20<02:56, 11.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [11:25<02:17,  9.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [11:44<02:47, 12.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [11:54<02:31, 11.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [12:05<02:14, 11.24s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [12:08<01:36,  8.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [12:10<01:08,  6.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [12:14<00:54,  6.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [12:17<00:41,  5.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [12:23<00:38,  5.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [12:40<00:52,  8.77s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [12:55<00:53, 10.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [13:00<00:35,  8.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [13:04<00:22,  7.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [13:06<00:11,  5.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [13:09<00:05,  5.09s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [13:16<00:00,  7.97s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:23<38:47, 23.51s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:26<18:42, 11.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:50<27:37, 17.09s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:57<21:08, 13.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [01:09<20:20, 12.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:18<17:54, 11.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:20<13:08,  8.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:27<12:08,  7.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:41<14:42,  9.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:51<14:42,  9.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [01:53<11:03,  7.46s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [01:56<08:53,  6.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [02:00<08:07,  5.60s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [02:10<09:41,  6.76s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:20<11:09,  7.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:26<10:18,  7.36s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:37<11:24,  8.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:41<09:33,  7.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:44<07:50,  5.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:50<08:05,  6.07s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:52<06:04,  4.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:54<05:15,  4.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [03:09<09:15,  7.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [03:15<08:31,  6.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:28<10:44,  8.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:36<10:24,  8.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:38<07:54,  6.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:49<09:38,  8.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:52<07:48,  6.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [04:11<12:01, 10.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [04:16<09:54,  8.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:28<10:46,  9.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:39<11:20, 10.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [05:09<17:36, 16.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:31<19:25, 17.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:34<14:03, 13.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:42<12:17, 11.71s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:45<09:20,  9.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [05:57<10:10, 10.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [05:59<07:34,  7.58s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [06:01<05:55,  6.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [06:04<04:44,  4.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [06:17<07:07,  7.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [06:25<07:13,  7.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:35<07:32,  8.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:42<07:06,  7.91s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:45<05:44,  6.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [06:54<06:19,  7.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [06:57<05:07,  6.02s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [07:05<05:25,  6.51s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [07:10<04:52,  5.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [07:12<03:52,  4.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [07:15<03:17,  4.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:24<04:25,  5.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [07:41<06:49,  9.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [07:47<05:57,  8.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [07:49<04:31,  6.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [07:59<05:17,  7.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [08:17<07:18, 10.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:23<06:06,  9.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:32<05:51,  9.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [08:50<07:29, 11.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [08:58<06:39, 10.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [09:11<06:53, 11.50s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [09:20<06:13, 10.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:26<05:10,  9.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:29<04:04,  7.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [09:51<06:14, 11.71s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [09:54<04:44,  9.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [09:58<03:44,  7.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [10:04<03:24,  7.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [10:09<02:59,  6.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [10:17<03:11,  7.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [10:24<03:02,  7.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [10:27<02:25,  5.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [10:36<02:40,  6.71s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [10:41<02:20,  6.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [10:53<02:52,  7.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [11:04<03:06,  8.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [11:13<02:58,  8.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [11:25<03:05,  9.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [11:37<03:11, 10.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [11:45<02:46,  9.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [11:58<02:52, 10.75s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [12:03<02:16,  9.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [12:21<02:45, 11.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [12:28<02:12, 10.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [12:41<02:13, 11.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [12:45<01:36,  8.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [12:47<01:08,  6.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [12:50<00:52,  5.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [12:54<00:41,  5.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [13:02<00:42,  6.13s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [13:18<00:53,  8.85s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [13:34<00:55, 11.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [13:39<00:37,  9.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [13:42<00:22,  7.36s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [13:51<00:15,  7.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [13:54<00:06,  6.42s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [13:58<00:00,  8.38s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:20<34:27, 20.88s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:23<16:13,  9.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:38<19:54, 12.32s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:40<13:11,  8.24s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [00:52<15:13,  9.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:00<14:27,  9.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:03<10:46,  6.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:12<11:39,  7.60s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:27<15:22, 10.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:52<21:52, 14.59s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [01:55<16:18, 10.99s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [01:58<12:33,  8.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [02:07<12:37,  8.70s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [02:15<12:05,  8.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:24<12:10,  8.60s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:29<10:55,  7.81s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:39<11:37,  8.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:45<10:20,  7.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:48<08:33,  6.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:55<08:30,  6.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:56<06:22,  4.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:59<05:38,  4.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [03:14<09:32,  7.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [03:18<08:08,  6.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:32<10:45,  8.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:37<09:26,  7.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:39<07:17,  6.00s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:50<08:59,  7.49s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:54<07:28,  6.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [04:10<10:56,  9.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [04:16<09:33,  8.32s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:30<11:18,  9.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:41<11:20, 10.16s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


 34%|███▍      | 34/100 [05:00<14:20, 13.04s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 34...


 35%|███▌      | 35/100 [05:19<16:01, 14.79s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 35...


 36%|███▌      | 36/100 [05:21<11:41, 10.96s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 36...


 37%|███▋      | 37/100 [05:36<12:42, 12.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 37...


 38%|███▊      | 38/100 [05:39<09:37,  9.31s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 38...


 39%|███▉      | 39/100 [05:53<10:50, 10.67s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 39...


 40%|████      | 40/100 [05:55<08:04,  8.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 40...


 41%|████      | 41/100 [05:57<06:06,  6.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 41...


 42%|████▏     | 42/100 [05:58<04:40,  4.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 42...


 43%|████▎     | 43/100 [06:09<06:25,  6.77s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 43...


 44%|████▍     | 44/100 [06:18<06:42,  7.19s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 44...


 45%|████▌     | 45/100 [06:26<06:54,  7.54s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 45...


 46%|████▌     | 46/100 [06:32<06:24,  7.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 46...


 47%|████▋     | 47/100 [06:35<05:07,  5.80s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 47...


 48%|████▊     | 48/100 [06:53<08:08,  9.39s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 48...


 49%|████▉     | 49/100 [06:56<06:31,  7.68s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 49...


 50%|█████     | 50/100 [07:13<08:45, 10.51s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 50...


 51%|█████     | 51/100 [07:18<07:07,  8.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 51...


 52%|█████▏    | 52/100 [07:20<05:23,  6.74s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 52...


 53%|█████▎    | 53/100 [07:22<04:15,  5.43s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 53...


 54%|█████▍    | 54/100 [07:32<05:09,  6.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 54...


 55%|█████▌    | 55/100 [07:43<05:52,  7.83s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 55...


 56%|█████▌    | 56/100 [07:49<05:20,  7.28s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 56...


 57%|█████▋    | 57/100 [07:52<04:17,  5.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 57...


 58%|█████▊    | 58/100 [08:03<05:21,  7.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 58...


 59%|█████▉    | 59/100 [08:21<07:21, 10.78s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 59...


 60%|██████    | 60/100 [08:27<06:12,  9.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 60...


 61%|██████    | 61/100 [08:37<06:11,  9.53s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 61...


 62%|██████▏   | 62/100 [08:51<06:46, 10.70s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 62...


 63%|██████▎   | 63/100 [08:59<06:09,  9.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 63...


 64%|██████▍   | 64/100 [09:11<06:21, 10.60s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 64...


 65%|██████▌   | 65/100 [09:19<05:46,  9.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 65...


 66%|██████▌   | 66/100 [09:24<04:42,  8.30s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 66...


 67%|██████▋   | 67/100 [09:28<03:58,  7.23s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 67...


 68%|██████▊   | 68/100 [09:46<05:30, 10.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 68...


 69%|██████▉   | 69/100 [09:49<04:09,  8.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 69...


 70%|███████   | 70/100 [09:53<03:24,  6.82s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 70...


 71%|███████   | 71/100 [09:59<03:15,  6.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 71...


 72%|███████▏  | 72/100 [10:04<02:54,  6.24s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 72...


 73%|███████▎  | 73/100 [10:11<02:52,  6.40s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 73...


 74%|███████▍  | 74/100 [10:18<02:47,  6.45s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 74...


 75%|███████▌  | 75/100 [10:24<02:38,  6.33s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 75...


 76%|███████▌  | 76/100 [10:32<02:44,  6.84s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 76...


 77%|███████▋  | 77/100 [10:39<02:39,  6.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 77...


 78%|███████▊  | 78/100 [10:45<02:23,  6.54s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 78...


 79%|███████▉  | 79/100 [10:58<02:59,  8.56s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 79...


 80%|████████  | 80/100 [11:06<02:51,  8.55s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 80...


 81%|████████  | 81/100 [11:17<02:51,  9.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 81...


 82%|████████▏ | 82/100 [11:30<03:04, 10.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 82...


 83%|████████▎ | 83/100 [11:39<02:50, 10.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 83...


 84%|████████▍ | 84/100 [11:57<03:19, 12.47s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 84...


 85%|████████▌ | 85/100 [12:02<02:32, 10.15s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 85...


 86%|████████▌ | 86/100 [12:23<03:09, 13.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 86...


 87%|████████▋ | 87/100 [12:33<02:40, 12.38s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 87...


 88%|████████▊ | 88/100 [12:43<02:20, 11.73s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 88...


 89%|████████▉ | 89/100 [12:46<01:39,  9.08s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 89...


 90%|█████████ | 90/100 [12:48<01:09,  6.97s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 90...


 91%|█████████ | 91/100 [12:51<00:51,  5.75s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 91...


 92%|█████████▏| 92/100 [12:55<00:40,  5.06s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 92...


 93%|█████████▎| 93/100 [13:02<00:39,  5.61s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 93...


 94%|█████████▍| 94/100 [13:18<00:53,  8.93s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 94...


 95%|█████████▌| 95/100 [13:42<01:06, 13.37s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 95...


 96%|█████████▌| 96/100 [13:48<00:44, 11.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 96...


 97%|█████████▋| 97/100 [13:51<00:25,  8.65s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 97...


 98%|█████████▊| 98/100 [13:54<00:13,  6.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 98...


 99%|█████████▉| 99/100 [13:58<00:06,  6.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 99...


100%|██████████| 100/100 [14:01<00:00,  8.42s/it]


Saving results to results/internvl/output_internvl.json...
Results saved.


  0%|          | 0/100 [00:00<?, ?it/s]


Generating response for 0...


  1%|          | 1/100 [00:19<32:51, 19.92s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 1...


  2%|▏         | 2/100 [00:22<15:24,  9.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 2...


  3%|▎         | 3/100 [00:41<22:25, 13.87s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 3...


  4%|▍         | 4/100 [00:43<14:42,  9.20s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 4...


  5%|▌         | 5/100 [00:55<16:13, 10.24s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 5...


  6%|▌         | 6/100 [01:03<14:54,  9.52s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 6...


  7%|▋         | 7/100 [01:05<11:13,  7.25s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 7...


  8%|▊         | 8/100 [01:13<11:28,  7.48s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 8...


  9%|▉         | 9/100 [01:25<13:26,  8.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 9...


 10%|█         | 10/100 [01:59<24:50, 16.57s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 10...


 11%|█         | 11/100 [02:01<17:57, 12.10s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 11...


 12%|█▏        | 12/100 [02:04<13:42,  9.35s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 12...


 13%|█▎        | 13/100 [02:09<11:34,  7.98s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 13...


 14%|█▍        | 14/100 [02:17<11:19,  7.90s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 14...


 15%|█▌        | 15/100 [02:26<11:35,  8.18s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 15...


 16%|█▌        | 16/100 [02:31<10:09,  7.26s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 16...


 17%|█▋        | 17/100 [02:40<11:05,  8.01s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 17...


 18%|█▊        | 18/100 [02:45<09:22,  6.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 18...


 19%|█▉        | 19/100 [02:47<07:33,  5.60s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 19...


 20%|██        | 20/100 [02:53<07:33,  5.66s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 20...


 21%|██        | 21/100 [02:55<05:57,  4.53s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 21...


 22%|██▏       | 22/100 [02:58<05:22,  4.14s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 22...


 23%|██▎       | 23/100 [03:09<07:50,  6.11s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 23...


 24%|██▍       | 24/100 [03:14<07:12,  5.69s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 24...


 25%|██▌       | 25/100 [03:27<10:02,  8.03s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 25...


 26%|██▌       | 26/100 [03:32<08:53,  7.21s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 26...


 27%|██▋       | 27/100 [03:35<06:57,  5.72s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 27...


 28%|██▊       | 28/100 [03:46<08:55,  7.44s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 28...


 29%|██▉       | 29/100 [03:49<07:17,  6.17s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 29...


 30%|███       | 30/100 [04:01<09:15,  7.94s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 30...


 31%|███       | 31/100 [04:07<08:11,  7.12s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 31...


 32%|███▏      | 32/100 [04:19<09:45,  8.62s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 32...


 33%|███▎      | 33/100 [04:28<09:53,  8.86s/it]

Saving results to results/internvl/output_internvl.json...
Results saved.

Generating response for 33...


: 

### Extract Answer

In [None]:
import os
import re
import time
import argparse

from tqdm import tqdm
from poison_utils import *

# OpenAI
import openai
from dotenv import load_dotenv
from prompts.ext_ans import demo_prompt

: 

In [None]:
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

: 

In [None]:
def verify_extraction(extraction):
    extraction = extraction.strip()
    if extraction == "" or extraction == None:
        return False
    return True


def create_test_prompt(demo_prompt, query, response):
    demo_prompt = demo_prompt.strip()
    test_prompt = f"{query}\n\n{response}"
    full_prompt = f"{demo_prompt}\n\n{test_prompt}\n\nExtracted answer: "
    return full_prompt


def extract_answer(response, problem, target_name, model=None, quick_extract=False):
    question_type = problem['question_type']
    answer_type = problem['answer_type']
    choices = problem['choices']
    query = problem[target_name]['query']
    pid = problem['pid']

    if response == "":
        return ""
    
    if question_type == 'multi_choice' and response in choices:
        return response
    
    if answer_type == "integer":
        try:
            extraction = int(response)
            return str(extraction)
        except:
            pass

    if answer_type == "float":
        try:
            extraction = str(float(response))
            return extraction
        except:
            pass

    # quick extraction
    if quick_extract:
        print("Quickly extracting answer...")
        # The answer is "text". -> "text"
        try:
            result = re.search(r'The answer is "(.*)"\.', response)
            if result:
                extraction = result.group(1)
                return extraction
        except:
            pass

    # general extraction
    try:
        full_prompt = create_test_prompt(demo_prompt, query, response)
        # extraction = model.get_chat_response(full_prompt)
        extraction = get_chat_response(full_prompt, openai.api_key)
        return extraction
    except Exception as e:
        print(e)
        print(f"Error in extracting answer for {pid}")

    return ""

    

: 

In [None]:
args = {
    "output_dir": f"results/{model}",
    "output_file": f"output_{model}.json",
    "response_label": ,
    "llm_engine": ,
    "number": ,
    "quick_extract": ,
    "rerun": , 
    "save_every": ,
    "output_label": 
}

: 

In [None]:
# args
label = args.response_label
result_file = os.path.join(args.output_dir, args.output_file)

if args.output_label != '':
    output_file = result_file.replace('.json', f'_{args.output_label}.json')
else:
    output_file = result_file

# read results
print(f"Reading {result_file}...")
results = read_json(result_file)

# model = llama_3_1.LLaMA()

# full pids
full_pids = list(results.keys())
if args.number > 0:
    full_pids = full_pids[:min(args.number, len(full_pids))]
print("Number of testing problems:", len(full_pids))

# test pids
if args.rerun:
    test_pids = full_pids
else:
    test_pids = []
    for pid in full_pids:
        # print(pid)
        if 'extraction' not in results[pid] or not verify_extraction(results[pid]['extraction']):
            test_pids.append(pid)


test_num = len(test_pids)
print("Number of problems to run:", test_num)
# print(test_pids)

# tqdm, enumerate results
for i, pid in enumerate(tqdm(test_pids)):
    target_names = results[pid]["targets"].keys()
    for name in target_names:
        problem = results[pid]

        assert label in problem["targets"][name]
        response = problem["targets"][name][label]       

        
        # extraction  = extract_answer(response, problem, name, model, args.quick_extract)
        extraction  = extract_answer(response, problem, name, None, args.quick_extract)
        results[pid]["targets"][name]['extraction'] = extraction

        if i % args.save_every == 0 or i == test_num - 1:
            print(f"Saving results to {output_file}...")
            save_json(results, output_file)
            print(f"Results saved.")

: 

: 