# InternVL

## 1. Environment Setup

Install dependencies and import modules.

In [None]:
!pip install -q lmdeploy

In [None]:
from lmdeploy import pipeline, TurbomindEngineConfig
from lmdeploy.vl import load_image
import numpy as np
import os
import csv
import pandas as pd
from google.colab import drive
import nest_asyncio

# Patch asyncio so nested event loops are allowed inside the notebook's
# already-running loop.
nest_asyncio.apply()

# Model responses are written under ./responses; create the folder up front.
os.makedirs(name="responses", exist_ok=True)

# Attach Google Drive (forcing a fresh mount) so the image folders are readable.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


Define functions.

In [None]:
def process_images_in_batch(image_folder, prompt, pipe, batch_size=4, verbose=True):
    """Run `prompt` against every image in `image_folder`, one batch at a time.

    Args:
        image_folder: Directory whose PNG/JPEG files are sent to the model.
        prompt: Text prompt paired with each image.
        pipe: Callable that accepts a list of ``(prompt, image)`` tuples and
            returns one result per tuple, each exposing a ``.text`` attribute.
        batch_size: Number of images handed to `pipe` per call.
        verbose: When True, print every file name together with its response.

    Returns:
        A list of ``{"image_file": <file name>, "response": <text>}`` dicts,
        ordered by file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    # os.listdir returns entries in arbitrary order, so sort for a reproducible
    # run. Extensions are compared case-insensitively so that files such as
    # "photo_1.JPG" are not silently skipped.
    image_files = sorted(
        f for f in os.listdir(image_folder) if f.lower().endswith(image_extensions)
    )

    responses = []
    for i in range(0, len(image_files), batch_size):
        batch_files = image_files[i:i + batch_size]
        prompts = [(prompt, load_image(os.path.join(image_folder, file))) for file in batch_files]

        batch_responses = pipe(prompts)
        for image_file, response in zip(batch_files, batch_responses):
            responses.append({"image_file": image_file, "response": response.text})

            if verbose:
                print(f'Image File: {image_file} | Response: {response.text}')

    return responses


def save_responses(responses, image_folder, csv_file_path, first_write=True, verbose=False):
    """Write model responses to a CSV file and keep that file sorted.

    The ``original_country`` and ``synthesized_race`` columns are derived from
    substrings of `image_folder`. After writing, the whole file is re-read,
    sorted by ``synthesized_race`` and by the numeric index parsed from each
    file name, and written back.

    Args:
        responses: List of dicts with ``"image_file"`` and ``"response"`` keys.
            Each file name must carry an integer between its last underscore
            and the following dot (e.g. ``img_12.png``); that integer is the
            sort index.
        image_folder: Folder the images came from. It must contain one of the
            known country names. If it contains ``'original'`` the synthesized
            race is set to the country; otherwise a known race label is looked
            up, falling back to the last path component.
        csv_file_path: Destination CSV file.
        first_write: When True the file is (re)created with a header row.
            When False rows are appended; a header is still written if the
            file does not exist yet or is empty.
        verbose: When True, print every row that is written.

    Raises:
        ValueError: If no known country name occurs in `image_folder`.
    """
    header = ['original_country', 'synthesized_race', 'image_file_name', 'response']

    ### determining original_country and synthesized_race automatically from the folder name
    possible_countries = ['Korea', 'UK', 'Myanmar', 'Azerbaijan', 'US']
    possible_synthesized_races = ['African', 'EastAsian', 'SouthAsian', 'Caucasian']

    original_country = next((country for country in possible_countries if country in image_folder), None)
    if original_country is None:
        raise ValueError(
            f"Could not determine the country from '{image_folder}'. "
            f"Expected one of {possible_countries} in the folder path."
        )

    if 'original' in image_folder:
        synthesized_race = original_country
    else:
        synthesized_race = next((race for race in possible_synthesized_races if race in image_folder), None)
        if synthesized_race is None:
            # Unknown label: use the folder's own name rather than failing with
            # an opaque IndexError.
            synthesized_race = os.path.basename(os.path.normpath(image_folder))
            print(f"No known synthesized race found in '{image_folder}'. Using folder name '{synthesized_race}' instead.")

    ### The file needs a header if it does not exist yet, even when appending
    file_missing = not os.path.exists(csv_file_path) or os.path.getsize(csv_file_path) == 0
    if not first_write and file_missing:
        print(f"The provided csv file does not exist. Saving in {csv_file_path}")
    write_header = first_write or file_missing

    parent_dir = os.path.dirname(csv_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    ### saving into the csv file
    # A first write starts a fresh file; appending here would leave a second
    # header row in the middle of the file when the notebook cell is re-run.
    write_mode = 'w' if write_header else 'a'
    with open(csv_file_path, mode=write_mode, newline='', encoding='utf-8') as csvfile:
        csv_writer = csv.writer(csvfile)

        if write_header:
            csv_writer.writerow(header)

        # Write each response
        for response in responses:
            csv_writer.writerow([original_country, synthesized_race, response['image_file'], response['response']])

            if verbose: print(f"Filename: {response['image_file']} | Response: {response['response']}")

    print(f"Data saved to: {csv_file_path}")

    ### sort the rows in the .csv file by file index
    # keep_default_na=False keeps responses such as "N/A" or "None" as text
    # instead of letting pandas turn them into missing values.
    df = pd.read_csv(csv_file_path, keep_default_na=False)
    df["index"] = df["image_file_name"].apply(lambda file_name: int(file_name.split("_")[-1].split(".")[0]))
    df_sorted = df.sort_values(by=["synthesized_race", "index"], ascending=[True, True])
    df_sorted = df_sorted.drop(columns=["index"])
    df_sorted.to_csv(csv_file_path, index=False)

## 2. Batch Inference

Load a model from the `InternVL2.5` series; the available checkpoints are listed in [this collection](https://huggingface.co/collections/OpenGVLab/internvl25-673e1019b66e2218f68d7c1c). `InternVL2_5-26B-AWQ` requires ~38 GB of GPU memory.

In [None]:
# Hugging Face identifier of the checkpoint to serve.
model = 'OpenGVLab/InternVL2_5-26B-AWQ'

# TurboMind engine settings: session length of 8192 and tp=1.
engine_config = TurbomindEngineConfig(session_len=8192, tp=1)
pipe = pipeline(model, backend_config=engine_config)

Run inference on the original images, then on the synthesized images for each race.

In [None]:
# Keep exactly one prompt active, matching the image category below.
# prompt = "Which country is the food in the photo mostly associated with? What is the food?" # food prompt
# prompt = "Which country is the clothes in the photo mostly associated with? Which visual cues did you use to determine it?" # clothes prompt
prompt = "Which country is the cultural celebration/festival in the photo mostly associated with? Which visual cues did you use to determine it?" # festivals prompt
batch_size = 5
image_category = 'UK_Festival'
csv_file_path = f"responses/{image_category}_Results.csv"

### For original images
image_folder = f"drive/MyDrive/Team VQA - Datasets/{image_category}/original_images/"
responses = process_images_in_batch(image_folder, prompt, pipe, batch_size)
save_responses(responses, image_folder, csv_file_path, first_write=True)

### For synthesized images
# NOTE(review): these folder names differ from the race labels listed inside
# save_responses ('African', 'EastAsian', 'SouthAsian', 'Caucasian'); confirm
# which naming the Drive folders actually use.
races = ['Asian', 'Black', 'Indian', 'White']
for race in races:
    image_folder = f"drive/MyDrive/Team VQA - Datasets/{image_category}/synthesized_images/{race}"
    responses = process_images_in_batch(image_folder, prompt, pipe, batch_size)
    # Always append here: the header was already written by the original-images pass.
    save_responses(responses, image_folder, csv_file_path, first_write=False)

Zip the responses directory and download.

In [None]:
!zip -r /content/responses.zip /content/responses
from google.colab import files
files.download("/content/responses.zip")