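Filter the region-level and grounded-level conversation files, keeping only rows whose 'questions' and 'answers' columns are neither null nor empty. The cell below processes the grounded-level file only; `filtered_region_level.csv`, which is read later in this notebook, has to be produced by running the same step on the region-level file.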

In [9]:
import pandas as pd

# Read the grounded-level conversation table.
df = pd.read_csv('train_classes_grounded_level.csv')

# Step 1: discard rows in which either text column is null.
qa_columns = ['questions', 'answers']
filtered_df = df.dropna(subset=qa_columns)

# Step 2: discard rows in which either text column is empty once the
# surrounding whitespace has been stripped.
has_question = filtered_df['questions'].str.strip() != ''
has_answer = filtered_df['answers'].str.strip() != ''
filtered_df = filtered_df[has_question & has_answer]

# Write the surviving rows out, without the index column.
filtered_df.to_csv('filtered_grounded_level.csv', index=False)


Preparing JSON file for image-level conversations.

In [3]:
import os
import pandas as pd
import json
import random
import uuid
import tqdm

# Instruction prompts; one of them is picked at random for every image-level
# sample.
prompts = [
    "Describe the image concisely.",
    "Provide a brief description of the given image.",
    "Offer a succinct explanation of the picture presented.",
    "Summarize the visual content of the image.",
    "Give a short and clear explanation of the subsequent image.",
    "Share a concise interpretation of the image provided.",
    "Present a compact description of the photo’s key features.",
    "Relay a brief, clear account of the picture shown.",
    "Render a clear and concise summary of the photo.",
    "Write a terse but informative summary of the picture.",
    "Create a compact narrative representing the image presented."
]

# Private, seeded generator: the prompt assigned to each row is the same on
# every run, and the global `random` state is left untouched.
# (The "id" values still come from uuid4 and therefore differ between runs.)
prompt_rng = random.Random(42)

# Load the image-level answers.
df = pd.read_csv('./train_classes_with_modified_answers_v2.csv')

# A missing answer is read as float NaN, which json.dump would write as a bare
# `NaN` token (not valid JSON). Drop those rows and report how many were lost.
captions = df.dropna(subset=['answers'])
n_dropped = len(df) - len(captions)
if n_dropped:
    print(f"Skipped {n_dropped} rows with a missing answer.")

# Build one single-turn conversation per remaining row.
# NOTE(review): these paths use 'train_jpg' (underscore) while the image-copy
# cell later in this notebook reads from a folder named 'train-jpg' (hyphen)
# -- confirm which spelling the training code expects.
res = [
    {
        "id": str(uuid.uuid4()),
        "image": f"amazon/train/train_jpg/{img_name}.jpg",
        "conversations": [
            {
                "from": "human",
                "value": f"<image>\n{prompt_rng.choice(prompts)}"
            },
            {
                "from": "gpt",
                "value": answer
            },
        ]
    }
    for img_name, answer in zip(captions['image_name'], captions['answers'])
]

# Persist the image-level conversations; later cells extend this file.
with open('train_classes2.json', 'w') as f:
    json.dump(res, f, indent=2)


Add the region-level and grounded-level conversations (10 shots per class) to the image-level JSON file.

In [4]:
import pandas as pd
import json
import random
from tqdm import tqdm

# Load the image-level JSON written by the previous step.
with open('train_classes2.json', 'r') as f:
    res = json.load(f)

# Index the entries by image path for fast lookups. The dict values are the
# very same objects as the items of `res`, so appending through the dict also
# updates `res`.
res_dict = {entry['image']: entry for entry in res}

# Load the region-level question/answer pairs.
df_region = pd.read_csv('./filtered_region_level.csv')

n_added = n_duplicate = n_unmatched = 0

# Iterate over the three columns directly (same pattern as the other cells);
# this also avoids the per-row dtype coercion that DataFrame.iterrows performs.
for img_name, question, answer in tqdm(
    zip(df_region['image_name'], df_region['questions'], df_region['answers']),
    total=len(df_region),
):
    # Construct the image path used as the key in the JSON entries.
    image_path = f"amazon/train/train_jpg/{img_name}.jpg"

    entry = res_dict.get(image_path)
    if entry is None:
        # No image-level entry for this row; count it instead of dropping it
        # without a trace.
        n_unmatched += 1
        continue

    # NOTE(review): every appended human turn repeats the `<image>`
    # placeholder although the entry references a single image -- confirm the
    # training loader expects this.
    human_turn = {"from": "human", "value": f"<image>\n{question}"}
    gpt_turn = {"from": "gpt", "value": answer}

    # This cell reads and rewrites the same file, so a re-run would append
    # every pair a second time. Skip pairs that are already present to keep
    # the cell idempotent.
    turns = entry['conversations']
    already_present = any(
        turns[i] == human_turn and turns[i + 1] == gpt_turn
        for i in range(len(turns) - 1)
    )
    if already_present:
        n_duplicate += 1
        continue

    turns.extend([human_turn, gpt_turn])
    n_added += 1

# Write back the original list rather than list(res_dict.values()): the dict
# keeps only one entry per image path, so rebuilding the list from it would
# silently drop entries that share a path.
res_updated = res

# Write the updated JSON to a file
with open('train_classes2.json', 'w') as f:
    json.dump(res_updated, f, indent=2)

print(
    f"Added {n_added} question/answer pairs; skipped {n_duplicate} already "
    f"present and {n_unmatched} with no matching image."
)


100%|██████████| 142/142 [00:00<00:00, 22672.78it/s]


In [5]:
import pandas as pd
import json
import random
from tqdm import tqdm

# Load the JSON file that already holds the image-level conversations (and any
# conversations merged into it by earlier cells).
with open('train_classes2.json', 'r') as f:
    res = json.load(f)

# Index the entries by image path for fast lookups. The dict values are the
# very same objects as the items of `res`, so appending through the dict also
# updates `res`.
res_dict = {entry['image']: entry for entry in res}

# Load the grounded-level question/answer pairs.
# (The variable keeps its original name `df_region` even though it holds the
# grounded-level table here.)
df_region = pd.read_csv('./filtered_grounded_level.csv')

n_added = n_duplicate = n_unmatched = 0

# Iterate over the three columns directly (same pattern as the other cells);
# this also avoids the per-row dtype coercion that DataFrame.iterrows performs.
for img_name, question, answer in tqdm(
    zip(df_region['image_name'], df_region['questions'], df_region['answers']),
    total=len(df_region),
):
    # Construct the image path used as the key in the JSON entries.
    image_path = f"amazon/train/train_jpg/{img_name}.jpg"

    entry = res_dict.get(image_path)
    if entry is None:
        # No image-level entry for this row; count it instead of dropping it
        # without a trace.
        n_unmatched += 1
        continue

    # NOTE(review): every appended human turn repeats the `<image>`
    # placeholder although the entry references a single image -- confirm the
    # training loader expects this.
    human_turn = {"from": "human", "value": f"<image>\n{question}"}
    gpt_turn = {"from": "gpt", "value": answer}

    # This cell reads and rewrites the same file, so a re-run would append
    # every pair a second time. Skip pairs that are already present to keep
    # the cell idempotent.
    turns = entry['conversations']
    already_present = any(
        turns[i] == human_turn and turns[i + 1] == gpt_turn
        for i in range(len(turns) - 1)
    )
    if already_present:
        n_duplicate += 1
        continue

    turns.extend([human_turn, gpt_turn])
    n_added += 1

# Write back the original list rather than list(res_dict.values()): the dict
# keeps only one entry per image path, so rebuilding the list from it would
# silently drop entries that share a path.
res_updated = res

# Write the updated JSON to a file
with open('train_classes2.json', 'w') as f:
    json.dump(res_updated, f, indent=2)

print(
    f"Added {n_added} question/answer pairs; skipped {n_duplicate} already "
    f"present and {n_unmatched} with no matching image."
)


100%|██████████| 140/140 [00:00<00:00, 23175.69it/s]


Copy the images from train/train-jpg whose names occur in the image_name column of region_grounded_mix into a separate folder.


In [6]:
import os
import pandas as pd
import shutil

# Define paths
csv_path = '/ssd_1/sanjar/rsvlm/datasets/region_grounded_mix.csv'  # Update with the correct path if needed
image_folder = '/ssd_1/sanjar/rsvlm/datasets/amazon/train/train-jpg/'
output_folder = '/ssd_1/sanjar/rsvlm/datasets/amazon/region_ground_mix'  # Update the output path as needed

# Create the output directory if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_path)

# Build the file names from the image_name column. If a name occurs in several
# rows it is kept once (first-occurrence order), so the same file is neither
# copied nor reported as missing more than once.
image_names = [
    f"{img_name}.jpg"
    for img_name in df['image_name'].drop_duplicates().tolist()
]

# Copy each image to the new folder; names with no file in image_folder are
# reported and skipped.
for img_name in image_names:
    src_path = os.path.join(image_folder, img_name)
    dst_path = os.path.join(output_folder, img_name)
    if os.path.exists(src_path):
        shutil.copy(src_path, dst_path)
    else:
        print(f"Image {img_name} not found in {image_folder}")

print("Image extraction complete.")


Image extraction complete.


Prepare the JSON file for the region_grounded_mix conversations whose images were extracted above.

In [7]:
import os
import pandas as pd
import json
import uuid
from tqdm import tqdm

# Load the CSV file
df = pd.read_csv('/ssd_1/sanjar/rsvlm/datasets/region_grounded_mix.csv')

# A null question would be formatted into the prompt as the literal text "nan",
# and a null answer would be written by json.dump as a bare `NaN` token (not
# valid JSON). Drop such rows and report how many were lost.
qa_rows = df.dropna(subset=['questions', 'answers'])
n_dropped = len(df) - len(qa_rows)
if n_dropped:
    print(f"Skipped {n_dropped} rows with a missing question or answer.")

# Create the JSON structure: one single-turn conversation per remaining row.
# NOTE(review): the images were copied to '.../amazon/region_ground_mix' by
# the cell above, but the paths written here point at
# 'amazon/train/train_jpg/' -- confirm which location the training code reads.
res = []

for img_name, question, answer in zip(
    qa_rows['image_name'], qa_rows['questions'], qa_rows['answers']
):
    res.append({
        "id": str(uuid.uuid4()),
        "image": f"amazon/train/train_jpg/{img_name}.jpg",
        "conversations": [
            {
                "from": "human",
                "value": f"<image>\n{question}"
            },
            {
                "from": "gpt",
                "value": answer
            },
        ]
    })

# Write the new JSON to a file
with open('region_grounded_mix.json', 'w') as f:
    json.dump(res, f, indent=2)

print("New JSON file created successfully.")


New JSON file created successfully.
