## 0. Import Packages
Note: Use the correct kernel.
# IMPORTANT: PLEASE REFER TO README.md

In [None]:
!pip install diffusers transformers accelerate scipy safetensors
!pip install salesforce-lavis
# (For Colab, if running locally, refer to README.md)

In [4]:
import os
import zipfile

import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image

## 1-1. Folder Designations (Colab)

In [5]:
# Folder Setup (Colab): all working directories live under one parent folder.
parent_folder = "images"
input_folder, mask_folder, output_folder = (
    os.path.join(parent_folder, subfolder_name)
    for subfolder_name in ("input_images", "mask_images", "output_images")
)

In [6]:
# Set to False when running locally; the Colab-only extraction below is then skipped.
using_colab = True

if using_colab:
    # Unpack the uploaded archives into the parent folder. Extraction is done in
    # Python so that re-running the cell overwrites earlier files without an
    # interactive prompt, and a missing archive raises an error immediately
    # instead of leaving an empty folder behind.
    # NOTE(review): assumes each archive contains a top-level folder matching
    # input_folder / mask_folder - confirm against the actual zip layout.
    os.makedirs(parent_folder, exist_ok=True)
    for archive_name in ("input_images.zip", "mask_images.zip"):
        with zipfile.ZipFile(archive_name) as archive:
            archive.extractall(parent_folder)
        print(f"Extracted {archive_name} into {parent_folder}")

## 1-2. Folder Designations (Local)

In [None]:
# Folder Setup (local runs only)
# These placeholder paths are applied only when the Colab flag is not set, so that
# running every cell from top to bottom does not overwrite the Colab folders.
# `using_colab` may be undefined if the Colab cells were never run; that case is
# treated as a local run.
if not globals().get("using_colab", False):
    input_folder = "folder_path_for_input_images"
    mask_folder = "folder_path_for_mask_images"
    output_folder = "folder_path_for_result_images"

## 2. Mask Image Folder Generation
##### Note: White = Area to Inpaint
- Input images should be 3-channel images with resolution 512x512
- Save masks to "mask_folder" as PNG files named after the corresponding input image plus "_mask" (e.g. the mask for "photo.jpg" must be "photo_mask.png")
- Masks should be 1-channel images with the same size as the input images (greyscale, resolution 512x512)

## 3. Creation of Synthesized Images
##### Suggestion: create an auto-prompter, perhaps using another model(?)

In [None]:
# Load the model
# Use the GPU when one is available; otherwise fall back to the CPU, where
# half precision is poorly supported, so full precision is used instead.
inpaint_device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-inpainting",
    torch_dtype=torch.float16 if inpaint_device == "cuda" else torch.float32,
)
pipe = pipe.to(inpaint_device)

#########
# Set Prompt Here
prompt = "(e.g.) Traditional fisherman fishing on a large lake."
#########

# Fixed seed so that re-running this cell reproduces the same synthesized images.
# The generator is shared across the loop, so each image still gets different noise.
inpaint_seed = 0
inpaint_generator = torch.Generator(device=inpaint_device).manual_seed(inpaint_seed)

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

image_extensions = ('.png', '.jpg', '.jpeg')
num_inpainted = 0

# Iterate through the images in the input folder (sorted for a stable order)
for image_filename in sorted(os.listdir(input_folder)):
    if not image_filename.lower().endswith(image_extensions):
        continue

    # Construct full file paths; the mask is always looked up as "<stem>_mask.png"
    image_stem = os.path.splitext(image_filename)[0]
    image_path = os.path.join(input_folder, image_filename)
    mask_path = os.path.join(mask_folder, image_stem + "_mask.png")

    # Check if the corresponding mask exists
    if not os.path.exists(mask_path):
        print(f"Mask for {image_filename} not found. Skipping.")
        continue

    # Open the images; convert() returns an independent copy, so the underlying
    # files can be closed as soon as the conversion is done.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert("RGB")
    with Image.open(mask_path) as opened_mask:
        mask_image = opened_mask.convert("L")

    # Ensure the mask size matches the image size
    if image.size != mask_image.size:
        print(f"Resizing mask for {image_filename} to match the image size.")
        mask_image = mask_image.resize(image.size)

    # Perform inpainting
    inpainted_image = pipe(
        prompt=prompt,
        image=image,
        mask_image=mask_image,
        generator=inpaint_generator,
    ).images[0]

    # Save the inpainted image
    output_path = os.path.join(output_folder, image_stem + "_inpainted.png")
    inpainted_image.save(output_path)
    num_inpainted += 1
    print(f"Saved inpainted image to {output_path}")

# Make an empty or mismatched input folder visible instead of finishing silently
if num_inpainted == 0:
    print(f"No images were inpainted; check the contents of {input_folder} and {mask_folder}.")


## 4. VQA Testing
(BLIP VQA)

In [None]:
import os
import csv
import torch
from PIL import Image
from lavis.models import load_model_and_preprocess

# setup device to use
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# load the VQA model and preprocessing tools
model, vis_processors, txt_processors = load_model_and_preprocess(name="blip_vqa", model_type="vqav2", is_eval=True, device=device)

# define the question for the VQA
question_text = "What country is this food from?"
# the question is identical for every image, so preprocess it once up front
question = txt_processors["eval"](question_text)

# folder containing images: `output_folder` (set in the folder setup cells)

# CSV file that receives the responses (replace this placeholder with a real path)
csv_output_path = "path to save BLIP-VQA responses"


# initialize the CSV file and write headers
# utf-8 keeps non-ASCII file names or answers writable on every platform
with open(csv_output_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Image File', 'VQA Response'])

    # iterate through all images in the output folder (sorted so that the row
    # order of the CSV is the same on every run)
    for image_file in sorted(os.listdir(output_folder)):
        # only process image files; the check is case-insensitive so that
        # files such as "photo.PNG" are not skipped
        if not image_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        # load and process the image; convert() returns an independent copy,
        # so the file can be closed right away
        image_path = os.path.join(output_folder, image_file)
        with Image.open(image_path) as opened_image:
            raw_image = opened_image.convert("RGB")
        image = vis_processors["eval"](raw_image).unsqueeze(0).to(device)

        # get the model's prediction; gradients are not needed for inference
        with torch.no_grad():
            response = model.predict_answers(samples={"image": image, "text_input": question}, inference_method="generate")

        # write the image filename and model response to the CSV
        writer.writerow([image_file, response[0]])  # response[0] to get the answer

print(f"VQA responses saved to {csv_output_path}")
