In [None]:
!pip install diffusers transformers accelerate --quiet

In [None]:
import torch
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler

# Identifier of the pretrained checkpoint handed to `from_pretrained`.
model_id = "runwayml/stable-diffusion-v1-5"

# Sampling settings reused by every generation call further down.
seed = 42
width = height = 512
guidance_scale = 9.0
num_inference_steps = 75
negative_prompt = "blurry, oversaturated, low resolution, deformed"

# Load the pipeline with float16 weights and move it to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
)
pipe.to("cuda")

# Swap the scheduler the pipeline was loaded with for an LMS scheduler
# built from that scheduler's own configuration.
pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)

# CUDA random generator seeded once; it is passed to the pipeline calls.
generator = torch.Generator(device="cuda").manual_seed(seed)

In [None]:
from google.colab import files
import json


# Ask the user for the captions file. `files.upload()` returns a mapping of
# uploaded file name -> file content (bytes).
uploaded = files.upload()

if uploaded:
    # Parse the bytes that were just uploaded instead of re-opening a
    # hard-coded path: the upload may be stored under a different name
    # (e.g. a de-duplicated "captions (1).json" on re-upload), in which case
    # "captions.json" on disk would be a stale copy or not exist at all.
    captions_name = (
        "captions.json" if "captions.json" in uploaded else next(iter(uploaded))
    )
    data = json.loads(uploaded[captions_name])
else:
    # Nothing uploaded (dialog cancelled): fall back to a file already on disk.
    with open("captions.json", "rb") as f:
        data = json.load(f)

# The code below needs a JSON object at the top level; fail with a clear
# message rather than an AttributeError on `.get`.
if not isinstance(data, dict):
    raise TypeError(
        f"Expected a JSON object with an 'image_captions' key, got {type(data).__name__}."
    )

# A missing or null "image_captions" entry is treated as "no caption groups".
caption_groups = data.get("image_captions") or []
print(f"Found {len(caption_groups)} caption groups.")

In [None]:
import os


# Folder that receives one PNG per caption group.
output_folder = "generated_images"

# Only the first few captions of each group are merged into a prompt.
max_captions_per_prompt = 5

# Create the output folder on first use; `exist_ok=True` keeps this safe if
# the folder appears between the check and the call.
if not os.path.isdir(output_folder):
    os.makedirs(output_folder, exist_ok=True)
    print(f"Created folder: {output_folder}")

# Rewind the shared generator to its seed so that re-running this cell
# reproduces the same images instead of continuing from wherever the previous
# run left the random state. On a fresh kernel this is a no-op in effect,
# because the generator was created with this same seed.
generator.manual_seed(seed)

for idx, group in enumerate(caption_groups):

    if not isinstance(group, list):
        print(f"Skipping index {idx} as it is not a list of captions.")
        continue

    # Keep only non-blank string captions: `str.join` raises on non-strings,
    # and an empty prompt would produce an image unrelated to any caption.
    captions = [
        caption
        for caption in group[:max_captions_per_prompt]
        if isinstance(caption, str) and caption.strip()
    ]
    if not captions:
        print(f"Skipping index {idx} as it has no usable captions.")
        continue

    prompt = ", ".join(captions)
    print(f"Generating image {idx} with prompt: {prompt}")

    # One image per group; the file name carries the group's index so skipped
    # groups leave gaps rather than shifting later file names.
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        generator=generator,
        width=width,
        height=height,
    )
    image = result.images[0]
    local_filename = os.path.join(output_folder, f"generated_image_{idx}.png")
    image.save(local_filename)
    print(f"Saved image {idx} as {local_filename}")



In [None]:
import shutil

from google.colab import files

# Build generated_images_json.zip from the generated_images folder. Unlike
# `zip -r`, which updates an existing archive in place, this rewrites the
# archive from scratch, so entries from earlier runs that are no longer in the
# folder do not linger. Entries keep the "generated_images/" prefix.
shutil.make_archive("generated_images_json", "zip", base_dir="generated_images")

# Hand the archive to the browser for download.
files.download("generated_images_json.zip")