In [2]:
# Mount Google Drive at /content/drive; the dataset and output paths used by
# later cells all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:

# Root folder of the COCO 2017 "micro" dataset on the mounted Drive.
coco_path = "/content/drive/MyDrive/coco2017_micro"

# Sub-folders for the two image splits and for the annotation JSON files.
train_dir = f"{coco_path}/train2017"
val_dir = f"{coco_path}/val2017"
ann_dir = f"{coco_path}/annotations"

# Echo the resolved locations so a wrong path is spotted immediately.
for path_label, path_value in (
    ("Train images path:", train_dir),
    ("Val images path:", val_dir),
    ("Annotations path:", ann_dir),
):
    print(path_label, path_value)


Train images path: /content/drive/MyDrive/coco2017_micro/train2017
Val images path: /content/drive/MyDrive/coco2017_micro/val2017
Annotations path: /content/drive/MyDrive/coco2017_micro/annotations


In [5]:
import os

# Report how many directory entries each image split holds, then list the
# annotation files by name.
for split_label, split_dir in (
    ("Train images:", train_dir),
    ("Val images:", val_dir),
):
    print(split_label, len(os.listdir(split_dir)))

print("Annotation files:", os.listdir(ann_dir))


Train images: 875
Val images: 5013
Annotation files: ['dataset-metadata.json', 'instances_val2017.json', 'person_keypoints_train2017.json', 'captions_train2017.json', 'instances_train2017.json', 'captions_val2017.json', 'person_keypoints_val2017.json']


In [6]:
!pip install diffusers transformers accelerate safetensors
!pip install torch torchvision --upgrade




In [7]:
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import json
import os


# Hugging Face Hub id of the Stable Diffusion checkpoint used in this notebook.
model_id = "runwayml/stable-diffusion-v1-5"

# Load the pipeline with float16 weights and move it to the CUDA device.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
).to("cuda")

# Drive folder that receives the generated images; created if it is missing.
save_path = "/content/drive/MyDrive/generated_images"
os.makedirs(save_path, exist_ok=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


In [8]:
# Smoke test: generate one image from a fixed prompt.
prompt = "a red sports car driving on a mountain road"
image = pipe(prompt, guidance_scale=7.5).images[0]

# PIL's Image.show() hands the picture to an external viewer program, so
# nothing appears in the notebook. display() (provided by the IPython kernel)
# renders the image inline in the cell output instead.
display(image)

# Keep a copy on Drive next to the other generated images.
image.save(f"{save_path}/sports_car.png")
print("Image saved to:", save_path)


  0%|          | 0/50 [00:00<?, ?it/s]

Image saved to: /content/drive/MyDrive/generated_images


In [9]:
# Download the COCO 2017 train/val annotations archive into the current
# working directory as annotations_trainval2017.zip.
!wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -O annotations_trainval2017.zip


--2025-09-10 15:02:49--  http://images.cocodataset.org/annotations/annotations_trainval2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 16.182.40.9, 3.5.28.46, 52.217.18.252, ...
Connecting to images.cocodataset.org (images.cocodataset.org)|16.182.40.9|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 252907541 (241M) [application/zip]
Saving to: ‘annotations_trainval2017.zip’


2025-09-10 15:03:07 (13.9 MB/s) - ‘annotations_trainval2017.zip’ saved [252907541/252907541]



In [10]:
!unzip annotations_trainval2017.zip -d annotations2017


Archive:  annotations_trainval2017.zip
replace annotations2017/annotations/instances_train2017.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: ALL
  inflating: annotations2017/annotations/instances_train2017.json  
  inflating: annotations2017/annotations/instances_val2017.json  
  inflating: annotations2017/annotations/captions_train2017.json  
  inflating: annotations2017/annotations/captions_val2017.json  
  inflating: annotations2017/annotations/person_keypoints_train2017.json  
  inflating: annotations2017/annotations/person_keypoints_val2017.json  


In [11]:
import shutil, os


# Copy the unzipped annotation files into the dataset's annotations folder
# on Drive.
src_folder = "/content/annotations2017/annotations"
dst_folder = "/content/drive/MyDrive/coco2017_micro/annotations"

# shutil.copy raises if the destination directory does not exist yet.
os.makedirs(dst_folder, exist_ok=True)

for file_name in os.listdir(src_folder):
    full_src = os.path.join(src_folder, file_name)
    if not os.path.isfile(full_src):
        # shutil.copy only handles regular files; skip any sub-directory.
        continue
    full_dst = os.path.join(dst_folder, file_name)
    shutil.copy(full_src, full_dst)

print("✅ All annotation files copied to Drive:", dst_folder)


✅ All annotation files copied to Drive: /content/drive/MyDrive/coco2017_micro/annotations


In [12]:
import os

# Annotation folder on Drive; list its contents to confirm the copy worked.
ann_dir = "/content/drive/MyDrive/coco2017_micro/annotations"
annotation_files = os.listdir(ann_dir)
print("Files in annotations folder:", annotation_files)


Files in annotations folder: ['dataset-metadata.json', 'instances_val2017.json', 'person_keypoints_train2017.json', 'captions_train2017.json', 'instances_train2017.json', 'captions_val2017.json', 'person_keypoints_val2017.json']


In [13]:
import json

# Captions for the validation split.
ann_file = f"{ann_dir}/captions_val2017.json"

# Parse the whole annotation file into memory.
with open(ann_file) as handle:
    captions_data = json.loads(handle.read())

# Quick look at the structure: the top-level keys and the first annotation.
print("Keys in file:", captions_data.keys())
print("Example annotation:", captions_data["annotations"][0])


# Take the caption text of the first five annotations as prompts; an entry
# without a "caption" key is skipped.
prompts = [
    entry["caption"]
    for entry in captions_data["annotations"][:5]
    if "caption" in entry
]

print("Sample prompts:", prompts)


# Render one image per sampled caption and save it under save_path as
# coco_generated_<index>.png. The file index is zero-based, while the prompt
# number printed for the reader is one-based.
for number, caption in enumerate(prompts, start=1):
    file_path = f"{save_path}/coco_generated_{number - 1}.png"
    pipe(caption, guidance_scale=7.5).images[0].save(file_path)
    print(f"Prompt {number}: {caption}")
    print(f"Saved -> {file_path}\n")


Keys in file: dict_keys(['info', 'licenses', 'images', 'annotations'])
Example annotation: {'image_id': 179765, 'id': 38, 'caption': 'A black Honda motorcycle parked in front of a garage.'}
Sample prompts: ['A black Honda motorcycle parked in front of a garage.', 'A Honda motorcycle parked in a grass driveway', 'An office cubicle with four different types of computers.', 'A small closed toilet in a cramped space.', 'Two women waiting at a bench next to a street.']


  0%|          | 0/50 [00:00<?, ?it/s]

Prompt 1: A black Honda motorcycle parked in front of a garage.
Saved -> /content/drive/MyDrive/generated_images/coco_generated_0.png



  0%|          | 0/50 [00:00<?, ?it/s]

Prompt 2: A Honda motorcycle parked in a grass driveway
Saved -> /content/drive/MyDrive/generated_images/coco_generated_1.png



  0%|          | 0/50 [00:00<?, ?it/s]

Prompt 3: An office cubicle with four different types of computers.
Saved -> /content/drive/MyDrive/generated_images/coco_generated_2.png



  0%|          | 0/50 [00:00<?, ?it/s]

Prompt 4: A small closed toilet in a cramped space.
Saved -> /content/drive/MyDrive/generated_images/coco_generated_3.png



  0%|          | 0/50 [00:00<?, ?it/s]

Prompt 5: Two women waiting at a bench next to a street.
Saved -> /content/drive/MyDrive/generated_images/coco_generated_4.png



In [14]:
!pip install fastapi uvicorn nest_asyncio pyngrok diffusers transformers torch accelerate safetensors




In [15]:
import gradio as gr
from diffusers import StableDiffusionPipeline
import torch

# Load Stable Diffusion v1.5 with float16 weights and move it to the CUDA device.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")


# Gradio callback: returns the first image the pipeline produces for `prompt`.
def generate(prompt):
    return pipe(prompt, guidance_scale=7.5).images[0]


# Text-in / image-out interface; share=True asks Gradio for a public URL.
demo = gr.Interface(
    fn=generate,
    inputs="text",
    outputs="image",
    title="Text to Image",
)
demo.launch(share=True)


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://54cb5c0697c43b8115.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install fastapi uvicorn nest_asyncio pyngrok diffusers transformers torch accelerate safetensors

from fastapi import FastAPI
from pydantic import BaseModel
from diffusers import StableDiffusionPipeline
import torch, nest_asyncio, uvicorn
from pyngrok import ngrok
from io import BytesIO
from PIL import Image
import base64


# Checkpoint served by the API, loaded with float16 weights onto the CUDA device.
model_id = "runwayml/stable-diffusion-v1-5"
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
).to("cuda")


app = FastAPI()


# Request body of POST /generate: the prompt text to turn into an image.
class Prompt(BaseModel):
    text: str


# POST /generate: run the pipeline on the prompt text and return the first
# image as a base64-encoded PNG under the "image_base64" key.
@app.post("/generate")
def generate(prompt: Prompt):
    rendered = pipe(prompt.text, guidance_scale=7.5).images[0]

    # Serialise the image to PNG in memory rather than through a file.
    with BytesIO() as png_buffer:
        rendered.save(png_buffer, format="PNG")
        encoded_png = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

    return {"image_base64": encoded_png}


import os
from getpass import getpass

# Port shared by the ngrok tunnel and the uvicorn server.
API_PORT = 8000

# Never store the ngrok auth token in the notebook. Read it from the
# NGROK_AUTHTOKEN environment variable, falling back to a hidden interactive
# prompt. A token that was ever pasted into a shared notebook should be
# treated as leaked and revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
if not ngrok_token:
    raise RuntimeError(
        "No ngrok auth token provided; set NGROK_AUTHTOKEN or enter it when prompted."
    )
ngrok.set_auth_token(ngrok_token)

# Open the public tunnel first so its URL is printed before the server call
# below takes over the cell.
public_url = ngrok.connect(API_PORT).public_url
print("🚀 Backend running at:", public_url)

# nest_asyncio is applied before uvicorn.run because the notebook kernel
# presumably already runs an event loop; uvicorn.run then serves the app
# until the cell is interrupted.
nest_asyncio.apply()
uvicorn.run(app, host="0.0.0.0", port=API_PORT)




Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

🚀 Backend running at: https://35886bb86744.ngrok-free.app


INFO:     Started server process [24330]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
