In [None]:
import base64
import os
import warnings

import google.generativeai as genai
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline
from PIL import Image

In [None]:
# Read the Gemini API key from the environment so it is never stored in the notebook.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # os.getenv returns None when the variable is unset. Surface that here instead of
    # leaving the problem to show up on the first API request.
    warnings.warn(
        "GOOGLE_API_KEY is not set (or is empty); Gemini requests may fail to authenticate."
    )
# The value is still passed through unchanged so the library can apply its own handling.
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
def load_image_as_base64(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode and read in full. The bytes are
    base64-encoded and the encoded bytes are decoded as UTF-8, so the
    result is a ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

In [None]:
# Path of the input photo. It is a bare filename, so it is resolved relative to the
# current working directory of the kernel.
image_path = "car.jpg"   # your local image
# Base64 text of the file; a later cell sends it to Gemini as the image payload.
image_b64 = load_image_as_base64(image_path)

In [None]:
# Model handle used by the generate_content call in a later cell.
# NOTE(review): confirm that this model name is still served by the API.
model = genai.GenerativeModel("gemini-1.5-flash")

In [None]:
# Text instruction sent to Gemini together with the image.
prompt = (
    "Please rectify and describe the car image: "
    "adjust brightness/contrast, fix perspective issues, "
    "and highlight key details."
)


In [None]:
# Build the image part separately; the MIME type is fixed to JPEG here.
image_part = {"mime_type": "image/jpeg", "data": image_b64}
# One request: the image part first, followed by the text prompt.
response = model.generate_content([image_part, prompt])

In [None]:
# Text of Gemini's reply; a later cell passes it to the pix2pix pipeline as the edit prompt.
instructions = response.text
# Echo the reply so it can be inspected before the image edit runs.
print("Gemini says:", instructions)

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only requested for CUDA. On CPU, float16 inference is not reliably
# supported, so full precision is used there.
pipe_dtype = torch.float16 if device == "cuda" else torch.float32
# Load the InstructPix2Pix weights and move the whole pipeline to the chosen device.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=pipe_dtype
).to(device)

In [None]:
# Load the source photo and convert it to RGB mode before it goes to the pipeline.
image = Image.open(image_path).convert("RGB")
# Gemini's reply is used as the edit prompt, with 30 inference steps.
# NOTE(review): the reply is free-form text; confirm it is a suitable (short, imperative)
# instruction for this pipeline.
pipe_output = pipe(instructions, image=image, num_inference_steps=30)
# Keep the first image from the pipeline output.
edited_image = pipe_output.images[0]

In [None]:
# Hold the destination in one name so the saved file and the message cannot drift apart.
output_path = "car_rectified.png"
edited_image.save(output_path)
print(f"✅ Rectified image saved as {output_path}")

/Users/Spandan_Pan/My_Folders/repos/image_process/test.ipynb