In [1]:
import base64
import json
import mimetypes
import os

from openai import OpenAI

from config import OPENAI_API_KEY

## GPT-4.1-mini Pipeline 
**User Input:**

+ The user uploads an **image** and provides a **prompt** describing what changes should be made to the image.

**Steps: GPT-4.1-mini Pipeline**

+ The uploaded image and the user's prompt are sent to the **GPT-4.1-mini** model.
+ **GPT-4.1-mini** analyzes the visual content of the image and, through the `image_generation` tool, generates a new image based on both the prompt and the uploaded image.

**Output:**

+ A newly generated image that reflects both the original style and the desired changes from the prompt.
+ The generated image is saved to disk, and the original image path, the user prompt, and the generated image path are logged to a JSON results file.

In [None]:
# OpenAI API client used for every request below; authenticated with the
# key imported from the local `config` module.
client = OpenAI(api_key=OPENAI_API_KEY)

def encode_image_base64(image_path):
    """Read a file in binary mode and return its content as Base64 text.

    Args:
        image_path: Path of the file to read.

    Returns:
        The Base64 encoding of the file's bytes, decoded to a ``str``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Directory that receives the generated images and the JSON results log.
OUTPUT_DIR = "outputs/gpt_4_1_prompt"

# Read the test_inputs.json file. Each item must provide a "filename"
# (path of the source image) and a "user_prompt" (the requested change).
with open("test_inputs.json", "r", encoding="utf-8") as f:
    prompt_items = json.load(f)

# Optional: restrict the run to the items at indices 2-4 (the hardest ones).
# prompt_items = prompt_items[2:5]

os.makedirs(OUTPUT_DIR, exist_ok=True)

# One record per attempted item; written to the JSON log after the loop.
results = []

for item in prompt_items:
    image_path = item["filename"]
    user_prompt = item["user_prompt"]

    if not os.path.exists(image_path):
        print(f"File not found: {image_path}")
        continue

    print(f"Generating: {image_path} / prompt: {user_prompt}")
    image_b64 = encode_image_base64(image_path)

    # Declare the media type that matches the file extension in the data URL
    # (e.g. image/png for .png inputs) instead of always claiming JPEG.
    # Unknown extensions fall back to the previous hard-coded value.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    output_path = ""
    error_message = None

    try:
        response = client.responses.create(
            model="gpt-4.1-mini",
            input=[
                {
                    "role": "user",
                    "content": [
                        {"type": "input_text", "text": user_prompt},
                        {"type": "input_image", "image_url": f"data:{mime_type};base64,{image_b64}"}
                    ]
                }
            ],
            tools=[{"type": "image_generation"}],
        )

        # Keep only the image-generation results; each one is decoded below
        # as Base64 image data.
        outputs = [
            output.result
            for output in response.output
            if output.type == "image_generation_call"
        ]

        if outputs:
            stem = os.path.splitext(os.path.basename(image_path))[0]
            target_path = f"{OUTPUT_DIR}/{stem}_gen.png"
            with open(target_path, "wb") as f:
                f.write(base64.b64decode(outputs[0]))
            # Only report the path once the file has been written completely.
            output_path = target_path
            print(f"Saved: {output_path}")
        else:
            print(f"No image returned for: {image_path}")

    except Exception as e:
        # Best effort: one failing item must not abort the whole batch, but
        # the failure is reported with its image and kept in the results log.
        error_message = str(e)
        print(f"Error for {image_path}: {e}")

    record = {
        "original_image": image_path,
        "user_prompt": user_prompt,
        "generated_image_path": output_path
    }
    if error_message is not None:
        record["error"] = error_message
    results.append(record)

# Save the results log as a JSON file.
with open(f"{OUTPUT_DIR}/gpt_4_1_prompt_results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

print("\nCompleted.")


Generating: test_images/01_Apple.png / prompt: A soft red tomato with seeds, in flat cartoon vector style, no background
Saved: outputs/gpt_4_1_prompt/01_Apple_gen.png
Generating: test_images/S_02_Apricot.png / prompt: Draw a shiny red apple with a vibrant green leaf in the same digital illustration style as 'S_02_Apricot.png'. The apple should be centered, well-lit, and have soft shadows like the apricot image.
Saved: outputs/gpt_4_1_prompt/S_02_Apricot_gen.png
Generating: test_images/S_03_Avocado_1.png / prompt: Draw a yellow lemon and a sliced lemon in the same illustration style as the uploaded image (S_03_Avocado_1.png). Match the lighting, colors, and vector look.
Saved: outputs/gpt_4_1_prompt/S_03_Avocado_1_gen.png
Generating: test_images/04_Banana.png / prompt: Draw an apricot with a glossy surface in the same cartoon vector style as the image I uploaded.
Saved: outputs/gpt_4_1_prompt/04_Banana_gen.png
Generating: test_images/05_Black_currant.png / prompt: Using the same style 