<a href="https://colab.research.google.com/github/Saadpie1/SteveAI-Image-Generator/blob/main/SteveAI_image_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **ULTRA REALISTIC IMAGE GENERATOR**

In [6]:
# @title Installing
import torch
gpu = torch.cuda.is_available()

!pip install diffusers transformers accelerate safetensors openai deep-translator numba --upgrade
from IPython.display import clear_output
from diffusers import DiffusionPipeline

clear_output()

# Define the model ID for Stable Diffusion XL Base 1.0
MODEL_ID = "stabilityai/stable-diffusion-xl-base-1.0"

# --- Global pipe creation with DiffusionPipeline ---
global pipe # Declare pipe as global

if gpu:
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True
    ).to("cuda")
    clear_output()
    print("✅ GPU usage enabled")
    print("OK! Model: Stable Diffusion XL Base 1.0")
else:
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
        use_safetensors=True
    )
    clear_output()
    print("⚠️ CPU usage (very slow)")
    print("OK! Model: Stable Diffusion XL Base 1.0")

✅ GPU usage enabled
OK! Model: Stable Diffusion XL Base 1.0


In [34]:
# @title Generation1

# Note: The prompt was changed to a safe placeholder.
# SDXL Base 1.0 works best with a Guidance Scale (CFG) between 5.0 and 8.0.
# It also needs more steps than a Turbo model, typically 25 to 50.

from openai import OpenAI
from deep_translator import GoogleTranslator
from diffusers import DiffusionPipeline
from pprint import pprint
from google.colab import files
from google.colab import drive # Import drive

# --- CONFIGURATION ---
import os
from getpass import getpass

# Never store API keys in the notebook. The key is read from the
# A4F_API_KEY environment variable, or requested interactively without echo.
# The key that used to be hardcoded here has been published with the notebook
# and should be treated as leaked and rotated.
a4f_api_key = os.environ.get("A4F_API_KEY") or getpass("A4F API key: ")
a4f_base_url = "https://api.a4f.co/v1"

Prompt = "An aesthetic photograph from 1900 of the best PC gaming setup at the time, realistic, dymanic lighting, 8k."  # @param {type:"string"}
Negative_Prompt = "blurry, low quality, deformed, worst quality, noise, disfigured, text"  # @param {type:"string"}
# NOTE(review): the note at the top of this cell recommends 25 to 50 steps;
# 100 exceeds that range - confirm this is intended.
Steps = 100  # @param {type:"number"}
CFG_Scale = 8.0  # @param {type:"number"}

# --- CLIENT SETUP ---
# OpenAI-compatible client pointed at the A4F endpoint.
client = OpenAI(
    api_key=a4f_api_key,
    base_url=a4f_base_url,
)

# --- CHAT COMPLETION WITH A4F ---
def refine_prompt():
    """Ask the chat model behind ``client`` to refine the global ``Prompt``.

    Returns:
        The refined prompt text. Falls back to the unmodified ``Prompt`` when
        the request raises or when the reply carries no usable text.
    """
    try:
        completion = client.chat.completions.create(
            model="provider-3/gpt-5-nano",
            messages=[
                {"role": "system", "content": "You are a helpful Image generation Prompt refiner. Only give a snippet and chose the best option yourself. Use less than 77 tokens because we are using CLIP text encoder ."},
                {"role": "user", "content": Prompt},
            ],
            temperature=0.7,
            max_tokens=150,
        )
        refined = completion.choices[0].message.content
    except Exception as error:
        # Deliberate best-effort: a failed refinement must not block generation.
        print("Error getting chat completion:", error)
        return Prompt

    # The message content may be None or blank; passing that downstream would
    # break translation and image generation, so keep the original prompt.
    if not refined or not refined.strip():
        print("Refinement returned no text. Using the original prompt.")
        return Prompt

    print("Refined Prompt:", refined)
    return refined

# --- PROMPT TRANSLATION ---
def translate_prompt(prompt):
    """Translate ``prompt`` to English with GoogleTranslator (source="auto").

    Returns the translated text when it differs from the input (comparison
    ignores case and surrounding whitespace); otherwise returns ``prompt``
    unchanged. Any exception is reported and ``prompt`` is returned as-is.
    """
    try:
        english = GoogleTranslator(source="auto", target="en").translate(prompt)
        if english.strip().lower() != prompt.strip().lower():
            print(f"Translated prompt: {english}")
            return english
        print("Prompt already in English. Skipping translation.")
        return prompt
    except Exception as exc:
        print("Translation failed:", exc)
        return prompt

# --- IMAGE GENERATION ---
def generate_image(prompt_en, negative_prompt, steps, cfg_scale, gpu=True):
    """Run the global ``pipe`` once and return the first generated image.

    Args:
        prompt_en: Prompt text passed to the pipeline as ``prompt``.
        negative_prompt: Text passed as ``negative_prompt``.
        steps: Passed as ``num_inference_steps``.
        cfg_scale: Passed as ``guidance_scale``.
        gpu: Only controls whether the slow-CPU warning is printed.

    Returns:
        ``pipe(...).images[0]`` at a fixed 1024x1024 size, or ``None`` when no
        global ``pipe`` exists.
    """
    if "pipe" not in globals():

        print("Error: The model pipeline ('pipe') has not been initialized. Please run the installation cell first.")
        return None

    if not gpu:
        print("Warning: Running on CPU will be extremely slow.")

    result = pipe(
        prompt=prompt_en,
        negative_prompt=negative_prompt,
        num_inference_steps=steps,
        guidance_scale=cfg_scale,
        width=1024,
        height=1024,
    )
    return result.images[0]

# --- WORKFLOW ---
from IPython.display import display

refined_prompt = refine_prompt()
PromptEN = translate_prompt(refined_prompt)

# Pass the real device flag set by the installation cell (defaulting to True
# when that cell has not run) so the CPU warning reflects the actual runtime.
image = generate_image(
    PromptEN, Negative_Prompt, Steps, CFG_Scale, gpu=globals().get("gpu", True)
)

if image is None:
    # generate_image returns None when the pipeline is missing.
    print("No image was generated; skipping display and save.")
else:
    # A bare `image` expression is only rendered when it is the last line of
    # a cell, so show it explicitly.
    display(image)

    # Mount Google Drive
    drive.mount('/content/drive')

    # Define the path to save the image in your Drive
    save_path = '/content/drive/MyDrive/generated_image_1.png' # You can change the filename

    # Save the image to Drive
    try:
        image.save(save_path)
        print(f"Image saved to {save_path}")
    except Exception as e:
        print(f"Error saving image to Drive: {e}")

Refined Prompt: An aesthetic photograph set in the 1900s, sepia-toned, a reimagined PC gaming setup using brass, wood, vacuum tubes, analog reels, early CRT monitor glow, hyper-detailed, dramatic dynamic lighting, ultra-realistic 8k resolution, cinematic depth of field.
Prompt already in English. Skipping translation.


  0%|          | 0/100 [00:00<?, ?it/s]

Mounted at /content/drive
Image saved to /content/drive/MyDrive/generated_image_1.png


In [36]:
# @title Generation2

from openai import OpenAI
from IPython.display import Image
from deep_translator import GoogleTranslator # Import GoogleTranslator
from google.colab import drive # Import drive
import requests # Import requests

# Input prompt for this cell, read as a global by refine_prompt() below.
# The default text matches the refined prompt printed by the Generation1 cell.
prompt="An aesthetic photograph set in the 1900s, sepia-toned, a reimagined PC gaming setup using brass, wood, vacuum tubes, analog reels, early CRT monitor glow, hyper-detailed, dramatic dynamic lighting, ultra-realistic 8k resolution, cinematic depth of field." # @param {type:"string"}

def refine_prompt():
    """Ask the chat model behind ``client`` to refine the global ``prompt``.

    Returns:
        The refined prompt text. Falls back to the unmodified ``prompt`` when
        the request raises or when the reply carries no usable text.
    """
    try:
        completion = client.chat.completions.create(
            model="provider-3/gpt-5-nano",
            messages=[
                {"role": "system", "content": "You are a helpful Image generation Prompt refiner. Only give a snippet and chose the best option yourself. Use more than detailed tokens because we are using IMAGEgen 4."},
                {"role": "user", "content": prompt},
            ],
            temperature=0.7,
            max_tokens=150,
        )
        refined = completion.choices[0].message.content
    except Exception as error:
        # Deliberate best-effort: a failed refinement must not block generation.
        print("Error getting chat completion:", error)
        return prompt

    # The message content may be None or blank; sending that to the image
    # endpoint would fail, so keep the original prompt instead.
    if not refined or not refined.strip():
        print("Refinement returned no text. Using the original prompt.")
        return prompt

    print("Refined Prompt:", refined)
    return refined

# --- PROMPT TRANSLATION ---
def translate_prompt(prompt):
    """Return an English version of ``prompt`` using GoogleTranslator.

    The translator is called with source="auto" and target="en". If the
    result equals the input after trimming and lower-casing, the input is
    returned untouched; otherwise the translation is returned. On any
    exception the failure is printed and the input is returned.
    """
    try:
        result = GoogleTranslator(source="auto", target="en").translate(prompt)
        same_text = result.strip().lower() == prompt.strip().lower()
        if same_text:
            print("Prompt already in English. Skipping translation.")
        else:
            print(f"Translated prompt: {result}")
        return prompt if same_text else result
    except Exception as err:
        print("Translation failed:", err)
        return prompt

import os
from getpass import getpass

# Never store API keys in the notebook. The key is read from the
# A4F_API_KEY environment variable, or requested interactively without echo.
# The key that used to be hardcoded here has been published with the notebook
# and should be treated as leaked and rotated.
client = OpenAI(
    api_key=os.environ.get("A4F_API_KEY") or getpass("A4F API key: "),
    base_url="https://api.a4f.co/v1"
)

# Refine and translate the prompt
refined_prompt = refine_prompt()
PromptEN = translate_prompt(refined_prompt)


# Request a single 1024x1024 image from the hosted model.
response = client.images.generate(
    prompt=PromptEN,
    model="provider-4/imagen-4",
    n=1,
    size="1024x1024")

image_url = response.data[0].url
print(f"Generated image URL: {image_url}")

display(Image(url=image_url))

# Mount Google Drive
drive.mount('/content/drive')

# Define the path to save the image in your Drive
save_path = '/content/drive/MyDrive/generated_image_2.png' # You can change the filename

# Download and save the image from the URL
try:
    # Bound the wait and fail on HTTP errors, so an error page is never
    # written to Drive under a .png name.
    download = requests.get(image_url, timeout=60)
    download.raise_for_status()
    with open(save_path, 'wb') as handler:
        handler.write(download.content)
    print(f"Image saved to {save_path}")
except Exception as e:
    print(f"Error saving image to Drive: {e}")

Refined Prompt: Ultra-detailed photorealistic 8k image, set in the early 1900s with sepia toning. A reimagined PC gaming setup built from gleaming brass, aged walnut wood, and exposed vacuum tubes, with analog reel-to-reel devices tucked beside a vintage CRT monitor casting a warm amber glow. Rich patina, micro-scratches, and riveted brass trim on every surface. A meticulously polished desk with ornate brass inlays and subtle gear-work, dust motes drifting in sunbeams. Dramatic dynamic lighting: a warm tungsten key light from the left, strong rim light along brass edges, and a faint backlight to separate scene from depthy background. Cinematic depth of field with ultra-shallow DOF—foreground hardware in razor-sharp focus, monitor glow softly reflected on brass and glass, background gently blurred. Hyper-real reflections, film grain, and a vignette to heighten antique atmosphere. Color grading: deep sepia, burnt umber shadows, creamy highlights. Composition: low-angle, close-up on the d

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Image saved to /content/drive/MyDrive/generated_image_2.png
