<a href="https://colab.research.google.com/github/AmritBag/text_to_image/blob/main/text_to_image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [40]:
import os

import torch
from diffusers import StableDiffusionPipeline

def load_pipeline(model_name="runwayml/stable-diffusion-v1-5", use_cuda=True):
    """Build a Stable Diffusion pipeline and move it to the selected device.

    Args:
        model_name: Identifier handed to
            ``StableDiffusionPipeline.from_pretrained``.
        use_cuda: When true, CUDA is used if ``torch.cuda.is_available()``
            reports it; in every other case the CPU is used.

    Returns:
        The pipeline object returned by ``.to(device)``.
    """
    # Half precision is only requested on CUDA; the CPU path uses float32.
    if use_cuda and torch.cuda.is_available():
        target_device, weights_dtype = "cuda", torch.float16
    else:
        target_device, weights_dtype = "cpu", torch.float32

    pipeline = StableDiffusionPipeline.from_pretrained(
        model_name,
        torch_dtype=weights_dtype,
    )
    return pipeline.to(target_device)

def get_user_input():
    """Interactively collect the image-generation settings from stdin.

    The prompt and negative prompt are read as free text. The remaining
    settings are read through ``get_input``, which falls back to a default
    when the answer is blank, cannot be parsed, or fails validation.

    Returns:
        dict with the keys ``prompt``, ``negative_prompt``,
        ``guidance_scale``, ``num_inference_steps``, ``height``, ``width``
        and ``path``. An empty negative prompt is replaced by
        ``"blurry, distorted, low quality"``.
    """
    def get_input(prompt_text, cast_type=str, default=None, validate=None):
        """Read one value, returning ``default`` for blank or invalid input.

        ``validate`` is an optional predicate applied to the parsed value;
        a falsy result is treated the same way as a parse failure.
        """
        # Compare against None so a falsy default (e.g. 0) is still displayed.
        hint = f"[default: {default}] " if default is not None else ""
        user_input = input(f"{prompt_text} {hint}")
        if not user_input.strip():
            return default
        try:
            value = cast_type(user_input)
        except ValueError:
            print(f"Invalid input. Using default: {default}")
            return default
        if validate is not None and not validate(value):
            print(f"Invalid input. Using default: {default}")
            return default
        return value

    def is_positive(value):
        # At least one denoising step is needed to produce an image.
        return value >= 1

    def is_valid_dimension(value):
        # Reject sizes the pipeline would refuse later (it requires multiples
        # of 8), so the problem surfaces here rather than at generation time.
        return value > 0 and value % 8 == 0

    prompt = input("Enter your prompt: ")
    negative_prompt = input("Enter your negative prompt (optional): ")

    guidance_scale = get_input("Enter guidance scale (e.g., 7.5):", float, 7.5)
    num_inference_steps = get_input(
        "Enter number of inference steps (e.g., 50):", int, 50, is_positive
    )
    height = get_input("Enter image height (e.g., 512):", int, 512, is_valid_dimension)
    width = get_input("Enter image width (e.g., 512):", int, 512, is_valid_dimension)
    path = get_input("Enter path to save image (e.g., ./generated_image.png):", str, "./generated_image.png")

    return {
        "prompt": prompt,
        "negative_prompt": negative_prompt if negative_prompt else "blurry, distorted, low quality",
        "guidance_scale": guidance_scale,
        "num_inference_steps": num_inference_steps,
        "height": height,
        "width": width,
        "path": path
    }


def generate_image(pipe, config):
    """Run the pipeline with the settings in ``config`` and return one image.

    Args:
        pipe: Callable pipeline; it is invoked with keyword arguments and its
            result must expose an ``images`` sequence.
        config: Mapping providing ``prompt``, ``negative_prompt``,
            ``guidance_scale``, ``num_inference_steps``, ``height`` and
            ``width``. Any other keys are ignored.

    Returns:
        The first element of the result's ``images``.
    """
    forwarded_keys = (
        "prompt",
        "negative_prompt",
        "guidance_scale",
        "num_inference_steps",
        "height",
        "width",
    )
    # Only the generation settings are passed on; e.g. "path" stays behind.
    call_kwargs = {key: config[key] for key in forwarded_keys}
    result = pipe(**call_kwargs)
    return result.images[0]
# Build the pipeline once at the top level; main() reads this global `pipe`.
print("Loading Stable Diffusion pipeline...")
pipe = load_pipeline()


Loading Stable Diffusion pipeline...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [35]:
def main():
    """Prompt for settings, generate one image and save it to disk.

    Relies on the module-level ``pipe`` together with ``get_user_input`` and
    ``generate_image``. The image is written to ``config["path"]``, or to
    ``generated_image.png`` if that key is absent.
    """
    print("Getting user input...")
    config = get_user_input()

    save_path = config.get("path", "generated_image.png")
    # Create the destination folder before generating, so a missing directory
    # cannot make the save fail after the image has already been produced.
    parent_dir = os.path.dirname(save_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    print("Generating image...")
    image = generate_image(pipe, config)

    image.save(save_path)
    print(f"Image saved at '{save_path}'")

    print("Image generation complete!")

# Run the interactive flow when this code executes as the main module.
if __name__ == "__main__":
    main()


Getting user input...
Enter your prompt: Humanoid robot wearing over-head headphone
Enter your negative prompt (optional): 
Enter guidance scale (e.g., 7.5): [default: 7.5] 
Enter number of inference steps (e.g., 50): [default: 50] 
Enter image height (e.g., 512): [default: 512] 
Enter image width (e.g., 512): [default: 512] 
Enter path to save image (e.g., ./generated_image.png): [default: ./generated_image.png] ./robot.png
Generating image...


  0%|          | 0/50 [00:00<?, ?it/s]

Image saved at './robot.png'
Image generation complete!
