In [None]:
# Step 1: Install Required Dependencies
# This cell installs all necessary libraries for the AI Image Generator
# NOTE(review): none of the packages below are version-pinned, so every fresh
# runtime installs whatever release is current at that moment. Consider pinning
# versions if reproducible runs matter.

# Model/pipeline libraries imported by the Step 2 cell
!pip install -q diffusers transformers accelerate safetensors
# Tensor backend
!pip install -q torch torchvision
# Image handling (used for the PIL imports and the watermark)
!pip install -q Pillow
# Web UI
!pip install -q gradio  # Using Gradio instead of Streamlit for Colab compatibility

# This message is printed unconditionally: nothing here checks the exit status
# of the install commands above, so it appears even if one of them failed.
print("✓ All dependencies installed successfully!")

✓ All dependencies installed successfully!


In [None]:
# Step 2: Import Libraries and Check GPU Availability

import torch
import os
from datetime import datetime
from PIL import Image
import json
import re
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
import gradio as gr
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"\u2713 Using device: {device}")

# Report what hardware inference will run on
if device != "cuda":
    print("  ⚠ Running on CPU - Generation will be slower")
else:
    # Index 0 = the first visible CUDA device; total_memory is in bytes
    print(f"  GPU: {torch.cuda.get_device_name(0)}")
    print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Make sure the folder that receives images and metadata exists
# (exist_ok=True keeps re-running this cell harmless)
os.makedirs('/content/outputs', exist_ok=True)
print(f"\u2713 Output directory created at: /content/outputs")

✓ Using device: cuda
  GPU: Tesla T4
  Memory: 15.83 GB
✓ Output directory created at: /content/outputs


In [None]:
# Step 3: Load Stable Diffusion Model
# Using Stable Diffusion v1.5 for compatibility and speed

print("Loading Stable Diffusion model...")
print("This may take a few minutes on first run...\n")

model_id = "runwayml/stable-diffusion-v1-5"

# Half precision on GPU, full precision on CPU
model_dtype = torch.float16 if device == "cuda" else torch.float32

try:
    # The safety checker is loaded separately and handed to the pipeline as a
    # ready-made module, so it is given the pipeline's dtype explicitly to keep
    # every component in the same precision.
    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker",
        torch_dtype=model_dtype,
    )

    # Load pipeline with optimizations
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id,
        torch_dtype=model_dtype,
        safety_checker=safety_checker,
    )

    # Use efficient scheduler (built from the config of the scheduler that
    # shipped with the checkpoint)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

    # Move to device
    pipe = pipe.to(device)

    if device == "cpu":
        # Enable memory optimization for CPU
        pipe.enable_attention_slicing()
        print("✓ Enabled attention slicing for CPU optimization")
    else:
        # Enable memory efficient attention for GPU. This is best-effort:
        # xformers is not installed by Step 1, so failure here is expected and
        # only downgrades to the default attention implementation.
        try:
            pipe.enable_xformers_memory_efficient_attention()
            print("✓ Enabled xformers memory efficient attention")
        except Exception:
            # `Exception` (not a bare except) so Ctrl-C / kernel interrupts
            # are not swallowed while the model is loading.
            print("⚠ xformers not available, using default attention")

    print(f"\n✓ Model loaded successfully on {device}!")
    print(f"  Model: {model_id}")

except Exception as e:
    print(f"❌ Error loading model: {str(e)}")
    print("\nTrying alternative loading method...")
    # Fallback to basic loading. Note this path uses the checkpoint's default
    # precision, scheduler and bundled components: none of the optimizations
    # from the try-block above are applied. If this also fails, the exception
    # propagates and `pipe` is left undefined.
    pipe = StableDiffusionPipeline.from_pretrained(model_id)
    pipe = pipe.to(device)
    print("✓ Model loaded with basic configuration")

Loading Stable Diffusion model...
This may take a few minutes on first run...



config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 13 files:   0%|          | 0/13 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!
You have passed a non-standard module StableDiffusionSafetyChecker(
  (vision_model): CLIPVisionModel(
    (vision_model): CLIPVisionTransformer(
      (embeddings): CLIPVisionEmbeddings(
        (patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14), bias=False)
        (position_embedding): Embedding(257, 1024)
      )
      (pre_layrnorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (encoder): CLIPEncoder(
        (layers): ModuleList(
          (0-23): 24 x CLIPEncoderLayer(
            (self_attn): CLIPAttention(
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
            )
            (layer_norm1): LayerNorm((1024,), eps=1e-05, el

⚠ xformers not available, using default attention

✓ Model loaded successfully on cuda!
  Model: runwayml/stable-diffusion-v1-5


In [None]:
# Step 4: Prompt Engineering and Content Filtering

# Blocklist consulted by check_prompt_safety; order matters because the first
# matching entry is the one reported back to the user.
NSFW_KEYWORDS = [
    'nude', 'naked', 'nsfw', 'xxx', 'porn', 'sexual',
    'explicit', 'violent', 'gore', 'blood', 'weapon',
]

def check_prompt_safety(prompt):
    """Screen a prompt against the NSFW_KEYWORDS blocklist.

    Matching is a case-insensitive *substring* test, so a keyword embedded in
    a longer word (e.g. 'violent' inside 'nonviolent') is flagged as well.

    Args:
        prompt: The user's prompt text (must support ``.lower()``).

    Returns:
        A ``(is_safe, message)`` tuple. ``is_safe`` is False when any keyword
        occurs in the prompt, and the message names the first keyword (in
        list order) that matched.
    """
    lowered = prompt.lower()
    hit = next((word for word in NSFW_KEYWORDS if word in lowered), None)
    if hit is None:
        return True, "✓ Safe"
    return False, f"❌ Inappropriate content: '{hit}'"

def enhance_prompt(prompt, style="photorealistic"):
    """Append a style preset's quality tags to a prompt.

    Args:
        prompt: The user's original prompt text.
        style: Name of the preset to apply. Names are matched exactly; any
            unrecognised name falls back to the "photorealistic" preset.

    Returns:
        The prompt followed by ", " and the preset's comma-separated tags.
    """
    style_tags = {
        "photorealistic": "highly detailed, 4K, professional photography, sharp focus, realistic lighting",
        "artistic": "beautiful, artistic, detailed painting, masterpiece, trending on artstation",
        "cartoon": "cartoon style, colorful, vibrant, digital art, cute, animated",
        "cinematic": "cinematic lighting, dramatic, epic scene, movie poster style",
        "fantasy": "fantasy art, magical, ethereal, detailed, mystical atmosphere",
    }
    if style in style_tags:
        suffix = style_tags[style]
    else:
        suffix = style_tags["photorealistic"]
    return "{}, {}".format(prompt, suffix)

def add_watermark(img):
    """Stamp an "AI Generated" label in the bottom-right corner of an image.

    The label is positioned from its measured size (instead of a fixed pixel
    offset) so it stays fully inside the image whatever the image size or the
    default font's metrics. A black shadow is drawn first and the white text
    on top of it, so the label stays legible on light and dark backgrounds.

    Args:
        img: A PIL image. It is drawn on in place. The RGB fill tuples assume
            an RGB-like mode — NOTE(review): confirm if other modes are passed.

    Returns:
        The same image object, now watermarked.
    """
    from PIL import ImageDraw, ImageFont

    text = "AI Generated - Talrn Assessment"
    margin = 5  # gap in pixels between the label and the image edges

    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()

    # Right/bottom extents of the text when anchored at (0, 0)
    _, _, text_right, text_bottom = draw.textbbox((0, 0), text, font=font)

    w, h = img.size
    # Clamp at 0 so the label starts at the edge on very small images rather
    # than at a negative coordinate.
    x = max(0, w - text_right - margin)
    y = max(0, h - text_bottom - margin)

    draw.text((x + 1, y + 1), text, fill=(0, 0, 0), font=font)  # shadow
    draw.text((x, y), text, fill=(255, 255, 255), font=font)
    return img

print("✓ Safety and prompt engineering ready")


✓ Safety and prompt engineering ready


In [None]:
# Step 5: Image Generation Function with Storage

def generate_images(prompt, num_images=1, style="photorealistic", negative_prompt="", steps=20, seed=None):
    """
    Generate images from text prompt with safety checks and metadata storage

    Each image is watermarked, saved as a PNG under /content/outputs, and
    accompanied by a JSON file holding the parameters needed to reproduce it.

    Args:
        prompt: Text description of the image. Must be a non-blank string.
        num_images: How many images to generate (one pipeline call each).
        style: Style preset name passed to enhance_prompt.
        negative_prompt: Things to avoid; a default is used when empty.
        steps: Number of inference steps per image.
        seed: Optional integer seed. Image ``i`` (0-based) uses ``seed + i``.
            When None a random seed is drawn, and it is still recorded in the
            metadata so the result can be regenerated later.

    Returns:
        ``(images, message)``. ``images`` is a list of PIL images on success,
        or None when the prompt is rejected or generation raises; ``message``
        is a human-readable status string in either case.
    """
    # Reject missing/blank prompts up front: a None prompt would otherwise
    # raise inside the safety check, outside the try-block below.
    if not isinstance(prompt, str) or not prompt.strip():
        return None, "❌ Error: Please enter a prompt"

    # Safety check
    is_safe, safety_msg = check_prompt_safety(prompt)
    if not is_safe:
        return None, safety_msg

    # Enhance prompt
    enhanced_prompt = enhance_prompt(prompt, style)

    # Default negative prompt
    if not negative_prompt:
        negative_prompt = "blurry, bad quality, distorted, ugly, low resolution"

    # Fixed sampling settings, named so they can be recorded in the metadata
    guidance_scale = 7.5
    height = 512
    width = 512

    # Draw a random 32-bit seed when the caller did not supply one
    if seed is None:
        base_seed = int.from_bytes(os.urandom(4), "big")
    else:
        base_seed = int(seed)

    print(f"\n✓ Generating {num_images} image(s)...")
    print(f"  Original prompt: {prompt}")
    print(f"  Enhanced prompt: {enhanced_prompt}")
    print(f"  Style: {style}")
    print(f"  Inference steps: {steps}")
    print(f"  Seed: {base_seed}")

    generated_images = []
    # One timestamp per call; the per-image index keeps filenames distinct
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    try:
        for i in range(num_images):
            print(f"\n  Generating image {i+1}/{num_images}...")

            # A dedicated generator per image makes each one reproducible
            # from its recorded seed, independent of the others in the batch.
            image_seed = base_seed + i
            generator = torch.Generator(device=device).manual_seed(image_seed)

            # Generate image
            with torch.no_grad():
                image = pipe(
                    enhanced_prompt,
                    negative_prompt=negative_prompt,
                    num_inference_steps=steps,
                    guidance_scale=guidance_scale,
                    height=height,
                    width=width,
                    generator=generator,
                ).images[0]

            # Add watermark
            image = add_watermark(image)

            # Save image with metadata
            filename = f"image_{timestamp}_{i+1}.png"
            filepath = os.path.join('/content/outputs', filename)
            image.save(filepath)

            # Save metadata
            metadata = {
                "filename": filename,
                "prompt": prompt,
                "enhanced_prompt": enhanced_prompt,
                "style": style,
                "negative_prompt": negative_prompt,
                "steps": steps,
                "seed": image_seed,
                "guidance_scale": guidance_scale,
                "width": width,
                "height": height,
                "timestamp": timestamp,
                "device": device
            }

            # Swap only the extension, leaving the rest of the path untouched
            metadata_file = os.path.splitext(filepath)[0] + '_metadata.json'
            with open(metadata_file, 'w', encoding='utf-8') as f:
                json.dump(metadata, f, indent=2)

            generated_images.append(image)
            print(f"  ✓ Saved: {filepath}")

        print(f"\n✓ Successfully generated {num_images} image(s)!")
        return generated_images, f"✓ Successfully generated {num_images} image(s)"

    except Exception as e:
        # Any failure aborts the batch; files already written stay on disk
        # but no images are returned.
        error_msg = f"❌ Error: {str(e)}"
        print(error_msg)
        return None, error_msg

print("✓ Image generation function ready")

✓ Image generation function ready


In [None]:
# Step 6: Build Gradio Web Interface

def gradio_generate(prompt, num_images, style, negative_prompt, steps):
    """
    Gradio click handler around generate_images.

    The two numeric inputs are converted to int before being passed on, and a
    failed generation (images is None) is turned into an empty list so the
    gallery output always receives a list.

    Returns:
        (images, message): list of images (possibly empty) and a status string.
    """
    count = int(num_images)
    n_steps = int(steps)
    images, message = generate_images(prompt, count, style, negative_prompt, n_steps)
    gallery_items = [] if images is None else images
    return gallery_items, message

# Create Gradio interface
# Layout: a header, then one row with two columns (inputs on the left, results
# on the right), then clickable example prompts and a usage note. Components
# are placed in the order they are created inside each `with` context.
with gr.Blocks(title="AI Image Generator - Talrn Assessment") as demo:
    # Page header and feature summary
    gr.Markdown("""
    # 🎨 AI-Powered Image Generator
    ### Talrn ML Internship Task - Text-to-Image Generation

    Generate high-quality images from text descriptions using Stable Diffusion.

    **Features:**
    - Multiple style presets (Photorealistic, Artistic, Cartoon, Cinematic, Fantasy)
    - Content filtering for safe generation
    - Automatic watermarking
    - Metadata storage with each image
    """)

    with gr.Row():
        # Left column: all generation inputs
        with gr.Column():
            prompt_input = gr.Textbox(
                label="📝 Prompt",
                placeholder="Describe the image you want to generate...",
                lines=3
            )

            # Choices are the same keys enhance_prompt looks up in its style table
            style_dropdown = gr.Dropdown(
                choices=["photorealistic", "artistic", "cartoon", "cinematic", "fantasy"],
                value="photorealistic",
                label="🎨 Style"
            )

            # The two sliders share one row
            with gr.Row():
                num_images_slider = gr.Slider(
                    minimum=1,
                    maximum=4,
                    value=1,
                    step=1,
                    label="🔢 Number of Images"
                )

                steps_slider = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=20,
                    step=5,
                    label="🔄 Inference Steps (Higher = Better Quality)"
                )

            # Pre-filled with the same text generate_images falls back to when
            # the negative prompt is left empty
            negative_prompt_input = gr.Textbox(
                label="⛔ Negative Prompt (Optional)",
                placeholder="What to avoid in the image...",
                lines=2,
                value="blurry, bad quality, distorted, ugly, low resolution"
            )

            generate_btn = gr.Button("✨ Generate Images", variant="primary", size="lg")

        # Right column: generated images and a read-only status line
        with gr.Column():
            output_gallery = gr.Gallery(
                label="🖼️ Generated Images",
                columns=2,
                height="auto"
            )
            status_output = gr.Textbox(label="📊 Status", interactive=False)

    # Examples
    # Each row lists values in the same order as `inputs` below:
    # prompt, number of images, style, negative prompt, steps.
    # No `fn` is passed here, so this block only references the input components.
    gr.Markdown("### 💡 Example Prompts")
    gr.Examples(
        examples=[
            ["A futuristic city at sunset", 1, "cinematic", "", 20],
            ["Portrait of a robot in Van Gogh style", 1, "artistic", "", 25],
            ["A cute cat wearing a wizard hat", 1, "cartoon", "", 20],
            ["A mystical forest with glowing mushrooms", 1, "fantasy", "", 25],
            ["Professional photo of a mountain landscape", 1, "photorealistic", "", 30]
        ],
        inputs=[prompt_input, num_images_slider, style_dropdown, negative_prompt_input, steps_slider]
    )

    # Connect button to function
    # `inputs` follow gradio_generate's parameter order; its (images, message)
    # return value maps onto the gallery and the status textbox.
    generate_btn.click(
        fn=gradio_generate,
        inputs=[prompt_input, num_images_slider, style_dropdown, negative_prompt_input, steps_slider],
        outputs=[output_gallery, status_output]
    )

    # Footer: where files are saved and the responsible-use notice
    gr.Markdown("""
    ---
    **ℹ️ Note:** Images are automatically saved to `/content/outputs/` with metadata.

    **⚠️ Responsible Use Guidelines:**
    - Do not generate inappropriate, violent, or harmful content
    - All images are watermarked to indicate AI generation
    - Content filtering is active to prevent misuse
    """)

# The interface is only built in this cell; the actual demo.launch() call is
# in the next cell (Step 7).
print("✓ Gradio interface created!")
print("\nLaunching web interface...\n")

✓ Gradio interface created!

Launching web interface...



In [None]:
# Step 7: Launch the Web Interface

# Launch with share=True to get a public URL
# NOTE(review): no `auth=` argument is passed, so presumably anyone who has the
# share link can use the app while this runtime is alive — confirm that is
# acceptable before distributing the URL.
# debug=False: per the launch output, errors are only shown in the Colab
# notebook when debug=True is set.
demo.launch(share=True, debug=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2c71e48cf6897dc564.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




---

# 📚 PROJECT DOCUMENTATION

## AI-Powered Image Generator - Talrn ML Internship Assessment

### 🎯 Project Overview
This project implements a complete **Text-to-Image Generation System** using Stable Diffusion v1.5, featuring a modern Gradio web interface, content filtering, prompt engineering, and comprehensive metadata storage.

### ✨ Key Features
- **Multi-Style Generation**: 5 style presets (Photorealistic, Artistic, Cartoon, Cinematic, Fantasy)
- **Safety First**: Automatic content filtering for inappropriate prompts
- **Smart Prompts**: Built-in prompt engineering to enhance quality
- **Metadata Storage**: Every image saved with full generation parameters
- **Watermarking**: All images marked as AI-generated
- **GPU/CPU Support**: Automatic device detection with CPU fallback
- **Web Interface**: User-friendly Gradio UI with live preview

### 🛠️ Technology Stack
- **Model**: Stable Diffusion v1.5 (runwayml/stable-diffusion-v1-5)
- **Framework**: PyTorch
- **UI**: Gradio
- **Libraries**: Diffusers, Transformers, Accelerate, PIL

### 💻 Hardware Requirements
- **GPU**: Recommended (15GB+ VRAM for optimal performance)
- **CPU**: Supported with reduced speed (16GB+ RAM recommended)
- **Storage**: ~5GB for model weights

### 🚀 Quick Start (This Colab)
1. Run all cells in order (Ctrl+F9)
2. Wait for model download (~2-3 minutes)
3. Access the Gradio interface (URL appears after launch)
4. Enter your prompt and generate!

### 📁 Project Structure
```
/content/outputs/          # Generated images & metadata
Step 1: Dependencies       # Install required packages
Step 2: Imports & Setup    # Initialize environment
Step 3: Model Loading      # Load Stable Diffusion
Step 4: Prompt Engineering # Safety & enhancement
Step 5: Generation Logic   # Core image generation
Step 6: Gradio Interface   # Web UI
Step 7: Launch             # Start server
```

### 💡 Prompt Engineering Tips
- **Be Specific**: "A red sports car at sunset" > "a car"
- **Add Quality Terms**: The system auto-appends style-specific quality terms (e.g. "highly detailed, 4K, professional photography" for the photorealistic style)
- **Use Negative Prompts**: Avoid unwanted elements
- **Experiment with Styles**: Each style adds unique characteristics

### 🔒 Responsible AI Use
⚠️ **Content Policy**:
- No violent, explicit, or harmful content
- All images are watermarked
- Content filtering is always active
- Respect intellectual property

### 📊 Performance Notes
- **GPU (T4)**: ~15-20 seconds per image
- **CPU**: ~2-5 minutes per image
- **Steps**: 20 (balanced), 30+ (higher quality, slower)

### 🔧 Customization Options
You can modify:
- Image resolution (currently 512x512)
- Inference steps (10-50)
- Guidance scale (default 7.5)
- Number of images per batch (1-4)

### 💾 Output Files
Each generation creates:
```
image_YYYYMMDD_HHMMSS_1.png            # The generated image
image_YYYYMMDD_HHMMSS_1_metadata.json  # Full parameters
```

### ❗ Limitations
- Generation time varies with hardware
- CPU mode is significantly slower
- Limited to 512x512 resolution (optimized for speed)
- Public Gradio share links expire after 1 week

### 🚀 Future Enhancements
- Higher resolution support (768x768, 1024x1024)
- Custom model fine-tuning
- Img2img transformation
- Inpainting capabilities
- Style transfer features
- Batch processing

### 📝 Example Prompts
1. **Landscape**: "A serene mountain lake at golden hour, mist rising, professional photography"
2. **Portrait**: "Portrait of a cyberpunk character, neon lights, highly detailed, digital art"
3. **Abstract**: "Colorful abstract painting, flowing shapes, vibrant colors, modern art"
4. **Fantasy**: "A magical forest with bioluminescent plants, ethereal atmosphere, fantasy art"

### 👤 Author
Submitted for Talrn ML Internship Assessment

### 💬 Support
For questions about this implementation, refer to the inline code comments or documentation.

---