In [None]:
# Install dependencies (run once if needed)
# ! pip install requests pillow
# ! pip install torch torchvision xformers  # Only if running SD WebUI locally




## Step 1: Environment Check & Dependencies

Verify that all required packages and CUDA support are available.


## Step 4: Generate and Save Image

Create an image using the enhanced prompts via the Stable Diffusion txt2img API.


In [None]:
# The notebook itself only talks to the WebUI and Ollama over HTTP, so the
# GPU stack is optional here (it matters when the SD WebUI runs locally).
# Import each package defensively so a missing one is reported instead of
# aborting the cell with ImportError.
try:
    import torch
except ImportError:
    torch = None

try:
    import torchvision
except ImportError:
    torchvision = None

try:
    import xformers
except ImportError:
    xformers = None


def _version_or_missing(module) -> str:
    """Return the module's ``__version__``, or a placeholder when it is absent."""
    return module.__version__ if module is not None else "not installed"


print("üîç Environment Check:")
print(f"  PyTorch Version: {_version_or_missing(torch)}")
print(f"  TorchVision Version: {_version_or_missing(torchvision)}")

# CUDA can only be probed through torch; without torch it is reported as False.
cuda_available = torch is not None and torch.cuda.is_available()
print(f"  CUDA Available: {cuda_available}")
if cuda_available:
    print(f"  CUDA Device: {torch.cuda.get_device_name(0)}")
print(f"  xFormers Version: {_version_or_missing(xformers)}")


    PyTorch 2.1.0+cu121 with CUDA 1201 (you have 2.1.0+cu118)
    Python  3.10.11 (you have 3.10.19)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


2.1.0+cu118
0.16.0+cu118
True
0.0.22.post7


## Step 2: Configuration & Utility Functions

Define API endpoints, available models, and helper functions for model switching and prompt parsing.


In [None]:
import base64
import json # Included for future-proofing advanced JSON parsing
import os
import re
import time
from io import BytesIO
from typing import Optional, Tuple, Dict, Any

import requests
from IPython.display import display # Specifically for displaying images in Jupyter/Colab
from PIL import Image

# ============================================================================
# üîß CONFIGURATION
# ============================================================================

# Service endpoints. SD_API_URL is a base URL: the /sdapi/v1/... paths are
# appended to it by the code below. OLLAMA_URL is the complete endpoint that
# prompt-generation requests are POSTed to.
SD_API_URL = "http://127.0.0.1:7860"              # Stable Diffusion WebUI (base URL, no trailing path)
OLLAMA_URL = "http://localhost:11434/api/generate"  # Ollama API (full generate endpoint)

# API timeouts, passed as the `timeout` argument of requests.post
TIMEOUT_OLLAMA = 120 # Seconds for prompt generation
TIMEOUT_SD_API = 300 # Seconds for image generation (adjust based on your GPU speed)

# üé® Available Stable Diffusion Checkpoints (Add the full filename as needed)

# --- Model Auto-Discovery Implementation ---
# Directory scanned for checkpoints. The SD_MODEL_FOLDER environment variable
# overrides the built-in default so the notebook is usable on machines whose
# WebUI lives somewhere else.
MODEL_FOLDER_PATH = os.environ.get(
    "SD_MODEL_FOLDER",
    r"C:\AI\stable-diffusion-webui-master\models\Stable-diffusion",
)
# Lower-case suffixes; file names are lower-cased before comparison.
SUPPORTED_EXTENSIONS = ('.safetensors', '.ckpt')

def auto_discover_models(folder_path: str) -> list[str]:
    """
    List the checkpoint files found directly inside ``folder_path``.

    Only regular files whose name ends with one of SUPPORTED_EXTENSIONS are
    returned; the suffix comparison ignores case (``MODEL.SAFETENSORS``
    matches). Sub-directories are not searched and are never returned, even
    if their name carries a supported suffix.

    Args:
        folder_path (str): Directory to scan.

    Returns:
        list[str]: Matching file names (not full paths) in ascending
                   ``str`` sort order. Empty when the directory is missing,
                   unreadable, or holds no matching file.
    """
    if not os.path.isdir(folder_path):
        print(f"‚ùå Error: Model directory not found at: {folder_path}")
        return []

    try:
        entries = os.listdir(folder_path)
    except OSError as err:
        # e.g. permission denied, or the folder vanished after the check above
        print(f"‚ùå Error: Could not list model directory {folder_path}: {err}")
        return []

    models = [
        name for name in entries
        if name.lower().endswith(SUPPORTED_EXTENSIONS)
        and os.path.isfile(os.path.join(folder_path, name))
    ]

    # Plain sort keeps the selection-by-index logic predictable between runs
    models.sort()
    return models

AVAILABLE_MODELS = auto_discover_models(MODEL_FOLDER_PATH)

if AVAILABLE_MODELS:
    # With several checkpoints available the second one is chosen; a single
    # checkpoint leaves index 0 as the only option.
    MODEL_SELECTION_INDEX = 1 if len(AVAILABLE_MODELS) > 1 else 0
else:
    # Nothing discovered: fall back to a one-entry list with the default checkpoint.
    print("‚ö†Ô∏è Warning: No models found via auto-discovery. Using a fallback list.")
    AVAILABLE_MODELS = ["v1-5-pruned-emaonly.safetensors"]
    MODEL_SELECTION_INDEX = 0

MODEL_TO_USE = AVAILABLE_MODELS[MODEL_SELECTION_INDEX]





# Baseline negative keywords: embedded in the Ollama request and reused as the
# fallback negative prompt.
NEGATIVE_PROMPT_KEYWORDS = (
    "low quality, blurry, worst quality, extra limbs, deformed, bad anatomy, jpeg artifacts"
)

# One print call, one summary line per argument.
print(
    "‚úÖ Configuration Loaded",
    f"  Total Models Found: {len(AVAILABLE_MODELS)}",
    f"  Selected Model Index ({MODEL_SELECTION_INDEX}): {MODEL_TO_USE}",
    f"  SD API: {SD_API_URL}",
    f"  Ollama API: {OLLAMA_URL}",
    sep="\n",
)


# ============================================================================
# üõ†Ô∏è UTILITY FUNCTIONS
# ============================================================================

def switch_sd_checkpoint(model_name: str, api_url: str = SD_API_URL, timeout: float = 30) -> bool:
    """
    Switches the active Stable Diffusion model checkpoint via the A1111 API.

    POSTs ``{"sd_model_checkpoint": model_name}`` to ``/sdapi/v1/options``.
    All failures are reported on stdout and turned into a ``False`` return
    value; no exception from the HTTP call escapes this function.

    Args:
        model_name (str): Exact filename of the model checkpoint to load
        api_url (str): Base URL of the Stable Diffusion WebUI API; a trailing
                       slash is tolerated
        timeout (float): Seconds to wait for the options request. Raise it if
                         large checkpoints need longer to load.

    Returns:
        bool: True if the API answered with HTTP 200, False otherwise

    Example:
        >>> switch_sd_checkpoint("realisticVisionV60B1_v51HyperVAE.safetensors")
        ‚úÖ Model switch successful.
        True
    """
    # rstrip avoids a '//' in the path when the caller passes 'http://host:7860/'
    options_endpoint = f"{api_url.rstrip('/')}/sdapi/v1/options"
    switch_payload = {"sd_model_checkpoint": model_name}

    print(f"üîÑ Switching model to: {model_name}")

    try:
        response = requests.post(options_endpoint, json=switch_payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"‚ùå Connection/Request Error: {e}")
        print("   Please ensure the Stable Diffusion Web UI is running with the --api flag.")
        return False

    if response.status_code == 200:
        print("‚úÖ Model switch successful.")
        time.sleep(2)  # Brief pause to ensure model is loaded
        return True

    print(f"‚ùå API Call Failed. Status Code: {response.status_code}")
    try:
        print("   API Error Details:", response.json())
    except ValueError:
        # ValueError is the common base of every JSON decode error requests can
        # raise; requests.exceptions.JSONDecodeError only exists in requests >= 2.27.
        print("   Raw Response:", response.text)
    return False


def parse_prompt_output(prompt_string: str) -> Tuple[str, str]:
    """
    Splits LLM output into positive and negative prompt components.

    The text is split at the first label of the form ``negative:`` or
    ``negative prompt:``. Matching ignores letter case and tolerates extra
    whitespace before the colon, so "NEGATIVE PROMPT:", "Negative Prompt:",
    "Negative prompt:", "NEGATIVE:" and "Negative:" are all recognised. When
    several labels occur, the earliest one in the text is the split point.

    Args:
        prompt_string (str): Raw output from the Ollama LLM. ``None`` is
                             treated like an empty string.

    Returns:
        Tuple[str, str]: A tuple of (positive_prompt, negative_prompt), each
                        stripped of surrounding whitespace and commas.
                        If no label is found, a warning is printed and
                        (stripped_full_string, "") is returned.

    Example:
        >>> output = "Beautiful sunset. Negative Prompt: ugly, blurry"
        >>> parse_prompt_output(output)
        ('Beautiful sunset.', 'ugly, blurry')
    """
    text = prompt_string or ""

    # A single case-insensitive pattern replaces the fixed list of spellings,
    # so variations in the model's capitalisation no longer defeat the split.
    label = re.search(r"negative(?:\s+prompt)?\s*:", text, flags=re.IGNORECASE)

    if label:
        positive = text[:label.start()].strip().strip(',').strip()
        negative = text[label.end():].strip().strip(',').strip()
        return positive, negative

    # Fallback: no label found, so the whole string is the positive prompt
    print("‚ö†Ô∏è Warning: Could not find negative prompt delimiter in LLM output.")
    print("   Using full string as positive prompt with empty negative prompt.")
    return text.strip(), ""


## Step 3: Generate Enhanced Prompt with Ollama

Use the local Ollama LLM to transform a basic idea into a detailed, high-quality prompt.
The model will generate a prompt with both positive (main description) and negative (what to avoid) components.


In [None]:

# ============================================================================
# ü§ñ STEP 1: OLLAMA PROMPT GENERATION
# ============================================================================

print("=" * 70)
print("ü§ñ STEP 1: Generating Enhanced Prompt with Ollama")
print("=" * 70)

# Ollama model used to expand the short idea into a full prompt.
OLLAMA_MODEL = "brxce/stable-diffusion-prompt-generator"

# NEGATIVE_PROMPT_KEYWORDS is taken from the configuration cell (single source
# of truth); it is intentionally not redefined here.

# Define a base prompt idea (customize this!)
base_prompt = (
    "A portrait of a young woman, golden hour sunlight, soft focus, vivid colors, intricate details. "
    "Generate a Stable Diffusion prompt that includes a detailed positive prompt and a section for a "
    f"negative prompt with these keywords: {NEGATIVE_PROMPT_KEYWORDS}. "
    "Ensure the negative section is clearly labeled 'Negative Prompt:'"
)

# No "format": "json" here: the prompt asks for plain text with a
# 'Negative Prompt:' label, which is what parse_prompt_output() splits on.
# Forcing JSON output would contradict that instruction and break the parse.
ollama_data = {
    "model": OLLAMA_MODEL,
    "prompt": base_prompt,
    "stream": False
}

enhanced_prompt = None
positive_prompt = ""
negative_prompt = NEGATIVE_PROMPT_KEYWORDS  # Default fallback

print(f"üìù Base Prompt: {base_prompt[:80]}...")
print(f"üîÑ Calling Ollama API: {OLLAMA_URL}")

try:
    ollama_response = requests.post(OLLAMA_URL, json=ollama_data, timeout=TIMEOUT_OLLAMA)

    if ollama_response.status_code == 200:
        prompt_json = ollama_response.json()
        if 'response' in prompt_json:
            # A null/empty response is normalised to "" so parsing cannot fail on None
            enhanced_prompt = prompt_json.get("response") or ""
            positive_prompt, parsed_negative = parse_prompt_output(enhanced_prompt)
            # Keep the default keywords when the model produced no negative section
            negative_prompt = parsed_negative or NEGATIVE_PROMPT_KEYWORDS
            if positive_prompt:
                print("‚úÖ Ollama Prompt Generation Success!")
                print("\nüìå Generated Positive Prompt:")
                print(f"   {positive_prompt[:100]}...")
                print("\n‚õî Generated Negative Prompt:")
                print(f"   {negative_prompt}")
        else:
            print("‚ùå Ollama response missing 'response' key.")
            print(f"   Response keys: {prompt_json.keys()}")
    else:
        print(f"‚ùå Ollama API call failed (Status: {ollama_response.status_code})")
        print(f"   Response: {ollama_response.text}")

except requests.exceptions.ConnectionError:
    print("‚ùå FATAL ERROR: Could not connect to Ollama.")
    print("   Make sure Ollama is running: ollama serve")
    print(f"   And the model is pulled: ollama pull {OLLAMA_MODEL}")

except requests.exceptions.Timeout:
    print("‚ùå Timeout: Ollama took too long to respond.")
    print("   Try increasing the timeout value or check system resources.")

except requests.exceptions.RequestException as err:
    # Any other transport-level failure: report it and use the fallback below
    print(f"‚ùå Request Error (Ollama): {err}")

except ValueError as err:
    # Body was not valid JSON (older requests versions raise a plain ValueError)
    print(f"‚ùå Ollama returned a response that is not valid JSON: {err}")

# Fallback in case Ollama fails or returns an empty positive prompt
if not positive_prompt:
    positive_prompt = "A masterwork portrait of a young woman, volumetric golden hour light, highly detailed face, cinematic focus, professional photography"
    negative_prompt = NEGATIVE_PROMPT_KEYWORDS
    print("\n‚ö†Ô∏è Using FALLBACK Prompts (Ollama failed or not responding).")
    print(f"   Positive: {positive_prompt}")
    print(f"   Negative: {negative_prompt}")


## Step 2: Switch Active Model

Select and load the desired Stable Diffusion checkpoint from the AVAILABLE_MODELS list.


In [None]:

# ============================================================================
# üîÑ STEP 2: STABLE DIFFUSION MODEL SWITCHING
# ============================================================================

print(f"\n{'=' * 70}")
print("üîÑ STEP 2: Switching Stable Diffusion Model")
print("=" * 70)

# Ask the WebUI to load the checkpoint chosen in the configuration cell.
success = switch_sd_checkpoint(MODEL_TO_USE)

if not success:
    # A failed switch is only reported; later cells still run.
    print(
        "\n‚ö†Ô∏è Warning: Model switch failed. Proceeding anyway...",
        "   The WebUI may still have loaded the model or it might not exist.",
        sep="\n",
    )



## üîÑ Step 3: Switching Stable Diffusion Model
Attempting to switch model to: **protogenX34Pruned.8NEd.safetensors [ef8629e2c8]**
‚úÖ Model switch successful.


True

## Step 3: Generate Image via Stable Diffusion

Use the enhanced prompts to generate an image. The image will be saved and displayed.


In [None]:

# ============================================================================
# üñºÔ∏è STEP 3: STABLE DIFFUSION IMAGE GENERATION
# ============================================================================

print("\n" + "=" * 70)
print("üñºÔ∏è STEP 3: Generating Image via txt2img API")
print("=" * 70)

# Request body for the txt2img endpoint; the prompts come from the Ollama cell.
sd_payload = {
    "prompt": positive_prompt,
    "negative_prompt": negative_prompt,
    "steps": 25,                      # Quality vs speed trade-off
    "cfg_scale": 7,                   # Prompt adherence (5-15 typical)
    "sampler_index": "Euler a",       # Sampling algorithm
    "width": 768,                     # Must be multiple of 64
    "height": 512,                    # Must be multiple of 64
    "batch_size": 1                   # Number of images to generate
}

print("üìã Generation Parameters:")
print(f"   Steps: {sd_payload['steps']}")
print(f"   CFG Scale: {sd_payload['cfg_scale']}")
print(f"   Resolution: {sd_payload['width']}x{sd_payload['height']}")
print(f"   Sampler: {sd_payload['sampler_index']}")
print("\n‚è≥ Generating image... (this may take 30-60 seconds)")

try:
    # Call the txt2img API; generation can be slow, hence the dedicated timeout
    start_time = time.time()
    sd_response = requests.post(
        f"{SD_API_URL}/sdapi/v1/txt2img",
        json=sd_payload,
        timeout=TIMEOUT_SD_API  # Generation timeout from the configuration cell
    )
    elapsed_time = time.time() - start_time

    sd_response.raise_for_status()  # Raise HTTPError for bad responses

    sd_result = sd_response.json()

    if 'images' in sd_result and sd_result['images']:
        print(f"‚úÖ Image generation successful in {elapsed_time:.1f} seconds!")
        print(f"   Generated {len(sd_result['images'])} image(s)")

        # Process and save the first image (the API returns base64 strings)
        img_b64 = sd_result['images'][0]
        img_bytes = base64.b64decode(img_b64)
        img = Image.open(BytesIO(img_bytes))

        # Generate filename from model name: text before the first dot,
        # capped at 20 characters, plus a Unix timestamp
        model_short_name = MODEL_TO_USE.split('.')[0][:20]
        filename = f"sd_generated_output_{model_short_name}_{int(time.time())}.png"
        img.save(filename)
        print(f"\nüñºÔ∏è Image saved as: {filename}")
        print(f"   Dimensions: {img.size[0]}x{img.size[1]}")

        # `display` is imported once in the configuration cell; no local
        # re-import is needed here.
        display(img)

    else:
        print("‚ùå Image generation failed.")
        print("   'images' key not found or list is empty.")
        print(f"   Response keys: {sd_result.keys()}")
        if 'info' in sd_result:
            print(f"   Info: {sd_result['info']}")

except requests.exceptions.HTTPError as errh:
    # Raised by raise_for_status(), so sd_response is always bound here
    print(f"‚ùå HTTP Error (SD API): {errh}")
    print(f"   Status Code: {sd_response.status_code}")
    print(f"   Response: {sd_response.text}")

except requests.exceptions.Timeout:
    # Report the configured limit rather than a hard-coded number
    print(f"‚ùå Timeout Error: Image generation took too long (>{TIMEOUT_SD_API}s)")
    print("   Try reducing resolution, steps, or batch_size")

except requests.exceptions.ConnectionError:
    print("‚ùå Connection Error: Could not reach Stable Diffusion WebUI")
    print("   Make sure SD WebUI is running with: python launch.py --api --no-half")

except requests.exceptions.RequestException as err:
    print(f"‚ùå Request Error (SD API): {err}")

except ValueError as err:
    # Response body was not valid JSON, or the image payload was not valid base64
    print(f"‚ùå Could not decode the SD API response: {err}")


In [None]:
## 📝 Notes & Next Steps

### Tips for Better Results:
- **Adjust `cfg_scale`** (7-10): Higher values enforce the prompt more strictly
- **Increase `steps`** (30-50): More steps = better quality but slower generation
- **Modify `sampler_index`**: Try "DPM++ 2M", "DDIM", or others
- **Change resolution**: Use 512×512 for faster generation, 768×768+ for more detail
- **Customize prompts**: Edit the `base_prompt` and `NEGATIVE_PROMPT_KEYWORDS`

### Troubleshooting:
- If you get a connection error, ensure both services are running:
  - Ollama: `ollama serve`
  - SD WebUI: `python launch.py --api --no-half`
- If image generation is slow, reduce `steps` or the resolution (`width`/`height`)
- If you get CUDA errors, add `--lowvram` flag to the SD WebUI launch command

### Generate Multiple Images:
Run the image generation cell multiple times with different prompts or parameters.
