In [None]:
#1) Install Dependencies
!pip -q install -U \
    "diffusers==0.21.4" \
    "transformers==4.38.2" \
    "accelerate==0.27.2" \
    "safetensors==0.4.3" \
    "huggingface_hub==0.20.3" \
    "open_clip_torch==2.24.0" \
    "pillow==10.4.0" \
    matplotlib tqdm

print('All dependencies installed')

In [None]:
#2) Imports & Helpers
import gc
import os, time, warnings
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from tqdm.auto import tqdm

import torch
from diffusers import StableDiffusionPipeline
import open_clip

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Device: run on the GPU in half precision when CUDA is available,
# otherwise fall back to the CPU in full precision.
if torch.cuda.is_available():
    DEVICE, DTYPE = 'cuda', torch.float16
else:
    DEVICE, DTYPE = 'cpu', torch.float32

print(f'ðŸ”§ Device : {DEVICE}')
if DEVICE == 'cuda':
    # Report the name and total memory of CUDA device 0.
    print(f'   GPU    : {torch.cuda.get_device_name(0)}')
    vram_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f'   VRAM   : {vram_gb:.1f} GB')

# Helpers
def ensure_dir(path):
    """Create ``path`` (including missing parents) if needed and return it.

    An already existing directory is not an error. The argument is returned
    unchanged so the call can be used inline in an assignment.
    """
    os.makedirs(name=path, exist_ok=True)
    return path

# Per-model output folders. ensure_dir creates each folder if it is missing and
# returns the same path string, so OUT_A / OUT_B are plain relative paths.
OUT_A = ensure_dir('outputs_model_A')
OUT_B = ensure_dir('outputs_model_B')

def save_pil_images(images, out_dir, prefix):
    """Save each image as ``<out_dir>/<prefix>_<n>.png`` and return the paths.

    ``n`` counts from 1 in iteration order. Every element of ``images`` is
    saved by calling its ``save(path)`` method. The returned list holds the
    written paths in the same order as the input.
    """
    def target_for(number):
        # File name pattern: <prefix>_<1-based index>.png inside out_dir.
        return os.path.join(out_dir, f'{prefix}_{number}.png')

    written = []
    for number, image in enumerate(images, start=1):
        destination = target_for(number)
        image.save(destination)
        written.append(destination)
    return written

print('Imports & helpers ready')

In [None]:
#3) Define Prompts
# Text prompts rendered by both models.
PROMPTS = [
    'a futuristic cyberpunk street at night, neon lights, rain, ultra-detailed, cinematic',
    'a robot barista serving coffee in a cozy cafe, warm lighting, cinematic',
    'a floating castle above the clouds, sunrise, epic fantasy art, highly detailed',
]

# Generation settings shared by both models.
NUM_IMAGES = 2    # images per prompt per model
STEPS      = 28   # inference steps (tuned for T4)
GUIDANCE   = 7.5  # classifier-free guidance scale

# Echo the experiment size (two models) followed by the numbered prompt list.
total_images = len(PROMPTS) * NUM_IMAGES * 2
print(f'{len(PROMPTS)} prompts Ã— {NUM_IMAGES} images Ã— 2 models = {total_images} total images')
for number, text in enumerate(PROMPTS, start=1):
    print(f'  {number}. {text}')

In [None]:
#4)  Load & Run Model A (SD v1.5)
# ── Model A: Stable Diffusion v1.5 (runwayml) ──
# Same weights referenced in Ref #1 (TF/KerasCV SD Tutorial), PyTorch backend
# NOTE(review): confirm this Hub repo id is still downloadable; swap in a mirror if not.
MODEL_A_ID    = 'runwayml/stable-diffusion-v1-5'
MODEL_A_LABEL = 'SD v1.5 (runwayml)'

# Fixed base seed so that Restart & Run All regenerates the same images.
# Each prompt uses SEED + its 1-based index.
SEED = 42

print(f'Loading {MODEL_A_LABEL} …')
pipe_a = StableDiffusionPipeline.from_pretrained(
    MODEL_A_ID,
    torch_dtype=DTYPE,
    safety_checker=None,            # safety checker disabled for this comparison
    requires_safety_checker=False,
).to(DEVICE)
print(f'{MODEL_A_LABEL} loaded')

results_A = {}   # prompt -> list of saved PNG paths
times_A   = {}   # prompt -> seconds taken to generate the whole batch for that prompt

for idx, prompt in enumerate(PROMPTS, 1):
    print(f'\n[Model A] Prompt {idx}/{len(PROMPTS)}: "{prompt[:60]}…"')
    # A CPU generator gives the same starting noise whether the pipeline runs
    # on CPU or GPU.
    generator = torch.Generator(device='cpu').manual_seed(SEED + idx)
    t0  = time.perf_counter()   # monotonic clock, suited to measuring intervals
    out = pipe_a(
        prompt,
        num_images_per_prompt=NUM_IMAGES,
        guidance_scale=GUIDANCE,
        num_inference_steps=STEPS,
        height=512, width=512,
        generator=generator,
    )
    dt = time.perf_counter() - t0
    times_A[prompt]   = dt
    results_A[prompt] = save_pil_images(out.images, OUT_A, f'prompt{idx}')
    print(f' Done in {dt:.1f}s  →  saved {len(out.images)} images')

# Free VRAM before loading Model B: drop the pipeline, collect any reference
# cycles still holding tensors, then release cached CUDA blocks (GPU only).
del pipe_a
gc.collect()
if DEVICE == 'cuda':
    torch.cuda.empty_cache()
print('\nModel A complete. VRAM released.')

In [None]:
# ── HuggingFace Login (optional) ──
# Never paste an access token into the notebook: it ends up in version control
# and in shared copies. The token is read from the HF_TOKEN environment variable
# instead. When it is not set the login is skipped, so Restart & Run All still
# works; the Model B cell is documented as needing no authentication.
from huggingface_hub import login

hf_token = os.environ.get('HF_TOKEN')
if hf_token:
    login(token=hf_token)
    print("Logged in to HuggingFace")
else:
    print("HF_TOKEN not set; skipping HuggingFace login")

In [None]:
#5)  Load & Run Model B (SD v1.4)
# ── Model B: CompVis/stable-diffusion-v1-4 (100% public, no auth needed) ──
# Different from Model A (v1.5): earlier checkpoint, different training mix,
# good for comparison — same concept as DALL-E Mini (open generative model)

MODEL_B_ID    = 'CompVis/stable-diffusion-v1-4'
MODEL_B_LABEL = 'SD v1.4 (CompVis)'

# Fixed base seed so that Restart & Run All regenerates the same images.
# Each prompt uses SEED + its 1-based index.
SEED = 42

print(f'Loading {MODEL_B_LABEL} …')
pipe_b = StableDiffusionPipeline.from_pretrained(
    MODEL_B_ID,
    torch_dtype=DTYPE,
    safety_checker=None,            # safety checker disabled for this comparison
    requires_safety_checker=False,
).to(DEVICE)
print(f'{MODEL_B_LABEL} loaded')

results_B = {}   # prompt -> list of saved PNG paths
times_B   = {}   # prompt -> seconds taken to generate the whole batch for that prompt

for idx, prompt in enumerate(PROMPTS, 1):
    print(f'\n[Model B] Prompt {idx}/{len(PROMPTS)}: "{prompt[:60]}…"')
    # A CPU generator gives the same starting noise whether the pipeline runs
    # on CPU or GPU.
    generator = torch.Generator(device='cpu').manual_seed(SEED + idx)
    t0  = time.perf_counter()   # monotonic clock, suited to measuring intervals
    out = pipe_b(
        prompt,
        num_images_per_prompt=NUM_IMAGES,
        guidance_scale=GUIDANCE,
        num_inference_steps=STEPS,
        height=512, width=512,
        generator=generator,
    )
    dt = time.perf_counter() - t0
    times_B[prompt]   = dt
    results_B[prompt] = save_pil_images(out.images, OUT_B, f'prompt{idx}')
    print(f'   Done in {dt:.1f}s  →  saved {len(out.images)} images')

# Drop the pipeline, collect any reference cycles still holding tensors, then
# release cached CUDA blocks (GPU only).
del pipe_b
gc.collect()
if DEVICE == 'cuda':
    torch.cuda.empty_cache()
print(f'\nModel B ({MODEL_B_LABEL}) complete. VRAM released.')

In [None]:
#6) Visual Comparison Grids
# Accent colours (hex) that show_comparison applies to each model's subplot
# titles and borders: COLOR_A for Model A, COLOR_B for Model B.
COLOR_A = '#7B5EA7'
COLOR_B = '#E8B84B'

def show_comparison(prompt, paths_a, paths_b, label_a='SD v1.5', label_b='SD v1.4'):
    """Display both models' images for one prompt in a two-column grid.

    Row ``i`` shows image ``i`` of Model A on the left and image ``i`` of
    Model B on the right. The grid has as many rows as the longer of the two
    path lists; a cell with no image is left blank.

    Args:
        prompt: Prompt text, used as the figure title (shortened past 70 chars).
        paths_a: Image file paths produced by Model A.
        paths_b: Image file paths produced by Model B.
        label_a: Display name for Model A in subplot titles.
        label_b: Display name for Model B in subplot titles.

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        both path lists are empty.
    """
    n_rows = max(len(paths_a), len(paths_b))
    if n_rows == 0:
        return

    # 5.5 inches per row keeps the original 12x11 figure for two images.
    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(n_rows, 2, figsize=(12, 5.5 * n_rows), squeeze=False)
    fig.patch.set_facecolor('#0f0f1a')

    columns = (
        (paths_a, label_a, COLOR_A),
        (paths_b, label_b, COLOR_B),
    )

    for row in range(n_rows):
        for col, (paths, label, color) in enumerate(columns):
            ax = axes[row, col]
            if row >= len(paths):
                # This model has fewer images than the other: leave the cell empty.
                ax.axis('off')
                continue

            # Read the pixels inside the context manager so the file is closed promptly.
            with Image.open(paths[row]) as img:
                ax.imshow(np.array(img))

            # Hide ticks instead of calling ax.axis('off'): turning the axis off
            # also stops the spines from being drawn, which would remove the border.
            ax.set_xticks([])
            ax.set_yticks([])
            ax.set_title(f'{label}  ·  Image {row + 1}', color=color,
                         fontsize=11, fontweight='bold', pad=8)
            for spine in ax.spines.values():
                spine.set_edgecolor(color)
                spine.set_linewidth(2)
                spine.set_visible(True)

    short = prompt if len(prompt) < 70 else prompt[:67] + '…'
    fig.suptitle(f' {short}', color='white', fontsize=12, y=1.01, style='italic')
    plt.tight_layout()
    plt.show()

# Draw one comparison figure per prompt, passing the saved image paths of
# Model A (results_A) and Model B (results_B) for that prompt.
for prompt in PROMPTS:
    show_comparison(prompt, results_A[prompt], results_B[prompt])

In [None]:
#7) CLIP Similarity Scoring
# Load the ViT-B-32 CLIP model with the 'openai' pretrained weights. The second
# value returned by create_model_and_transforms is not needed and is discarded;
# the third is kept as the image preprocessing transform.
print('Loading CLIP ViT-B/32 â€¦')
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='openai')
# Move the model to the selected device and switch it to evaluation mode.
clip_model = clip_model.to(DEVICE)
clip_model = clip_model.eval()
# Tokenizer matching the same model name.
clip_tok = open_clip.get_tokenizer('ViT-B-32')
print('CLIP ready')

@torch.no_grad()
def clip_score(prompt, image_path):
    """Return the CLIP text-image similarity for one prompt and one image file.

    The image is opened, converted to RGB and run through ``clip_preprocess``;
    the prompt is tokenized with ``clip_tok``. Both embeddings are divided by
    their L2 norm along the last dimension, so the returned value is the dot
    product of two unit vectors (cosine similarity).

    Args:
        prompt: Text to compare against the image.
        image_path: Path of the image file to score.

    Returns:
        The similarity as a Python float.
    """
    # Open inside a context manager so the file is closed as soon as the
    # pixel data has been converted.
    with Image.open(image_path) as raw:
        img = clip_preprocess(raw.convert('RGB')).unsqueeze(0).to(DEVICE)
    txt = clip_tok([prompt]).to(DEVICE)

    i_feat = clip_model.encode_image(img)
    t_feat = clip_model.encode_text(txt)

    # Normalise out of place rather than mutating the encoder outputs.
    i_feat = i_feat / i_feat.norm(dim=-1, keepdim=True)
    t_feat = t_feat / t_feat.norm(dim=-1, keepdim=True)
    return (i_feat * t_feat).sum().item()

clip_scores_A = {}   # prompt -> list of CLIP scores, one per Model A image
clip_scores_B = {}   # prompt -> list of CLIP scores, one per Model B image

for prompt in PROMPTS:
    clip_scores_A[prompt] = [clip_score(prompt, p) for p in results_A[prompt]]
    clip_scores_B[prompt] = [clip_score(prompt, p) for p in results_B[prompt]]
    print(f'\nPrompt: "{prompt[:55]}…"')
    # Model B is the SD v1.4 checkpoint (CompVis/stable-diffusion-v1-4), so it is
    # labelled v1.4 here, not v2.1.
    for label, scores in (('SD v1.5', clip_scores_A[prompt]),
                          ('SD v1.4', clip_scores_B[prompt])):
        print(f'  {label} scores : {[f"{s:.4f}" for s in scores]}  avg={np.mean(scores):.4f}')

In [None]:
#8)  Results Tables
def print_table(rows, headers):
    """Print ``rows`` under ``headers`` as a plain-text table.

    Each column is as wide as its longest entry (header included), cells are
    left-justified and joined with ' | ', and a dashed rule separates the
    header from the body. Cell values are converted with ``str``.
    """
    # Start from the header widths, then widen each column to fit its cells.
    col_widths = list(map(len, headers))
    for row in rows:
        for col, cell in enumerate(row):
            cell_len = len(str(cell))
            if cell_len > col_widths[col]:
                col_widths[col] = cell_len

    def render(cells):
        # Pad every cell to its column width and join with the column separator.
        return ' | '.join(str(cell).ljust(col_widths[col]) for col, cell in enumerate(cells))

    print(render(headers))
    print('-+-'.join('-' * width for width in col_widths))
    for row in rows:
        print(render(row))

# Detailed table: one row per generated image.
# The Time column is the time recorded for the prompt's whole batch (the timing
# dicts are keyed by prompt), so it repeats on every image row of that prompt.
# Model B is the SD v1.4 checkpoint, so its rows are labelled v1.4.
print('\n DETAILED CLIP SCORES\n')
detail_rows = []
for prompt in PROMPTS:
    short = prompt[:48] + '…' if len(prompt) > 48 else prompt
    for label, scores, batch_times in (('SD v1.5', clip_scores_A, times_A),
                                       ('SD v1.4', clip_scores_B, times_B)):
        for i, s in enumerate(scores[prompt], 1):
            detail_rows.append([short, label, i, f'{s:.4f}', f'{batch_times[prompt]:.1f}s'])
print_table(detail_rows, ['Prompt', 'Model', 'Img#', 'CLIP Score', 'Time'])

# Summary table: one row per prompt with each model's mean CLIP score and batch
# generation time. The winner is the model with the higher mean score; a tie
# goes to Model A. Model B is the SD v1.4 checkpoint, so it is labelled v1.4.
print('\n\n SUMMARY\n')
summary_rows = []
for prompt in PROMPTS:
    short  = prompt[:48] + '…' if len(prompt) > 48 else prompt
    avg_a  = np.mean(clip_scores_A[prompt])
    avg_b  = np.mean(clip_scores_B[prompt])
    winner = 'SD v1.5 ✓' if avg_a >= avg_b else 'SD v1.4 ✓'
    summary_rows.append([short, f'{avg_a:.4f}', f'{times_A[prompt]:.1f}s',
                                 f'{avg_b:.4f}', f'{times_B[prompt]:.1f}s', winner])
print_table(summary_rows, ['Prompt', 'A AvgCLIP', 'A Time', 'B AvgCLIP', 'B Time', 'Winner'])

In [None]:
#9) Bar Charts
# Two side-by-side grouped bar charts: mean CLIP score per prompt (left) and
# generation time per prompt (right). Model B is the SD v1.4 checkpoint and both
# models were sampled at 512x512, so the legends and title say v1.4 / 512px.
labels      = [f'P{i+1}' for i in range(len(PROMPTS))]
avgs_a      = [np.mean(clip_scores_A[p]) for p in PROMPTS]
avgs_b      = [np.mean(clip_scores_B[p]) for p in PROMPTS]
times_a_lst = [times_A[p] for p in PROMPTS]
times_b_lst = [times_B[p] for p in PROMPTS]

# x holds the group positions; w is the width of a single bar within a group.
x, w = np.arange(len(labels)), 0.35
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
fig.patch.set_facecolor('#0f0f1a')

# Shared dark theme for both panels.
for ax in (ax1, ax2):
    ax.set_facecolor('#16162a')
    ax.tick_params(colors='white')
    for spine in ax.spines.values():
        spine.set_color('#333355')

# CLIP chart
b1 = ax1.bar(x - w/2, avgs_a, w, label='SD v1.5', color='#7B5EA7', alpha=0.9)
b2 = ax1.bar(x + w/2, avgs_b, w, label='SD v1.4', color='#E8B84B', alpha=0.9)
ax1.set_xticks(x)
ax1.set_xticklabels(labels, color='white')
ax1.set_ylabel('Avg CLIP Score', color='white')
ax1.set_title('Prompt–Image Alignment (CLIP)', color='white', fontweight='bold')
ax1.legend(facecolor='#16162a', labelcolor='white')
ax1.bar_label(b1, fmt='%.3f', color='#7B5EA7', fontsize=9)
ax1.bar_label(b2, fmt='%.3f', color='#E8B84B', fontsize=9)

# Runtime chart
b3 = ax2.bar(x - w/2, times_a_lst, w, label='SD v1.5 (512px)', color='#7B5EA7', alpha=0.9)
b4 = ax2.bar(x + w/2, times_b_lst, w, label='SD v1.4 (512px)', color='#E8B84B', alpha=0.9)
ax2.set_xticks(x)
ax2.set_xticklabels(labels, color='white')
ax2.set_ylabel('Generation Time (s)', color='white')
ax2.set_title('Runtime per Prompt', color='white', fontweight='bold')
ax2.legend(facecolor='#16162a', labelcolor='white')
ax2.bar_label(b3, fmt='%.1fs', color='#7B5EA7', fontsize=9)
ax2.bar_label(b4, fmt='%.1fs', color='#E8B84B', fontsize=9)

fig.suptitle('Task 02 — Model Comparison: SD v1.5 vs SD v1.4',
             color='white', fontsize=13, fontweight='bold')
plt.tight_layout()
# Save before plt.show() so the written file contains the rendered figure.
plt.savefig('task02_comparison_chart.png', dpi=150, bbox_inches='tight', facecolor='#0f0f1a')
plt.show()
print('Chart saved → task02_comparison_chart.png')