In [1]:
!pip install --upgrade diffusers transformers accelerate safetensors torch torchvision

Collecting torch
  Downloading torch-2.9.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (5.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.8.93 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-runtime-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cuda-cupti-cu12==12.8.90 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cublas-cu12==12.8.4.1 (from torch)
  Downloading nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl.metadata (1.7 kB)
Collecting nvidia-cufft-cu12==11.3.3.83 (from torch)
  Downloading nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux

In [3]:
from diffusers import DiffusionPipeline
import torch
import os

In [4]:
# Earlier default checkpoint id: "runwayml/stable-diffusion-v1-5".
def Pipeline_CPU(model_name="stable-diffusion-v1-5/stable-diffusion-v1-5"):
    """Build a diffusion pipeline that always runs on the CPU.

    Args:
        model_name: Identifier handed to ``DiffusionPipeline.from_pretrained``.

    Returns:
        The loaded pipeline, already moved to ``"cpu"`` and with attention
        slicing enabled.
    """
    cpu_pipeline = DiffusionPipeline.from_pretrained(model_name).to("cpu")
    cpu_pipeline.enable_attention_slicing()
    print("Pipeline loaded successfully on CPU!")
    return cpu_pipeline


def Pipeline_GPU(model_name="stable-diffusion-v1-5/stable-diffusion-v1-5"):
    """Build a diffusion pipeline on CUDA when available, otherwise on the CPU.

    Half precision (``torch.float16``) is requested only for the CUDA case;
    the CPU fallback loads the weights as ``torch.float32``.

    Args:
        model_name: Identifier handed to ``DiffusionPipeline.from_pretrained``.

    Returns:
        The loaded pipeline, moved to the selected device and with attention
        slicing enabled.
    """
    if torch.cuda.is_available():
        target, precision = "cuda", torch.float16
    else:
        target, precision = "cpu", torch.float32

    loaded = DiffusionPipeline.from_pretrained(model_name, torch_dtype=precision).to(target)
    loaded.enable_attention_slicing()
    print(f"Pipeline loaded successfully on {target}!")
    return loaded

In [5]:
# Build the pipeline once; Pipeline_GPU itself falls back to the CPU when CUDA
# is not available. To force CPU execution use: generator = Pipeline_CPU()
generator = Pipeline_GPU()

# One output image is rendered per prompt in this list.
prompts = [
    "A colorful robot braves the storm, sheltering under a vibrant umbrella"
]

os.makedirs("outputs", exist_ok=True)

# Files are numbered from 1 in prompt order: outputs/image_1.png, image_2.png, ...
for image_number, prompt in enumerate(prompts, start=1):
    print(f"Generating image {image_number}: {prompt}")
    image_path = f"outputs/image_{image_number}.png"
    image = generator(prompt).images[0]
    image.save(image_path)
    print(f"Saved {image_path}")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Pipeline loaded successfully on cuda!
Generating image 1: A colorful robot braves the storm, sheltering under a vibrant umbrella


  0%|          | 0/50 [00:00<?, ?it/s]

Saved outputs/image_1.png


In [2]:
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch

# Formerly loaded from "runwayml/stable-diffusion-v1-5".
base_model = "stable-diffusion-v1-5/stable-diffusion-v1-5"

# Pick device and precision the same way Pipeline_GPU does, so this cell does
# not crash on a runtime without CUDA (float16 is only requested on the GPU).
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionPipeline.from_pretrained(base_model, torch_dtype=dtype)
# Swap in the DPM-Solver multistep scheduler, built from the config of the
# scheduler the pipeline was loaded with.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# LoRA weights
# NOTE(review): assumes Google Drive is already mounted at /content/drive;
# no mount call is visible in this notebook.
pipe.load_lora_weights("/content/drive/MyDrive/LoRA Weights/pytorch_lora_weights.safetensors")

pipe = pipe.to(device)


prompt = "A colorful robot braves the storm, sheltering under a vibrant umbrella"
image = pipe(prompt, num_inference_steps=30, guidance_scale=7.5).images[0]

# NOTE(review): this file is written to /content, not to /content/outputs,
# which is the folder the FID cells read generated images from.
image.save("/content/generated_image.png")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

config.json: 0.00B [00:00, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


  0%|          | 0/30 [00:00<?, ?it/s]

In [7]:
import torch
import numpy as np
from PIL import Image
import scipy.linalg
from tqdm import tqdm
import warnings
from torchvision import transforms, datasets
import matplotlib.pyplot as plt

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Report whether CUDA is usable and, if so, the name of device 0.
cuda_ready = torch.cuda.is_available()
gpu_label = torch.cuda.get_device_name(0) if cuda_ready else "N/A"
print(f"GPU Available: {cuda_ready}")
print(f"GPU: {gpu_label}")

class FIDEvaluator:
    """Calculate the Fréchet Inception Distance (FID) between two image sets.

    Features come from a torchvision InceptionV3 loaded with the
    ``IMAGENET1K_V1`` weights whose final fully connected layer is replaced by
    ``torch.nn.Identity``, so a forward pass returns the activations that
    would have fed that layer.

    Note:
        The covariance of each set is estimated from its samples, so a score
        computed from only a handful of images is not a reliable estimate.
    """

    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
        """Load the feature extractor onto ``device`` and build preprocessing.

        Args:
            device: Torch device string; defaults to ``"cuda"`` when available,
                otherwise ``"cpu"``.
        """
        self.device = device
        from torchvision.models import inception_v3
        self.model = inception_v3(weights="IMAGENET1K_V1", transform_input=False).to(device)
        self.model.fc = torch.nn.Identity()  # Use features before final FC
        self.model.eval()

        # Resize to 299x299 and apply the ImageNet mean/std normalisation.
        self.preprocess = transforms.Compose([
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])

    def get_features(self, images):
        """Run every image through the network and stack the feature vectors.

        Args:
            images: Iterable of PIL images or ``numpy`` arrays (arrays are
                converted with ``Image.fromarray``).

        Returns:
            ``numpy`` array with one row of features per input image.

        Raises:
            ValueError: If ``images`` yields no items.
        """
        features_list = []
        with torch.no_grad():
            for img in tqdm(images, desc="Extracting FID features"):
                if isinstance(img, np.ndarray):
                    img = Image.fromarray(img)
                img_tensor = self.preprocess(img).unsqueeze(0).to(self.device)
                feat = self.model(img_tensor)
                features_list.append(feat.cpu().numpy())
        if not features_list:
            # np.concatenate([]) would fail with an unrelated-looking message.
            raise ValueError("Cannot extract FID features from an empty image collection")
        return np.concatenate(features_list, axis=0)

    def calculate_fid(self, real_images, generated_images):
        """Compute the FID between ``real_images`` and ``generated_images``.

        Args:
            real_images: Reference images, in any form ``get_features`` accepts.
            generated_images: Images to score against the reference set.

        Returns:
            The FID as a Python ``float``.

        Raises:
            ValueError: If either set contains fewer than two images, because a
                sample covariance cannot be estimated from a single sample.
        """
        print("\n=== FID Calculation ===")
        print("Extracting features from real images...")
        real_features = self.get_features(real_images)
        if real_features.shape[0] < 2:
            raise ValueError(
                f"FID needs at least 2 real images, got {real_features.shape[0]}"
            )

        print("Extracting features from generated images...")
        gen_features = self.get_features(generated_images)
        if gen_features.shape[0] < 2:
            raise ValueError(
                f"FID needs at least 2 generated images, got {gen_features.shape[0]}"
            )

        mu_real = np.mean(real_features, axis=0)
        mu_gen = np.mean(gen_features, axis=0)
        sigma_real = np.cov(real_features, rowvar=False)
        sigma_gen = np.cov(gen_features, rowvar=False)

        diff = mu_real - mu_gen
        # Called without `disp` so only the matrix is returned; the `disp`
        # argument is deprecated in recent SciPy releases.
        covmean = scipy.linalg.sqrtm(sigma_real @ sigma_gen)
        if not np.isfinite(covmean).all():
            # Near-singular product: retry with a small diagonal offset.
            offset = np.eye(sigma_real.shape[0]) * 1e-6
            covmean = scipy.linalg.sqrtm((sigma_real + offset) @ (sigma_gen + offset))
        if np.iscomplexobj(covmean):
            # Drop the imaginary part introduced by numerical error.
            covmean = np.real(covmean)

        fid = np.sum(diff**2) + np.trace(sigma_real + sigma_gen - 2*covmean)
        return float(fid)

GPU Available: True
GPU: Tesla T4


In [11]:
import os
import glob
import numpy as np
from PIL import Image


real_images_path = "/content/drive/MyDrive/input/images"
generated_images_path = "/content/outputs"

# Read every PNG directly inside each folder into memory as an RGB PIL image.
real_images = []
for png_file in glob.glob(os.path.join(real_images_path, "*.png")):
    real_images.append(Image.open(png_file).convert("RGB"))

generated_images = []
for png_file in glob.glob(os.path.join(generated_images_path, "*.png")):
    generated_images.append(Image.open(png_file).convert("RGB"))

# The real folder is checked first; only one "missing" message is ever printed.
empty_folder = None
if not real_images:
    empty_folder = real_images_path
elif not generated_images:
    empty_folder = generated_images_path

if empty_folder is not None:
    print(f"No images found in {empty_folder}")
else:
    # NOTE(review): the recorded run scored 752 real images against only 2
    # generated ones; an FID from so few samples is unlikely to be meaningful.
    fid_evaluator = FIDEvaluator()
    fid_score = fid_evaluator.calculate_fid(real_images, generated_images)

    banner = "=" * 60
    print("\n" + banner)
    print("EVALUATION RESULTS - Stable Diffusion v1.5")
    print(banner)
    print(f"FID Score: {fid_score:.2f}")


=== FID Calculation ===
Extracting features from real images...


Extracting FID features: 100%|██████████| 752/752 [00:12<00:00, 58.09it/s]


Extracting features from generated images...


Extracting FID features: 100%|██████████| 2/2 [00:00<00:00, 65.01it/s]



EVALUATION RESULTS - Stable Diffusion v1.5
FID Score: 405.68


In [13]:
import torch
import numpy as np
from PIL import Image
import scipy.linalg
from tqdm import tqdm
from torchvision import transforms
from torchvision.models.inception import inception_v3
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Report CUDA availability, then either the name of device 0 or a CPU notice.
has_cuda = torch.cuda.is_available()
print(f"GPU Available: {has_cuda}")
print(f"Using GPU: {torch.cuda.get_device_name(0)}" if has_cuda else "Running on CPU")


class FIDEvaluator:
    """
    Calculates Fréchet Inception Distance (FID)
    between two image sets.

    Features are read from the ``Mixed_7c`` block of a torchvision InceptionV3
    (``IMAGENET1K_V1`` weights) through a forward hook and average-pooled to
    one vector per image.

    Note: the covariance of each set is estimated from its samples, so a score
    computed from only a handful of images is not a reliable estimate.
    """

    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
        """
        Load InceptionV3 onto ``device``, attach the feature hook and build
        the preprocessing pipeline.
        """
        self.device = device
        self.model = inception_v3(weights="IMAGENET1K_V1", transform_input=False).to(device)
        self.model.fc = torch.nn.Identity()  # remove final FC
        self.model.eval()

        # Keep the output of the Mixed_7c block from the most recent forward
        # pass; get_features() pools it into one vector per image.
        def hook(module, input, output):
            self.features = output

        self.model.Mixed_7c.register_forward_hook(hook)

        # Resize to 299x299 and apply the ImageNet mean/std normalisation.
        self.preprocess = transforms.Compose([
            transforms.Resize((299, 299)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])

    def get_features(self, images, batch_size=16):
        """
        Extract one pooled feature vector per image from the hooked
        ``Mixed_7c`` activations, processing ``batch_size`` images at a time.

        ``images`` is a sequence of PIL images or numpy arrays (arrays are
        converted with ``Image.fromarray``). Returns a numpy array with one
        row per image.

        Raises ``ValueError`` when ``images`` is empty or ``batch_size`` is
        smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
        if len(images) == 0:
            # np.concatenate([]) would fail with an unrelated-looking message.
            raise ValueError("Cannot extract FID features from an empty image collection")

        all_features = []
        with torch.no_grad():
            for i in tqdm(range(0, len(images), batch_size), desc="Extracting FID features"):
                batch = images[i:i + batch_size]
                batch_tensors = []
                for img in batch:
                    if isinstance(img, np.ndarray):
                        img = Image.fromarray(img)
                    batch_tensors.append(self.preprocess(img))
                batch_tensors = torch.stack(batch_tensors).to(self.device)

                # The forward pass is run only to trigger the hook; its return
                # value is not used.
                _ = self.model(batch_tensors)
                feats = torch.nn.functional.adaptive_avg_pool2d(self.features, (1, 1))
                feats = feats.squeeze(-1).squeeze(-1)  # [B, 2048]
                all_features.append(feats.cpu().numpy())

        return np.concatenate(all_features, axis=0)

    def calculate_fid(self, real_images, generated_images):
        """
        Compute Fréchet Inception Distance between two image sets.

        Returns the score as a Python ``float``. Raises ``ValueError`` when
        either set has fewer than two images, because a sample covariance
        cannot be estimated from a single sample.
        """
        print("\n=== Calculating FID ===")
        print(f"Real images: {len(real_images)} | Generated images: {len(generated_images)}")

        # Fail before the expensive feature extraction starts.
        for label, count in (("real", len(real_images)), ("generated", len(generated_images))):
            if count < 2:
                raise ValueError(f"FID needs at least 2 {label} images, got {count}")

        print("Extracting features from real images...")
        real_features = self.get_features(real_images)

        print("Extracting features from generated images...")
        gen_features = self.get_features(generated_images)

        # Mean and covariance statistics
        mu_real, sigma_real = np.mean(real_features, axis=0), np.cov(real_features, rowvar=False)
        mu_gen, sigma_gen = np.mean(gen_features, axis=0), np.cov(gen_features, rowvar=False)

        # FID formula
        diff = mu_real - mu_gen
        # Called without `disp` so only the matrix is returned; the `disp`
        # argument is deprecated in recent SciPy releases.
        covmean = scipy.linalg.sqrtm(sigma_real @ sigma_gen)

        # Numerical stability
        if not np.isfinite(covmean).all():
            # Near-singular product: retry with a small diagonal offset.
            offset = np.eye(sigma_real.shape[0]) * 1e-6
            covmean = scipy.linalg.sqrtm((sigma_real + offset) @ (sigma_gen + offset))
        if np.iscomplexobj(covmean):
            # Drop the imaginary part introduced by numerical error.
            covmean = np.real(covmean)

        fid = np.sum(diff ** 2) + np.trace(sigma_real + sigma_gen - 2 * covmean)
        return float(fid)


if __name__ == "__main__":
    import glob
    import os

    real_images_path = "/content/drive/MyDrive/input/images"
    generated_images_path = "/content/outputs"

    def _load_rgb_pngs(folder):
        """Open every ``*.png`` directly inside ``folder`` as an RGB PIL image."""
        pattern = os.path.join(folder, "*.png")
        return [Image.open(png_path).convert("RGB") for png_path in glob.glob(pattern)]

    real_images = _load_rgb_pngs(real_images_path)
    generated_images = _load_rgb_pngs(generated_images_path)

    if real_images and generated_images:
        # NOTE(review): the recorded run scored 752 real images against only 2
        # generated ones; an FID from so few samples is unlikely to be meaningful.
        fid_evaluator = FIDEvaluator()
        fid_score = fid_evaluator.calculate_fid(real_images, generated_images)

        banner = "=" * 60
        print("\n" + banner)
        print("EVALUATION RESULTS - Stable Diffusion v1.5")
        print(banner)
        print(f"FID Score: {fid_score:.2f}")
    elif not real_images:
        # The real folder is reported first when both folders are empty.
        print(f"No images found in {real_images_path}")
    else:
        print(f"No images found in {generated_images_path}")


GPU Available: True
Using GPU: Tesla T4

=== Calculating FID ===
Real images: 752 | Generated images: 2
Extracting features from real images...


Extracting FID features: 100%|██████████| 47/47 [00:06<00:00,  7.27it/s]


Extracting features from generated images...


Extracting FID features: 100%|██████████| 1/1 [00:00<00:00, 33.60it/s]



EVALUATION RESULTS - Stable Diffusion v1.5
FID Score: 405.68
