In [1]:
%load_ext autoreload
%autoreload 2

import torch
import clip
import os 

# On a multi-GPU system, this hides all GPUs except the first 
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 

import fiftyone as fo
import fiftyone.brain as fob
import torchvision.transforms as transforms

from torch.utils.data import DataLoader

# Custom modules
from handsoncv.datasets import TFflowersCLIPDataset
from handsoncv.models import UNet
from handsoncv.metrics import extract_inception_features
from handsoncv.utils import DDPM, set_seed, seed_worker
from handsoncv.evaluation import Evaluator

# Hardware & Paths
NOTEBOOK_DIR = os.getcwd()
PROJECT_ROOT = os.path.abspath(os.path.join(NOTEBOOK_DIR, "..", ".."))
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

# Folders we frequently use across the experiments' notebooks
ROOT_PATH = os.path.join(PROJECT_ROOT, "Assignment-3")
ROOT_DATA = os.path.join(ROOT_PATH, "data")
DATA_DIR = f"{ROOT_DATA}/cropped_flowers"
SAMPLE_DIR = f"{ROOT_DATA}/05_images"
CSV_PATH = f"{ROOT_DATA}/clip_embeddings_metadata.csv"

CHECKPOINTS_DIR = os.path.join(ROOT_PATH, "checkpoints")
os.makedirs(CHECKPOINTS_DIR, exist_ok=True)

# Numpy and Torch Reproducibility
SEED=42
set_seed(42)

# Base Configuration Parameters
BATCH_SIZE = 32

cuda
Seeds set to 42 for reproducibility.


In [2]:
# Load UNet/DDPM trained in notebook '05_a_*'
model = UNet(400, 3, 32, down_chs=(256, 256, 512)).to(DEVICE)
model.load_state_dict(torch.load(f"{CHECKPOINTS_DIR}/ddpm_unet_best_clip_model.pt"))
ddpm = DDPM(torch.linspace(0.0001, 0.02, 400).to(DEVICE), DEVICE)
clip_model, clip_preprocess = clip.load("ViT-B/32", device=DEVICE)

In [None]:
# Create a Generator object to pass to the dataLoaders
g = torch.Generator()
g.manual_seed(SEED)

# Base transforms used by both training and validation data
base_t = [
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Lambda(lambda t: (t * 2) - 1)
]

# Create a DataLoader for original (real) images
ds = TFflowersCLIPDataset(CSV_PATH, transform=transforms.Compose(base_t))
data_loader = DataLoader(ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, generator=g)

Extracting features from real images for FID...


In [6]:
# Assessment Part 1 & 2: Generation, Embedding Extraction, CLIP Score and FID
# For inspection of the exact functions, please refer to sample_flowers in src/handsoncv/utils.py and Evaluator class in src/handsoncv/evaluation.py
evaluator = Evaluator(model, ddpm, clip_model, clip_preprocess, DEVICE, results_dir="results/eval_01")

# Define list of text prompts to generate images for
text_prompts = [
    "A red rose flower",
    "A deep red rose",
    "A rose with layered petals",
    "A red rose with layered petals",
    "A pink rose flower",
    "A detailed rose flower",
    "A close-up of a rose",
    
    "Two sunflowers with big brown centers",
    "A sunflower flower",
    "A sunflower with bright yellow petals",
    "An orange sunflower with a big brown center",
    "A bright yellow sunflower",
    "A close-up of a sunflower",
    "A large sunflower",
    "A sunflower with limp, drooping petals",
    
    "A white daisy with a yellow center",
    "A round white daisy",
    "A daisy flower",
    "A detailed daisy flower",
    "A close-up of a daisy",
    "A daisy covered in dew",
    "Two daisies",
    "Two white daisies with yellow centers",
]

eval_results, fid = evaluator.run_full_evaluation(
    text_prompts,
    real_features=real_features
)

print(f"FID Score: {fid}")

FID Score: 203.0053355017136


In [7]:
eval_results

[{'prompt': 'A red rose flower',
  'img_path': '/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-3/notebooks/results/eval_01/gen_000.png',
  'clip_score': 0.29248046875,
  'embedding': array([ 2.177457  ,  2.7260983 ,  0.52471787, ...,  0.07317976,
         -0.16184847,  0.43653056], shape=(32768,), dtype=float32)},
 {'prompt': 'A deep red rose',
  'img_path': '/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-3/notebooks/results/eval_01/gen_001.png',
  'clip_score': 0.287841796875,
  'embedding': array([ 2.5545046 ,  4.202106  ,  1.551464  , ..., -0.16619082,
         -0.15627752,  0.44599095], shape=(32768,), dtype=float32)},
 {'prompt': 'A rose with layered petals',
  'img_path': '/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-3/notebooks/results/eval_01/gen_002.png',
  'clip_score': 0.27490234375,
  'embedding': array([ 3.6166503 ,  3.7612288 ,  0.37931985, ...,  0.59454894,
         -0.15244798, -0.0718812

In [5]:
# Assessment Part 3: FiftyOne Analysis
dataset = fo.Dataset(name="generated_flowers_eval", overwrite=True)
samples = []

# eval_results now contains 21 items (3 prompts * 7 guidance scales)
for res in eval_results:
    sample = fo.Sample(filepath=res["img_path"])
    sample["prompt"] = fo.Classification(label=res["prompt"])
    sample["clip_score"] = res["clip_score"]
    sample["unet_embedding"] = res["embedding"]
    samples.append(sample)

dataset.add_samples(samples)

# Run if we have enough samples to satisfy FiftyOne's default clustering
if len(dataset) >= 20:
    print("Computing brain metrics...")
    fob.compute_uniqueness(dataset)
    fob.compute_representativeness(dataset, embeddings="unet_embedding")
else:
    print(f"Dataset size ({len(dataset)}) is too small for representativeness (needs 20+).")

session = fo.launch_app(dataset)

 100% |███████████████████| 23/23 [182.3ms elapsed, 0s remaining, 128.4 samples/s]    
Computing brain metrics...
Computing embeddings...
 100% |███████████████████| 23/23 [1.3s elapsed, 0s remaining, 17.9 samples/s]      
Computing uniqueness...
Uniqueness computation complete
Computing representativeness...
Computing clusters for 23 embeddings; this may take awhile...
Representativeness computation complete
