In [1]:
%load_ext autoreload
%autoreload 2

import torch
import clip
import os 

# On a multi-GPU system, this hides all GPUs except the first 
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 

import fiftyone as fo
import fiftyone.brain as fob

from handsoncv.models import UNet
from handsoncv.utils import DDPM, set_seed
from handsoncv.evaluation import Evaluator

# Hardware & Paths
NOTEBOOK_DIR = os.getcwd()
PROJECT_ROOT = os.path.abspath(os.path.join(NOTEBOOK_DIR, "..", ".."))
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

# Folders we frequently use across the experiments' notebooks
ROOT_PATH = os.path.join(PROJECT_ROOT, "Assignment-3")
ROOT_DATA = os.path.join(ROOT_PATH, "data")
DATA_DIR = f"{ROOT_DATA}/cropped_flowers"
SAMPLE_DIR = f"{ROOT_DATA}/05_images"
CSV_PATH = f"{ROOT_DATA}/clip_embeddings_metadata.csv"

CHECKPOINTS_DIR = os.path.join(ROOT_PATH, "checkpoints")
os.makedirs(CHECKPOINTS_DIR, exist_ok=True)

# Numpy and Torch Reproducibility
SEED=42
set_seed(42)

cuda
Seeds set to 42 for reproducibility.


In [2]:
# Load UNet/DDPM trained in notebook '05_a_*'
model = UNet(400, 3, 32, down_chs=(256, 256, 512)).to(DEVICE)
model.load_state_dict(torch.load(f"{CHECKPOINTS_DIR}/ddpm_unet_best_model.pt"))
ddpm = DDPM(torch.linspace(0.0001, 0.02, 400).to(DEVICE), DEVICE)
clip_model, clip_preprocess = clip.load("ViT-B/32", device=DEVICE)

In [None]:
# Assessment Part 1 & 2: Generation, Embedding Extraction, CLIP Score and FID
# For inspection of the exact functions, please refer to sample_flowers in src/handsoncv/utils.py and Evaluator class in src/handsoncv/evaluation.py
evaluator = Evaluator(model, ddpm, clip_model, clip_preprocess, DEVICE, results_dir="results/eval_01")

# Define list of text prompts to generate images for
text_prompts = [
    "A rose with red petals", 
    "A rose with blue petals",
    "A rose with yellow petals",
    "A rose with white petals",
    "A rose with layered, red petals",
    "A sunflower with organge petals and a big brown center", 
    "Two sunflowers with big brown centers",
    "A sunflower watched from the side",
    "A sunflower with limp, drooping petals",
    "A daisy with white petals",
    "Two daisies with white petals",
    "One daisy with white petals, one with rose petals",
    "A daisy with a blue centre",
    "A daisy covered in dew",
    "A close-up of a sunflower",
    "A sunflower facing the camera",
    "A pink rose",
    "A rose with layered petals",
    "A single daisy in bloom",
    "A sunflower with bright, yellow petals",
    "A rose viewed from above",
]
# text_prompts = [
#     "A round white daisy with a yellow center",
#     "An orange sunflower with a big brown center",
#     "A deep red rose flower"
# ]
eval_results, fid = evaluator.run_full_evaluation(text_prompts)

In [4]:
eval_results

[{'prompt': 'A rose with red petals',
  'img_path': '/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-3/notebooks/results/eval_01/gen_000.png',
  'clip_score': 0.2890625,
  'embedding': array([-0.14917536, -0.13864927,  0.19957997, ...,  2.9715638 ,
          2.9841652 ,  1.7682194 ], shape=(32768,), dtype=float32)},
 {'prompt': 'A rose with blue petals',
  'img_path': '/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-3/notebooks/results/eval_01/gen_001.png',
  'clip_score': 0.2235107421875,
  'embedding': array([-0.08991776, -0.11866781, -0.07032527, ...,  1.4342769 ,
          1.7433668 ,  1.6278943 ], shape=(32768,), dtype=float32)},
 {'prompt': 'A rose with yellow petals',
  'img_path': '/home/vanessa/Documents/repos/Applied-Hands-On-Computer-Vision/Assignment-3/notebooks/results/eval_01/gen_002.png',
  'clip_score': 0.25146484375,
  'embedding': array([-0.08753923, -0.16721526, -0.12973952, ...,  1.5030211 ,
          0.8201108 ,  

In [5]:
# Assessment Part 3: FiftyOne Analysis
dataset = fo.Dataset(name="generated_flowers_eval", overwrite=True)
samples = []

# eval_results now contains 21 items (3 prompts * 7 guidance scales)
for res in eval_results:
    sample = fo.Sample(filepath=res["img_path"])
    sample["prompt"] = fo.Classification(label=res["prompt"])
    sample["clip_score"] = res["clip_score"]
    sample["unet_embedding"] = res["embedding"]
    samples.append(sample)

dataset.add_samples(samples)

# Run if we have enough samples to satisfy FiftyOne's default clustering
if len(dataset) >= 20:
    print("Computing brain metrics...")
    fob.compute_uniqueness(dataset)
    fob.compute_representativeness(dataset, embeddings="unet_embedding")
else:
    print(f"Dataset size ({len(dataset)}) is too small for representativeness (needs 20+).")

session = fo.launch_app(dataset)

 100% |███████████████████| 21/21 [162.5ms elapsed, 0s remaining, 131.2 samples/s]    
Computing brain metrics...
Computing embeddings...
 100% |███████████████████| 21/21 [1.5s elapsed, 0s remaining, 14.4 samples/s]      
Computing uniqueness...
Uniqueness computation complete
Computing representativeness...
Computing clusters for 21 embeddings; this may take awhile...
Representativeness computation complete
