# Data Download

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("paultimothymooney/kermany2018")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/kermany2018


In [2]:
!pip install kagglehub torch torchvision transformers diffusers accelerate datasets xformers pytorch-fid pandas
import os
import gc
import torch
import numpy as np
import random
from PIL import Image
import pandas as pd
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, TensorDataset
from transformers import ViTModel, CLIPTextModel, CLIPTokenizer
from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler, StableDiffusionPipeline
from accelerate import Accelerator
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from pytorch_fid import fid_score

# Set random seed
seed = 123
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)

# GPU setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Memory cleanup function
def clear_memory():
    gc.collect()
    torch.cuda.empty_cache()

Collecting xformers
  Downloading xformers-0.0.30-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting pytorch-fid
  Downloading pytorch_fid-0.3.0-py3-none-any.whl.metadata (5.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (fro

In [None]:
# Example using Python + PIL
from PIL import Image
import os

in_dir = "/kaggle/input/kermany2018/OCT2017 /train/CNV"
# in_dir = "/root/.cache/kagglehub/datasets/paultimothymooney/kermany2018/versions/2/OCT2017 /train/CNV"
out_dir = "processed/CNV/"
os.makedirs(out_dir, exist_ok=True)

for fn in os.listdir(in_dir):
    img = Image.open(os.path.join(in_dir, fn)).convert("RGB")     # ensure 3‑channel
    img = img.resize((512, 512), resample=Image.LANCZOS)         # model’s expected res
    img.save(os.path.join(out_dir, fn))

In [3]:
!cp -r /content/drive/MyDrive/processed_images.zip /content

In [4]:
!unzip /content/processed_images.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: content/processed/CNV/CNV-9642260-181.jpeg  
  inflating: content/processed/CNV/CNV-7513011-42.jpeg  
  inflating: content/processed/CNV/CNV-9911627-11.jpeg  
  inflating: content/processed/CNV/CNV-7683197-28.jpeg  
  inflating: content/processed/CNV/CNV-440315-6.jpeg  
  inflating: content/processed/CNV/CNV-8728968-80.jpeg  
  inflating: content/processed/CNV/CNV-8598714-111.jpeg  
  inflating: content/processed/CNV/CNV-1997439-173.jpeg  
  inflating: content/processed/CNV/CNV-6717547-33.jpeg  
  inflating: content/processed/CNV/CNV-451136-39.jpeg  
  inflating: content/processed/CNV/CNV-172472-371.jpeg  
  inflating: content/processed/CNV/CNV-9374492-187.jpeg  
  inflating: content/processed/CNV/CNV-2724152-104.jpeg  
  inflating: content/processed/CNV/CNV-7710023-117.jpeg  
  inflating: content/processed/CNV/CNV-6717547-29.jpeg  
  inflating: content/processed/CNV/CNV-1279887-44.jpeg  
  inflating: content

# Train the Model

In [5]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
from transformers import CLIPTextModel, CLIPTokenizer
from accelerate import Accelerator
import shutil
from torch.optim.lr_scheduler import LambdaLR

In [6]:
class CNVDataset(Dataset):
    def __init__(self, root_dir, tokenizer, resolution=512, max_length=77, dataset_size:int = 6400):
        self.files = [
            os.path.join(root_dir, f)
            for f in os.listdir(root_dir)
            if f.lower().endswith((".png", ".jpg", ".jpeg"))
        ]

        if len(self.files) > dataset_size:
            self.files = self.files[:dataset_size]
        self.tokenizer = tokenizer
        self.transform = transforms.Compose([
            transforms.Resize((resolution, resolution), transforms.InterpolationMode.LANCZOS),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])
        self.prompt = "OCT scan showing CNV"
        self.max_length = max_length

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img = Image.open(self.files[idx]).convert("RGB")
        img = self.transform(img)
        tokens = self.tokenizer(
            self.prompt,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        return {"pixel_values": img, "input_ids": tokens.input_ids.squeeze(0)}

In [None]:
# def train(
#     pretrained_model: str,
#     data_dir: str,
#     run_epochs: int,
#     total_epochs: int,
#     output_dir: str = "sd_cnv_finetuned",
#     resolution: int = 512,
#     dataset_size:int = 30000,
#     batch_size: int = 4,
#     learning_rate: float = 1e-4,
#     epochs: int = 3,
#     grad_accum_steps: int = 1,
#     save_steps: int = 1000,
#     resume_checkpoint: str = None,
#     unet: UNet2DConditionModel = None,
#     accelerator: Accelerator = None,
# ):

#     # 1) Prepare output directory, accelerator & device
#     os.makedirs(output_dir, exist_ok=True)
#     accel = accelerator or Accelerator()
#     device = accel.device

#     # 2) Load or resume UNet
#     if resume_checkpoint and unet is None:
#         unet = UNet2DConditionModel.from_pretrained(resume_checkpoint).to(device)
#     elif unet is None:
#         unet = UNet2DConditionModel.from_pretrained(pretrained_model, subfolder="unet").to(device)

#     # 3) Load & freeze tokenizer + text encoder
#     tokenizer    = CLIPTokenizer.from_pretrained(pretrained_model, subfolder="tokenizer")
#     text_encoder = CLIPTextModel.from_pretrained(pretrained_model, subfolder="text_encoder").to(device)
#     text_encoder.requires_grad_(False)

#     # 4) Load & freeze VAE
#     vae = AutoencoderKL.from_pretrained(pretrained_model, subfolder="vae").to(device)
#     vae.requires_grad_(False)

#     # 5) Load noise scheduler
#     scheduler = DDPMScheduler.from_pretrained(pretrained_model, subfolder="scheduler")

#     # 6) Prepare dataset & dataloader (assumes CNVDataset is defined)
#     dataset    = CNVDataset(data_dir, tokenizer, resolution=resolution, dataset_size = dataset_size)
#     dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

#     # 7) Optimizer (only UNet params)
#     optimizer = torch.optim.AdamW(unet.parameters(), lr=learning_rate)

#     # 8) Wrap models, optimizer, and dataloader for mixed‑precision / distributed
#     unet, optimizer, dataloader = accel.prepare(unet, optimizer, dataloader)

#     # 9) Resume state if requested
#     if resume_checkpoint:
#         accel.load_state(resume_checkpoint)
#         # pick up where you left off
#         global_step = int(resume_checkpoint.rsplit("_", 1)[-1])
#     else:
#         global_step = 0

#     # 10) Training loop
#     for epoch in range(1, epochs + 1):
#         unet.train()
#         for batch in dataloader:
#             with accel.accumulate(unet):
#                 # Encode images to latents
#                 pixels  = batch["pixel_values"].to(device)
#                 latents = vae.encode(pixels).latent_dist.sample() * 0.18215

#                 # Add noise
#                 noise     = torch.randn_like(latents)
#                 timesteps = torch.randint(0, scheduler.num_train_timesteps,
#                                           (latents.shape[0],), device=device)
#                 noisy_latents = scheduler.add_noise(latents, noise, timesteps)

#                 # Text conditioning
#                 input_ids             = batch["input_ids"].to(device)
#                 encoder_hidden_states = text_encoder(input_ids)[0]

#                 # Noise prediction & loss
#                 pred_noise = unet(noisy_latents, timesteps, encoder_hidden_states).sample
#                 loss       = torch.nn.functional.mse_loss(pred_noise, noise)

#                 # Backpropagate
#                 accel.backward(loss)
#                 optimizer.step()
#                 optimizer.zero_grad()

#             global_step += 1
#             if global_step % save_steps == 0:
#                 accel.wait_for_everyone()
#                 ckpt_dir = os.path.join(output_dir, f"checkpoint_{global_step}")
#                 unet.save_pretrained(ckpt_dir)
#                 if accel.is_main_process:
#                     tokenizer.save_pretrained(ckpt_dir)
#                 accel.save_state(ckpt_dir)

#         print(f"Epoch {epoch}/{epochs} complete")

#     # 11) Final save
#     accel.wait_for_everyone()
#     final_dir = os.path.join(output_dir, "final_unet")
#     unet.save_pretrained(final_dir)
#     if accel.is_main_process:
#         tokenizer.save_pretrained(output_dir)
#     print("Fine‑tuning complete — models saved to", output_dir)

In [None]:
# from accelerate import Accelerator
# # Optionally load the checkpoint’s state:
# accel = Accelerator()
# train(
#     pretrained_model="runwayml/stable-diffusion-v1-5",
#     data_dir="/content/processed/CNV",
#     output_dir="/content/sd_cnv_finetuned",
#     resolution=512,
#     batch_size=8,
#     learning_rate=1e-4,
#     dataset_size=30000,
#     epochs=30,
#     grad_accum_steps=1,
#     save_steps=10000,
#     # resume_checkpoint="/content/sd_cnv_finetuned/checkpoint_6000",
#     resume_checkpoint=None,
#     accelerator=accel
# )

In [7]:
def train(
    pretrained_model: str,
    data_dir: str,
    output_dir: str = "/content/drive/MyDrive/sd_cnv_finetuned",
    resolution: int = 512,
    dataset_size: int = 30000,
    batch_size: int = 8,
    learning_rate: float = 1e-4,
    epochs: int = 30,
    grad_accum_steps: int = 1,
    resume_checkpoint: str = None,
    accelerator: Accelerator = None,
    log_steps: int = 100
):

    # 1) Setup Mount & accelerator
    os.makedirs(output_dir, exist_ok=True)
    accel = accelerator or Accelerator()
    device = accel.device

    # 2) Load or resume UNet
    if resume_checkpoint:
        unet = UNet2DConditionModel.from_pretrained(resume_checkpoint).to(device)
    else:
        unet = UNet2DConditionModel.from_pretrained(pretrained_model, subfolder="unet").to(device)

    # 3) Tokenizer & Text Encoder
    tokenizer = CLIPTokenizer.from_pretrained(pretrained_model, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(pretrained_model, subfolder="text_encoder").to(device)
    text_encoder.requires_grad_(False)

    # 4) VAE
    vae = AutoencoderKL.from_pretrained(pretrained_model, subfolder="vae").to(device)
    vae.requires_grad_(False)

    # 5) Scheduler
    scheduler = DDPMScheduler.from_pretrained(pretrained_model, subfolder="scheduler")

    # 6) Data
    dataset = CNVDataset(data_dir, tokenizer, resolution=resolution, dataset_size=dataset_size)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # 7) Optimizer
    optimizer = torch.optim.AdamW(unet.parameters(), lr=learning_rate)
    unet, optimizer, dataloader = accel.prepare(unet, optimizer, dataloader)

    global_steps = 0

    # 8) Epoch loop with single rotating checkpoint
    checkpoint_path = os.path.join(output_dir, "checkpoint")
    for epoch in range(1, epochs + 1):
        unet.train()
        for batch in dataloader:
            with accel.accumulate(unet):
                pixels = batch["pixel_values"].to(device)
                latents = vae.encode(pixels).latent_dist.sample() * 0.18215

                noise = torch.randn_like(latents)
                timesteps = torch.randint(0, scheduler.num_train_timesteps,
                                          (latents.shape[0],), device=device)
                noisy_latents = scheduler.add_noise(latents, noise, timesteps)

                input_ids = batch["input_ids"].to(device)
                encoder_hidden_states = text_encoder(input_ids)[0]

                pred_noise = unet(noisy_latents, timesteps, encoder_hidden_states).sample
                loss = torch.nn.functional.mse_loss(pred_noise, noise)

                accel.backward(loss)
                optimizer.step()
                optimizer.zero_grad()

            global_steps += 1
            if global_steps % log_steps == 0:
                print(f"Epoch {epoch}/{epochs} | Step {global_steps} | Loss: {loss.item()}")

        # end of epoch: delete old checkpoint, save new one
        if os.path.isdir(checkpoint_path):
            shutil.rmtree(checkpoint_path)
        unet.save_pretrained(checkpoint_path)
        if accel.is_main_process:
            tokenizer.save_pretrained(checkpoint_path)
        accel.save_state(checkpoint_path)
        print(f"Epoch {epoch} complete — checkpoint saved at {checkpoint_path}")

    # 9) Final model save
    final_dir = os.path.join(output_dir, "final_unet")
    unet.save_pretrained(final_dir)
    if accel.is_main_process:
        tokenizer.save_pretrained(final_dir)
    print(f"Training finished; final UNet + tokenizer saved to {final_dir}")

In [8]:
accel = Accelerator()
train(
    pretrained_model="runwayml/stable-diffusion-v1-5",
    # data_dir="/content/processed/CNV",
    data_dir="/content/content/processed/CNV",
    # output_dir="/content/drive/MyDrive/sd_cnv_finetuned",
    output_dir="/content/drive/MyDrive/sd_cnv_finetuned-slower-learning-rate",
    resolution=512,
    batch_size=8,
    learning_rate=5e-5,
    dataset_size=30000,
    epochs=2,
    grad_accum_steps=1,
    # resume_checkpoint=None,
    resume_checkpoint="/content/drive/MyDrive/sd_cnv_finetuned/checkpoint-1",
    accelerator=accel
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)


Epoch 1/2 | Step 100 | Loss: 0.07405176758766174
Epoch 1/2 | Step 200 | Loss: 0.1437930017709732
Epoch 1/2 | Step 300 | Loss: 0.15559224784374237
Epoch 1/2 | Step 400 | Loss: 0.15109743177890778
Epoch 1/2 | Step 500 | Loss: 0.3273129463195801
Epoch 1/2 | Step 600 | Loss: 0.047856077551841736
Epoch 1/2 | Step 700 | Loss: 0.26141008734703064
Epoch 1/2 | Step 800 | Loss: 0.13428334891796112
Epoch 1/2 | Step 900 | Loss: 0.24136951565742493
Epoch 1/2 | Step 1000 | Loss: 0.27837881445884705
Epoch 1/2 | Step 1100 | Loss: 0.133579283952713
Epoch 1/2 | Step 1200 | Loss: 0.10825271159410477
Epoch 1/2 | Step 1300 | Loss: 0.1639707088470459
Epoch 1/2 | Step 1400 | Loss: 0.0857803151011467
Epoch 1/2 | Step 1500 | Loss: 0.12564828991889954
Epoch 1/2 | Step 1600 | Loss: 0.15772363543510437
Epoch 1/2 | Step 1700 | Loss: 0.12778612971305847
Epoch 1/2 | Step 1800 | Loss: 0.0838075578212738
Epoch 1/2 | Step 1900 | Loss: 0.04853887856006622
Epoch 1/2 | Step 2000 | Loss: 0.10537272691726685
Epoch 1/2 | Ste

In [9]:
# if you get out of memory issues, run this cell

import gc
import torch

gc.collect()
torch.cuda.empty_cache()

In [37]:
from torch.optim.lr_scheduler import LambdaLR

def train_simple_decay_schedule(
    pretrained_model: str,
    data_dir: str,
    output_dir: str = "/content/drive/MyDrive/sd_cnv_finetuned",
    resolution: int = 512,
    dataset_size: int = 30000,
    batch_size: int = 8,
    base_learning_rate: float = 1e-4,
    epochs: int = 30,
    decay_epochs: int = 5,            # number of epochs over which to linearly decay LR
    grad_accum_steps: int = 1,
    resume_checkpoint: str = None,
    accelerator: Accelerator = None,
    log_steps: int = 100
):
    # 1) Setup
    os.makedirs(output_dir, exist_ok=True)
    accel = accelerator or Accelerator()
    device = accel.device

    # 2) Load or resume UNet
    if resume_checkpoint:
        unet = UNet2DConditionModel.from_pretrained(resume_checkpoint).to(device)
    else:
        unet = UNet2DConditionModel.from_pretrained(pretrained_model, subfolder="unet").to(device)

    # 3) Tokenizer & Text Encoder
    tokenizer = CLIPTokenizer.from_pretrained(pretrained_model, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(pretrained_model, subfolder="text_encoder").to(device)
    text_encoder.requires_grad_(False)

    # 4) VAE
    vae = AutoencoderKL.from_pretrained(pretrained_model, subfolder="vae").to(device)
    vae.requires_grad_(False)

    # 5) Noise Scheduler for diffusion process
    noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model, subfolder="scheduler")

    # 6) Data
    dataset = CNVDataset(data_dir, tokenizer, resolution=resolution, dataset_size=dataset_size)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # 7) Optimizer + LR Scheduler
    optimizer = torch.optim.AdamW(unet.parameters(), lr=base_learning_rate)
    # linear decay from base_lr (epoch 0) to 0 (epoch decay_epochs)
    lr_scheduler = LambdaLR(
        optimizer,
        lr_lambda=lambda epoch: max(0.0, 1 - (epoch / (decay_epochs - 1)))
    )

    unet, optimizer, dataloader = accel.prepare(unet, optimizer, dataloader)

    global_steps = 0
    checkpoint_path = os.path.join(output_dir, "checkpoint")

    # 8) Training loop
    for epoch in range(1, epochs + 1):
        unet.train()
        for batch in dataloader:
            with accel.accumulate(unet):
                pixels = batch["pixel_values"].to(device)
                latents = vae.encode(pixels).latent_dist.sample() * 0.18215

                noise = torch.randn_like(latents)
                timesteps = torch.randint(
                    0, noise_scheduler.num_train_timesteps,
                    (latents.shape[0],), device=device
                )
                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

                input_ids = batch["input_ids"].to(device)
                encoder_hidden_states = text_encoder(input_ids)[0]

                pred_noise = unet(noisy_latents, timesteps, encoder_hidden_states).sample
                loss = torch.nn.functional.mse_loss(pred_noise, noise)

                accel.backward(loss)
                optimizer.step()
                optimizer.zero_grad()

            global_steps += 1
            if global_steps % log_steps == 0:
                current_lr = optimizer.param_groups[0]["lr"]
                print(f"Epoch {epoch}/{epochs} | Step {global_steps} | LR {current_lr:.2e} | Loss: {loss.item():.6f}")

        # Linear LR decay step (by epoch)
        lr_scheduler.step()

        # Rotate checkpoint at epoch end
        if os.path.isdir(checkpoint_path):
            shutil.rmtree(checkpoint_path)
        unet.save_pretrained(checkpoint_path)
        if accel.is_main_process:
            tokenizer.save_pretrained(checkpoint_path)
        accel.save_state(checkpoint_path)
        print(f"Epoch {epoch} complete — checkpoint saved at {checkpoint_path}")

    # 9) Final save
    final_dir = os.path.join(output_dir, "final_unet")
    unet.save_pretrained(final_dir)
    if accel.is_main_process:
        tokenizer.save_pretrained(final_dir)
    print(f"Training finished; final UNet + tokenizer saved to {final_dir}")

In [None]:
accel = Accelerator()
train_simple_decay_schedule(
    pretrained_model="runwayml/stable-diffusion-v1-5",
    data_dir="/content/content/processed/CNV",
    output_dir="/content/drive/MyDrive/sd_cnv_finetuned-simple-decay-schedule",
    resolution=512,
    batch_size=8,
    base_learning_rate=1e-4,
    epochs=5,
    decay_epochs=4,
    grad_accum_steps=1,
    resume_checkpoint=None,
    accelerator=accel,
    log_steps=100
)

  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)


Epoch 1/5 | Step 100 | LR 1.00e-04 | Loss: 0.176467
Epoch 1/5 | Step 200 | LR 1.00e-04 | Loss: 0.189999
Epoch 1/5 | Step 300 | LR 1.00e-04 | Loss: 0.204285
Epoch 1/5 | Step 400 | LR 1.00e-04 | Loss: 0.140528
Epoch 1/5 | Step 500 | LR 1.00e-04 | Loss: 0.196579
Epoch 1/5 | Step 600 | LR 1.00e-04 | Loss: 0.074039
Epoch 1/5 | Step 700 | LR 1.00e-04 | Loss: 0.196861
Epoch 1/5 | Step 800 | LR 1.00e-04 | Loss: 0.145963
Epoch 1/5 | Step 900 | LR 1.00e-04 | Loss: 0.040829
Epoch 1/5 | Step 1000 | LR 1.00e-04 | Loss: 0.184121
Epoch 1/5 | Step 1100 | LR 1.00e-04 | Loss: 0.153870
Epoch 1/5 | Step 1200 | LR 1.00e-04 | Loss: 0.062782
Epoch 1/5 | Step 1300 | LR 1.00e-04 | Loss: 0.062802
Epoch 1/5 | Step 1400 | LR 1.00e-04 | Loss: 0.252008
Epoch 1/5 | Step 1500 | LR 1.00e-04 | Loss: 0.063828
Epoch 1/5 | Step 1600 | LR 1.00e-04 | Loss: 0.083160
Epoch 1/5 | Step 1700 | LR 1.00e-04 | Loss: 0.161129
Epoch 1/5 | Step 1800 | LR 1.00e-04 | Loss: 0.198542
Epoch 1/5 | Step 1900 | LR 1.00e-04 | Loss: 0.135585
Ep

In [None]:
!cp -r /content/drive/MyDrive/sd_cnv_finetuned /content

In [None]:
!cp -r /content/models/sd_cnv_finetuned/final_unet /content/drive/MyDrive/Novo-Nordisk/

In [None]:
!cp -r /content/models/sd_cnv_finetuned/checkpoint_10000 /content/drive/MyDrive/

In [None]:
!cp -r /content/processed_images.zip /content/drive/MyDrive/

In [None]:
!zip -r processed_images.zip /content/processed

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/processed/CNV/CNV-9642260-181.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-7513011-42.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-9911627-11.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-7683197-28.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-440315-6.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-8728968-80.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-8598714-111.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-1997439-173.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-6717547-33.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-451136-39.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-172472-371.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-9374492-187.jpeg (deflated 1%)
  adding: content/processed/CNV/CNV-2724152-104.jpeg (deflated 2%)
  adding: content/processed/CNV/CNV-7710023-117.jpeg (deflated 1%)
  adding

In [50]:
# if you get out of memory issues, run this cell

import gc
import torch

gc.collect()
torch.cuda.empty_cache()

In [None]:
# for me to download the files from colab

!zip -r /content/checkpoints_11000.zip /content/sd_cnv_finetuned/checkpoint_11000/

  adding: content/sd_cnv_finetuned/checkpoint_11000/ (stored 0%)
  adding: content/sd_cnv_finetuned/checkpoint_11000/tokenizer_config.json (deflated 63%)
  adding: content/sd_cnv_finetuned/checkpoint_11000/random_states_0.pkl (deflated 25%)
  adding: content/sd_cnv_finetuned/checkpoint_11000/model.safetensors

# Inference

In [32]:
import torch
from diffusers import StableDiffusionPipeline, UNet2DConditionModel
from transformers import CLIPTokenizer

device = "cuda"  # or "cpu" if no GPU

# 1) Load the base pipeline (with original VAE, text-encoder, scheduler, tokenizer)
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    safety_checker=None
).to(device)


# 2) Load your fine-tuned UNet
finetuned_unet = UNet2DConditionModel.from_pretrained(
    # "/content/sd_cnv_finetuned/checkpoint"
    # '/content/drive/MyDrive/sd_cnv_finetuned/checkpoint-old',
    "/content/drive/MyDrive/sd_cnv_finetuned-slower-learning-rate/checkpoint",
    torch_dtype=torch.float16
).to(device)

# 3) Replace the pipeline’s UNet
pipe.unet = finetuned_unet

# 4) (Optional) Load & swap in your checkpoint’s tokenizer
ckpt_tokenizer = CLIPTokenizer.from_pretrained(
    # "/content/sd_cnv_finetuned/checkpoint"
    # '/content/drive/MyDrive/sd_cnv_finetuned/checkpoint-old'
    "/content/drive/MyDrive/sd_cnv_finetuned-slower-learning-rate/checkpoint"
)
pipe.tokenizer = ckpt_tokenizer

# 5) Generate!
prompt = "OCT scan showing CNV"
# prompt = "CNV OCT"
out = pipe(
    prompt,
    num_inference_steps=250, #50
    guidance_scale=12.5 #7.5
)
img = out.images[0]
img.save("cnv_finetuned_example.png")

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


  0%|          | 0/250 [00:00<?, ?it/s]

In [33]:
!rm -rf /content/synthetic_cnv

In [34]:
def generate_synthetic_images():
    # Load the Base Pipeline from the Original Model
    pipeline = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float16,
        use_auth_token=False,
        safety_checker=None
    ).to(device)

    # Load the Fine-tuned UNet
    unet = UNet2DConditionModel.from_pretrained(
        '/content/drive/MyDrive/sd_cnv_finetuned-slower-learning-rate/final_unet',
        torch_dtype=torch.float16
    ).to(device)

    # Replace the UNet in the Pipeline
    pipeline.unet = unet

    # Generate Synthetic Images
    synthetic_dir = "/content/synthetic_cnv/"
    os.makedirs(synthetic_dir, exist_ok=True)
    num_images = 20
    prompt = "OCT scan showing CNV"

    for i in range(num_images):
        image = pipeline(prompt, num_inference_steps=250, guidance_scale=12.5).images[0]
        image.save(os.path.join(synthetic_dir, f"synthetic_cnv_{i}.png"))
        if i % 50 == 0:
            print(f"Generated {i}/{num_images} images")
        clear_memory()

generate_synthetic_images()
clear_memory()

Keyword arguments {'use_auth_token': False} are not expected by StableDiffusionPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


  0%|          | 0/250 [00:00<?, ?it/s]

Generated 0/20 images


  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

  0%|          | 0/250 [00:00<?, ?it/s]

In [35]:
!zip -r synthetic_images.zip /content/synthetic_cnv

updating: content/synthetic_cnv/ (stored 0%)
updating: content/synthetic_cnv/synthetic_cnv_0.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_5.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_4.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_14.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_10.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_6.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_19.png (deflated 1%)
updating: content/synthetic_cnv/synthetic_cnv_2.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_12.png (deflated 1%)
updating: content/synthetic_cnv/synthetic_cnv_7.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_1.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_8.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_13.png (deflated 0%)
updating: content/synthetic_cnv/synthetic_cnv_9.png (deflated 0%)
updating: content/syntheti

In [36]:
!cp -r /content/synthetic_images.zip /content/drive/MyDrive/