In [None]:
!pip install -U peft

In [None]:
!pip install git+https://github.com/huggingface/diffusers.git transformers accelerate xformers==0.0.16 datasets==2.21.0

In [None]:
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np


In [None]:
import zipfile
from pathlib import Path

import gdown

# Google Drive location of the zipped dataset.
url = "https://drive.google.com/uc?id=1BJqn1s7J2LAMDenQ7FaORrcueAEvpm0K"

output = "dataset.zip"

# Re-running this cell should not fetch an archive that is already on disk.
if not Path(output).exists():
    downloaded = gdown.download(url, output, quiet=False)
    # NOTE(review): gdown appears to return None instead of raising on some
    # failures -- confirm for the installed version.
    if downloaded is None:
        raise RuntimeError(f"Failed to download dataset archive from {url}")

# Unpack everything into ./dataset (repeating the extraction is harmless).
with zipfile.ZipFile(output, "r") as z:
    z.extractall("dataset")

In [None]:
def qualitative_evaluation(
    model_name,
    conditioning_images,
    prompts=None,
    subfolder=None,
    controlnet_conditioning_scale=1.0,
    num_inference_steps=50,
    guidance_scale=7.5,
    seed=42
):
    """Generate one image per conditioning image and plot inputs next to outputs.

    Args:
        model_name: Identifier passed to ``ControlNetModel.from_pretrained``.
        conditioning_images: Iterable of conditioning inputs. ``str`` entries are
            opened as image files and converted to RGB; anything else is passed
            to the pipeline unchanged.
        prompts: One prompt per conditioning image. ``None`` means an empty
            prompt for every image. Surplus prompts are ignored.
        subfolder: Optional subfolder forwarded to ``from_pretrained``.
        controlnet_conditioning_scale: Forwarded to the pipeline call.
        num_inference_steps: Forwarded to the pipeline call.
        guidance_scale: Forwarded to the pipeline call.
        seed: Seed for the CUDA generator shared by all generations in this call.

    Returns:
        List with one generated image per conditioning image (empty when no
        conditioning images were given).

    Raises:
        ValueError: If fewer prompts than conditioning images are supplied.
    """
    # Materialise first so any iterable is accepted and can be counted.
    conditioning_images = list(conditioning_images)
    n_images = len(conditioning_images)

    if n_images == 0:
        # Nothing to generate, and a zero-row subplot grid cannot be created.
        return []

    if prompts is None:
        prompts = [""] * n_images
    elif len(prompts) < n_images:
        # Fail before the expensive model load instead of with an IndexError
        # while plotting.
        raise ValueError(
            f"Got {len(prompts)} prompts for {n_images} conditioning images; "
            "provide at least one prompt per image"
        )

    condition_imgs = []
    for img in conditioning_images:
        if isinstance(img, str):
            # convert() returns a new in-memory image, so the file handle can
            # be released right away.
            with Image.open(img) as handle:
                condition_imgs.append(handle.convert("RGB"))
        else:
            condition_imgs.append(img)

    print(f"Loading model: {model_name}")
    controlnet = ControlNetModel.from_pretrained(model_name, subfolder=subfolder, torch_dtype=torch.float16)
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "stable-diffusion-v1-5/stable-diffusion-v1-5",
        controlnet=controlnet,
        torch_dtype=torch.float16,
        safety_checker=None
    )
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()

    # A single seeded generator is consumed across all images of this call.
    generator = torch.Generator(device="cuda").manual_seed(seed)

    generated_images = []
    for idx, (condition_img, prompt) in enumerate(zip(condition_imgs, prompts)):
        print(f"  Generating {idx+1}/{n_images}: {prompt[:50]}...")

        output = pipe(
            prompt=prompt,
            image=condition_img,
            num_inference_steps=num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            guidance_scale=guidance_scale,
            generator=generator
        ).images[0]

        generated_images.append(output)

    # squeeze=False always yields a 2-D axes array, so a single row needs no
    # special handling.
    fig, axes = plt.subplots(n_images, 2, figsize=(10, 5*n_images), squeeze=False)

    for idx in range(n_images):
        # Left column: conditioning image
        axes[idx][0].imshow(condition_imgs[idx])
        axes[idx][0].set_title(f"Input Thermal #{idx+1}", fontsize=12)
        axes[idx][0].axis('off')

        # Right column: generated image, titled with the (truncated) prompt
        axes[idx][1].imshow(generated_images[idx])
        prompt_title = prompts[idx][:50] + "..." if len(prompts[idx]) > 50 else prompts[idx]
        axes[idx][1].set_title(f"Generated RGB #{idx+1}\n{prompt_title}", fontsize=12)
        axes[idx][1].axis('off')

    # Figure-level title describing the evaluated configuration
    subfolder_info = f", Subfolder: {subfolder}" if subfolder is not None else ""
    fig.suptitle(
        f"Model: {model_name}{subfolder_info}, Conditioning Scale: {controlnet_conditioning_scale}",
        fontsize=16
    )

    plt.tight_layout(rect=[0, 0, 1, 0.96])  # leave space for suptitle
    plt.show()

    return generated_images


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Load one thermal frame from the extracted dataset folder.
image = Image.open("dataset/thermal_8_bit/FLIR_02268.jpeg")

# Turn the pixels into a NumPy array for plotting.
image_array = np.array(image)

# Draw the frame on an explicit Axes and hide the axis decorations.
fig, ax = plt.subplots()
ax.imshow(image_array)
ax.axis('off')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Load a second thermal frame from the extracted dataset folder.
image = Image.open("dataset/thermal_8_bit/FLIR_02255.jpeg")

# Turn the pixels into a NumPy array for plotting.
image_array = np.array(image)

# Draw the frame on an explicit Axes and hide the axis decorations.
fig, ax = plt.subplots()
ax.imshow(image_array)
ax.axis('off')
plt.show()


# Qualitative evaluation

## Model trained with good prompts derived from image annotations

In [None]:
# Prompt shared by the first image in both prompt sets.
_ROAD_PROMPT = "outdoor road scene with trees"

# Thermal frames used as conditioning inputs for the qualitative comparison.
test_images = [
    "dataset/thermal_8_bit/FLIR_02268.jpeg",
    "dataset/thermal_8_bit/FLIR_02255.jpeg",
]

# Two prompt sets, index-aligned with test_images; they differ only in the
# wording for the second image ("building" vs. "church").
test_prompts = [_ROAD_PROMPT, "a few cars and trees and a building"]
test_prompts2 = [_ROAD_PROMPT, "a few cars and trees and a church"]


In [None]:

# Models to compare in this sweep.
models = [
    "swetha3456/thermal-rgb-controlnet",
    "swetha3456/thermal-rgb-controlnet-v2",
    "swetha3456/thermal-rgb-controlnet-v2-no-prompts",
    "swetha3456/thermal-rgb-controlnet-v3",
    "swetha3456/thermal-rgb-controlnet-v3-contrast",
    "swetha3456/thermal-rgb-controlnet-v4-clahe"
]

conditioning_scales = [1.0, 1.5, 2.0]

# NOTE(review): the later sweep cells append "/controlnet" to these checkpoint
# names -- confirm which layout the model repositories actually use.
v2_subfolders = [
    "checkpoint-3000-contrast-x2",
    "checkpoint-4000-contrast-x2",
    "checkpoint-5000-contrast-x2",
]

v4_subfolders = [
    "checkpoint-5000"
]

prompt_sets = [
    test_prompts,
    test_prompts2,
]

for model_name in models:
    # The checkpoint subfolders depend only on the model, so pick them once.
    if model_name == "swetha3456/thermal-rgb-controlnet-v2":
        subfolders = v2_subfolders
    elif "v4" in model_name or "v3-contrast" in model_name:
        subfolders = v4_subfolders
    else:
        subfolders = [None]  # weights are loaded without a subfolder

    # The loop variable is deliberately not called `prompts`: that notebook-level
    # name holds the dataset prompts used by the quantitative cells.
    for prompt_set in prompt_sets:
        for scale in conditioning_scales:
            for subfolder in subfolders:
                generated = qualitative_evaluation(
                    model_name=model_name,
                    conditioning_images=test_images,
                    prompts=prompt_set,
                    subfolder=subfolder,
                    controlnet_conditioning_scale=scale,
                    num_inference_steps=50,
                )


In [None]:

# Models to compare in this sweep.
models = [
    "swetha3456/thermal-rgb-controlnet-v2",
    "swetha3456/thermal-rgb-controlnet-v2-no-prompts",
    "swetha3456/thermal-rgb-controlnet-v3",
    "swetha3456/thermal-rgb-controlnet-v3-contrast",
    "swetha3456/thermal-rgb-controlnet-v4-clahe"
]

conditioning_scales = [1.0, 1.5, 2.0]

# Checkpoint subfolders evaluated for the v2 model.
v2_subfolders = [
    "checkpoint-3000-contrast-x2/controlnet",
    "checkpoint-4000-contrast-x2/controlnet",
    "checkpoint-5000-contrast-x2/controlnet",
]

# Checkpoint subfolders evaluated for the v3-contrast and v4 models.
v4_subfolders = [
    "checkpoint-5000/controlnet"
]

prompt_sets = [
    test_prompts,
    test_prompts2,
]

for model_name in models:
    # The checkpoint subfolders depend only on the model, so pick them once.
    if model_name == "swetha3456/thermal-rgb-controlnet-v2":
        subfolders = v2_subfolders
    elif "v4" in model_name or "v3-contrast" in model_name:
        subfolders = v4_subfolders
    else:
        subfolders = [None]  # weights are loaded without a subfolder

    # The loop variable is deliberately not called `prompts`: that notebook-level
    # name holds the dataset prompts used by the quantitative cells.
    for prompt_set in prompt_sets:
        for scale in conditioning_scales:
            for subfolder in subfolders:
                generated = qualitative_evaluation(
                    model_name=model_name,
                    conditioning_images=test_images,
                    prompts=prompt_set,
                    subfolder=subfolder,
                    controlnet_conditioning_scale=scale,
                    num_inference_steps=50,
                )


In [None]:

# Models to compare in this sweep.
models = [
    "swetha3456/thermal-rgb-controlnet-v2",
    "swetha3456/thermal-rgb-controlnet-v3-contrast",
    "swetha3456/thermal-rgb-controlnet-v4-clahe"
]

conditioning_scales = [1.0, 1.5, 2.0]

# Checkpoint subfolders evaluated for the v2 model.
v2_subfolders = [
    "checkpoint-1000-contrast-x2/controlnet",
    "checkpoint-2000-contrast-x2/controlnet"
]

# Checkpoint subfolders evaluated for the v3-contrast and v4 models.
v4_subfolders = [
    "checkpoint-1000/controlnet",
    "checkpoint-2000/controlnet"
]

prompt_sets = [
    test_prompts,
    test_prompts2,
]

for model_name in models:
    # The checkpoint subfolders depend only on the model, so pick them once.
    if model_name == "swetha3456/thermal-rgb-controlnet-v2":
        subfolders = v2_subfolders
    elif "v4" in model_name or "v3-contrast" in model_name:
        subfolders = v4_subfolders
    else:
        subfolders = [None]  # weights are loaded without a subfolder

    # The loop variable is deliberately not called `prompts`: that notebook-level
    # name holds the dataset prompts used by the quantitative cells.
    for prompt_set in prompt_sets:
        for scale in conditioning_scales:
            for subfolder in subfolders:
                generated = qualitative_evaluation(
                    model_name=model_name,
                    conditioning_images=test_images,
                    prompts=prompt_set,
                    subfolder=subfolder,
                    controlnet_conditioning_scale=scale,
                    num_inference_steps=50,
                )


# Quantitative evaluation

In [None]:
from datasets import load_dataset
from PIL import Image
import io

# Number of trailing samples used for the quantitative evaluation.
NUM_TEST_SAMPLES = 20

dataset = load_dataset("swetha3456/thermal-rgb-test-dataset", split='train')

# Keep the last NUM_TEST_SAMPLES rows; clamp at 0 so a dataset smaller than
# that does not produce negative indices.
test_start = max(0, len(dataset) - NUM_TEST_SAMPLES)
test_dataset = dataset.select(range(test_start, len(dataset)))

print(f"Total images in dataset: {len(dataset)}")
print(f"Images used for evaluation: {len(test_dataset)}")
print(f"Columns: {test_dataset.column_names}")


In [None]:
def prepare_evaluation_data(dataset):
    """Split an iterable of samples into three index-aligned lists.

    Each sample must be subscriptable with the keys ``'conditioning_image'``,
    ``'image'`` and ``'prompt'``.

    Returns:
        Tuple ``(thermal_images, rgb_images, prompts)`` holding, in dataset
        order, the ``'conditioning_image'``, ``'image'`` and ``'prompt'``
        values respectively.
    """
    # Single pass over the dataset; every sample is read exactly once.
    rows = [
        (record['conditioning_image'], record['image'], record['prompt'])
        for record in dataset
    ]

    thermal_images = [row[0] for row in rows]
    rgb_images = [row[1] for row in rows]
    prompts = [row[2] for row in rows]

    print(f"Prepared {len(thermal_images)} image pairs")
    return thermal_images, rgb_images, prompts

# Materialise the evaluation split into three index-aligned lists.
thermal_images, rgb_images, prompts = prepare_evaluation_data(test_dataset)

# Quick sanity check on the first sample: both image sizes and its prompt.
print(
    "\nSample:\n"
    f"  Thermal size: {thermal_images[0].size}\n"
    f"  RGB size: {rgb_images[0].size}\n"
    f"  Prompt: {prompts[0]}"
)

In [None]:
!pip install lpips

In [None]:
from PIL import Image, ImageEnhance

def enhance_contrast_batch(images, contrast_factor=2.0):
    """Apply ``ImageEnhance.Contrast(...).enhance(contrast_factor)`` to each image.

    Args:
        images: Iterable of images accepted by ``ImageEnhance.Contrast``.
        contrast_factor: Factor forwarded to ``enhance`` for every image.

    Returns:
        A new list with one enhanced image per input, in input order.
    """
    return [
        ImageEnhance.Contrast(picture).enhance(contrast_factor)
        for picture in images
    ]

# Contrast-enhanced (factor 2.0) copies of the ground-truth RGB images, kept
# alongside the originals as a second ground-truth set.
rgb_images_enhanced = enhance_contrast_batch(rgb_images, contrast_factor=2.0)

In [None]:
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from PIL import Image
import numpy as np
from skimage.metrics import structural_similarity as ssim
from tqdm import tqdm
import lpips


def _to_lpips_tensor(img, device="cuda"):
    """Turn an HxWx3 image with 0-255 values into a 1x3xHxW float tensor in [-1, 1]."""
    t = torch.from_numpy(np.array(img)).float() / 255.0
    t = t.permute(2, 0, 1).unsqueeze(0)  # HxWx3 -> 1x3xHxW
    t = 2.0 * t - 1.0  # [0,1] -> [-1,1]
    return t.to(device)


def calculate_ssim_from_dataset(
    model_name,
    thermal_images,
    rgb_gt_images,
    prompts,
    subfolder=None,
    controlnet_conditioning_scale=1.0,
    num_inference_steps=50,
    guidance_scale=7.5,
    seed=42
):
    """Generate an image per thermal input and score it against ground truth.

    For every index, the pipeline is run on the thermal image and prompt, the
    output is resized to the ground-truth size if needed, and SSIM and LPIPS
    (AlexNet backbone) are computed between output and ground truth.

    Args:
        model_name: Identifier passed to ``ControlNetModel.from_pretrained``.
        thermal_images: Sequence of conditioning images.
        rgb_gt_images: Sequence of ground-truth images, index-aligned with
            ``thermal_images``.
        prompts: Sequence of prompts, index-aligned with ``thermal_images``.
        subfolder: Optional subfolder forwarded to ``from_pretrained``.
        controlnet_conditioning_scale: Forwarded to the pipeline call.
        num_inference_steps: Forwarded to the pipeline call.
        guidance_scale: Forwarded to the pipeline call.
        seed: Seed for the CUDA generator shared by all generations in this call.

    Returns:
        Dict with per-image lists (``individual_ssims``, ``individual_lpips``,
        ``prompts``, ``generated_images``, ``ground_truth_images``) and the
        summary floats ``mean_/std_/min_/max_`` for both ``ssim`` and ``lpips``.

    Raises:
        ValueError: If ``thermal_images`` is empty, or if ``rgb_gt_images`` or
            ``prompts`` has fewer entries than ``thermal_images``.
    """
    # Validate before the expensive model load so bad inputs fail immediately.
    n_images = len(thermal_images)
    if n_images == 0:
        raise ValueError("thermal_images is empty; nothing to evaluate")
    if len(rgb_gt_images) < n_images or len(prompts) < n_images:
        raise ValueError(
            f"Need one ground-truth image and one prompt per thermal image: got "
            f"{n_images} thermal images, {len(rgb_gt_images)} ground-truth images, "
            f"{len(prompts)} prompts"
        )

    print(f"Loading model: {model_name}")

    controlnet = ControlNetModel.from_pretrained(
        model_name,
        subfolder=subfolder,
        torch_dtype=torch.float16
    )

    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "stable-diffusion-v1-5/stable-diffusion-v1-5",
        controlnet=controlnet,
        torch_dtype=torch.float16,
        safety_checker=None
    )
    pipe.to("cuda")
    pipe.enable_xformers_memory_efficient_attention()

    # A single seeded generator is consumed across all images of this call.
    generator = torch.Generator(device="cuda").manual_seed(seed)

    # LPIPS model
    lpips_fn = lpips.LPIPS(net="alex").to("cuda")
    lpips_fn.eval()

    results = {
        "individual_ssims": [],
        "individual_lpips": [],
        "prompts": [],
        "generated_images": [],
        "ground_truth_images": []
    }
    # Short aliases for the per-image score lists stored in `results`.
    ssim_scores = results["individual_ssims"]
    lpips_scores = results["individual_lpips"]

    print(f"\nCalculating SSIM + LPIPS for {n_images} images...")

    for idx in tqdm(range(n_images)):
        thermal_img = thermal_images[idx]
        rgb_gt = rgb_gt_images[idx]
        prompt = prompts[idx]

        generated = pipe(
            prompt=prompt,
            image=thermal_img,
            num_inference_steps=num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            guidance_scale=guidance_scale,
            generator=generator
        ).images[0]

        # Both metrics compare pixel grids, so the sizes must match.
        if generated.size != rgb_gt.size:
            generated = generated.resize(rgb_gt.size, Image.LANCZOS)

        # ---- SSIM ----
        # channel_axis alone marks the colour axis; the older `multichannel`
        # flag is redundant here and deprecated.
        ssim_score = ssim(
            np.array(rgb_gt),
            np.array(generated),
            channel_axis=2,
            data_range=255
        )

        # ---- LPIPS ----
        with torch.no_grad():
            lpips_score = lpips_fn(
                _to_lpips_tensor(generated),
                _to_lpips_tensor(rgb_gt)
            ).item()

        ssim_scores.append(ssim_score)
        lpips_scores.append(lpips_score)
        results["prompts"].append(prompt)
        results["generated_images"].append(generated)
        results["ground_truth_images"].append(rgb_gt)

    # ---- Statistics ----
    for metric, scores in (("ssim", ssim_scores), ("lpips", lpips_scores)):
        results[f"mean_{metric}"] = float(np.mean(scores))
        results[f"std_{metric}"] = float(np.std(scores))
        results[f"min_{metric}"] = float(np.min(scores))
        results[f"max_{metric}"] = float(np.max(scores))

    # ---- Print summary ----
    print(f"\n{'='*70}")
    print("SSIM + LPIPS Evaluation Results")
    print(f"{'='*70}")
    print(f"Model: {model_name}")
    print(f"Images: {n_images}")
    print(f"Settings: scale={controlnet_conditioning_scale}, steps={num_inference_steps}, cfg={guidance_scale}")

    print(f"\nSSIM  : {results['mean_ssim']:.4f} ± {results['std_ssim']:.4f}")
    print(f"LPIPS: {results['mean_lpips']:.4f} ± {results['std_lpips']:.4f}")
    print(f"{'='*70}\n")

    # Best / worst by SSIM
    best_idx = np.argmax(ssim_scores)
    worst_idx = np.argmin(ssim_scores)

    print(f"Best SSIM (SSIM={ssim_scores[best_idx]:.4f}, LPIPS={lpips_scores[best_idx]:.4f})")
    print(f"  Prompt: {results['prompts'][best_idx]}")

    print(f"\nWorst SSIM (SSIM={ssim_scores[worst_idx]:.4f}, LPIPS={lpips_scores[worst_idx]:.4f})")
    print(f"  Prompt: {results['prompts'][worst_idx]}")

    return results


In [None]:
# Every model variant included in the quantitative comparison.
models = [
    "swetha3456/thermal-rgb-controlnet",
    "swetha3456/thermal-rgb-controlnet-v2",
    "swetha3456/thermal-rgb-controlnet-v2-no-prompts",
    "swetha3456/thermal-rgb-controlnet-v3",
    "swetha3456/thermal-rgb-controlnet-v3-contrast",
    "swetha3456/thermal-rgb-controlnet-v4-clahe"
]

conditioning_scales = [1.0, 1.5, 2.0]

# Checkpoint subfolders for the v2 model.
v2_subfolders = [
    "checkpoint-1000-contrast-x2/controlnet",
    "checkpoint-2000-contrast-x2/controlnet",
    "checkpoint-3000-contrast-x2/controlnet",
    "checkpoint-4000-contrast-x2/controlnet",
    "checkpoint-5000-contrast-x2/controlnet"
]

# Checkpoint subfolders for the v3-contrast and v4 models.
v4_subfolders = [
    "checkpoint-1000/controlnet",
    "checkpoint-2000/controlnet",
    "checkpoint-3000/controlnet",
    "checkpoint-4000/controlnet",
    "checkpoint-5000/controlnet"
]

# (label, ground-truth images) pairs that each configuration is scored against.
gt_sets = [
    ("original", rgb_images),
    ("enhanced", rgb_images_enhanced),
]

# Nested as all_results[model][scale][subfolder key][gt label] -> results dict.
all_results = {}
rule = "=" * 90

for model_name in models:
    by_scale = all_results[model_name] = {}

    for scale in conditioning_scales:
        by_subfolder = by_scale[scale] = {}

        # Which checkpoints to evaluate depends on the model variant.
        if model_name == "swetha3456/thermal-rgb-controlnet-v2":
            subfolders = v2_subfolders
        elif "v3-contrast" in model_name or "v4" in model_name:
            subfolders = v4_subfolders
        else:
            subfolders = [None]

        for subfolder in subfolders:
            subkey = "no_subfolder" if subfolder is None else subfolder
            by_gt = by_subfolder[subkey] = {}

            for tag, gt_images in gt_sets:
                # Banner describing the configuration about to be evaluated.
                banner = ["\n" + rule, f"Model: {model_name}"]
                if subfolder is not None:
                    banner.append(f"Subfolder: {subfolder}")
                banner += [
                    f"Conditioning scale: {scale}",
                    f"GT type: {tag}",
                    rule + "\n",
                ]
                print("\n".join(banner))

                results = calculate_ssim_from_dataset(
                    model_name=model_name,
                    subfolder=subfolder,
                    thermal_images=thermal_images,
                    rgb_gt_images=gt_images,
                    prompts=prompts,
                    controlnet_conditioning_scale=scale,
                    num_inference_steps=50,
                    guidance_scale=7.5,
                )

                by_gt[tag] = results


In [None]:
# Focused re-run: only the v3-contrast and v4 models at scale 1.0.
models = [
    "swetha3456/thermal-rgb-controlnet-v3-contrast",
    "swetha3456/thermal-rgb-controlnet-v4-clahe"
]

conditioning_scales = [1.0]

# Not selected for any model in `models` above; kept so the v2 model can be
# added back to the list.
# NOTE(review): "checkpoint-1000" has no "/controlnet" suffix, unlike every
# other entry -- confirm whether it is intentional.
v2_subfolders = [
    "checkpoint-1000",
    "checkpoint-1000-contrast-x2/controlnet",
    "checkpoint-2000-contrast-x2/controlnet",
    "checkpoint-3000-contrast-x2/controlnet",
    "checkpoint-4000-contrast-x2/controlnet",
    "checkpoint-5000-contrast-x2/controlnet"
]

# Checkpoint subfolders for the v3-contrast and v4 models.
v4_subfolders = [
    "checkpoint-1000/controlnet",
    "checkpoint-2000/controlnet",
    "checkpoint-3000/controlnet",
    "checkpoint-4000/controlnet",
    "checkpoint-5000/controlnet"
]

# (label, ground-truth images) pairs that each configuration is scored against.
gt_sets = [
    ("original", rgb_images)
]

# Nested as all_results[model][scale][subfolder key][gt label] -> results dict.
# Rebinding the name here discards any results stored under it earlier.
all_results = {}

for model_name in models:
    all_results[model_name] = {}

    # Exact match, as in the other sweep cells: a substring test on "v2" would
    # also catch "...-v2-no-prompts", which those cells load without a subfolder.
    if model_name == "swetha3456/thermal-rgb-controlnet-v2":
        subfolders = v2_subfolders
    elif "v3-contrast" in model_name or "v4" in model_name:
        subfolders = v4_subfolders
    else:
        subfolders = [None]

    for scale in conditioning_scales:
        all_results[model_name][scale] = {}

        for subfolder in subfolders:
            subkey = subfolder if subfolder is not None else "no_subfolder"
            all_results[model_name][scale][subkey] = {}

            for tag, gt_images in gt_sets:
                # Banner describing the configuration about to be evaluated.
                print("\n" + "=" * 90)
                print(f"Model: {model_name}")
                if subfolder is not None:
                    print(f"Subfolder: {subfolder}")
                print(f"Conditioning scale: {scale}")
                print(f"GT type: {tag}")
                print("=" * 90 + "\n")

                results = calculate_ssim_from_dataset(
                    model_name=model_name,
                    subfolder=subfolder,
                    thermal_images=thermal_images,
                    rgb_gt_images=gt_images,
                    prompts=prompts,
                    controlnet_conditioning_scale=scale,
                    num_inference_steps=50,
                    guidance_scale=7.5,
                )

                all_results[model_name][scale][subkey][tag] = results
