In [1]:
# Cell 1: Imports
import os
import numpy as np
import glob
from PIL import Image
import torch
from tqdm import tqdm
import matplotlib.pyplot as plt

# Import metrics
from metrics import (
    calculate_psnr, 
    calculate_ssim, 
    calculate_fourier_spectrum_similarity,
    calculate_phase_consistency,
    calculate_lpips,
    tensor2img
)

In [2]:
# Cell 2: LPIPS Check
# LPIPS is an optional dependency: record whether the import worked so that
# later cells can skip the perceptual metric instead of failing.
try:
    import lpips
except ImportError:
    LPIPS_AVAILABLE = False
    print("LPIPS not available. Install with: pip install lpips")
else:
    LPIPS_AVAILABLE = True


In [3]:
# Cell 3: Evaluation Function
def _save_comparison_figure(lr_img, sr_img, hr_img, psnr, save_path):
    """Write a one-row LR / SR / HR comparison figure to ``save_path``."""
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    panels = (
        (lr_img, "Low Resolution"),
        (sr_img, f"Super Resolution (PSNR: {psnr:.2f})"),
        (hr_img, "High Resolution"),
    )
    for ax, (image, title) in zip(axes, panels):
        ax.imshow(image)
        ax.set_title(title)
        ax.axis('off')

    fig.tight_layout()
    fig.savefig(save_path)
    # Close this specific figure so figures do not pile up across the loop.
    plt.close(fig)


def evaluate_model(model_path, test_data_path, output_dir, device='cuda',
                   sample_every=10):
    """
    Evaluate a trained super-resolution model

    Runs the model over every (lr, hr) pair of the test set, computes PSNR,
    SSIM, Fourier spectrum similarity and phase consistency (plus LPIPS when
    the ``lpips`` package was importable), saves periodic comparison figures,
    and prints and writes the averaged metrics.

    Args:
        model_path: Path to the trained model checkpoint
        test_data_path: Path to test dataset
        output_dir: Directory to save evaluation results
        device: Device to run evaluation on
        sample_every: Save a comparison figure for every ``sample_every``-th
            image (index 0 included). A falsy value (0 or None) disables
            figure saving. Defaults to 10.

    Returns:
        dict with the averaged metrics under the keys "psnr", "ssim",
        "fourier_sim", "phase_cons" and "lpips" ("lpips" is None when LPIPS
        is not available).

    Raises:
        ValueError: If the test loader yields no samples, since no average
            can be computed in that case.
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Load model
    from resdiff import ResDiffModel
    model = ResDiffModel().to(device)

    # NOTE(review): torch.load unpickles the checkpoint; depending on the
    # installed PyTorch version this may execute code embedded in the file.
    # Only load checkpoints from a trusted source.
    checkpoint = torch.load(model_path, map_location=device)
    # Accept both a full training checkpoint and a bare state dict.
    if 'model_state_dict' in checkpoint:
        state_dict = checkpoint['model_state_dict']
    else:
        state_dict = checkpoint
    model.load_state_dict(state_dict)
    model.eval()

    # Initialize LPIPS if available; None means "skip the LPIPS metric".
    lpips_fn = lpips.LPIPS(net='alex').to(device) if LPIPS_AVAILABLE else None

    # Load test dataset
    from dataset import SuperResolutionDataset
    test_dataset = SuperResolutionDataset(test_data_path)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=False, num_workers=4
    )

    # Per-image metric values, keyed exactly like the returned dict.
    per_image = {
        "psnr": [],
        "ssim": [],
        "fourier_sim": [],
        "phase_cons": [],
        "lpips": [],
    }

    # Evaluate on test set
    with torch.no_grad():
        for i, (lr, hr) in enumerate(tqdm(test_loader, desc="Evaluating")):
            lr, hr = lr.to(device), hr.to(device)

            # Generate super-resolution image
            sr = model(lr)

            # batch_size is 1, so index 0 is the only image in the batch.
            sr_np = tensor2img(sr[0])
            hr_np = tensor2img(hr[0])

            psnr = calculate_psnr(sr_np, hr_np)
            per_image["psnr"].append(psnr)
            per_image["ssim"].append(calculate_ssim(sr_np, hr_np))
            per_image["fourier_sim"].append(
                calculate_fourier_spectrum_similarity(sr_np, hr_np)
            )
            per_image["phase_cons"].append(
                calculate_phase_consistency(sr_np, hr_np)
            )

            # LPIPS is computed on the tensors, not the converted images.
            if lpips_fn is not None:
                per_image["lpips"].append(calculate_lpips(sr, hr, lpips_fn))

            # Save sample images periodically
            if sample_every and i % sample_every == 0:
                _save_comparison_figure(
                    tensor2img(lr[0]),
                    sr_np,
                    hr_np,
                    psnr,
                    os.path.join(output_dir, f"sample_{i}.png"),
                )

    # Without this guard np.mean([]) would warn and report NaN for everything.
    if not per_image["psnr"]:
        raise ValueError(
            f"No test samples were loaded from {test_data_path!r}; "
            "cannot compute average metrics."
        )

    # Calculate average metrics; a metric with no values (LPIPS when the
    # package is unavailable) is reported as None.
    results = {
        name: (np.mean(values) if values else None)
        for name, values in per_image.items()
    }

    # Format the summary once so the console and the file cannot drift apart.
    metric_labels = (
        ("psnr", "Average PSNR"),
        ("ssim", "Average SSIM"),
        ("fourier_sim", "Average Fourier Spectrum Similarity"),
        ("phase_cons", "Average Phase Consistency"),
        ("lpips", "Average LPIPS"),
    )
    summary_lines = [
        f"{label}: {results[name]:.4f}"
        for name, label in metric_labels
        if results[name] is not None
    ]

    # Print results
    print("\nEvaluation Results:")
    for line in summary_lines:
        print(line)

    # Save results to file
    results_path = os.path.join(output_dir, "evaluation_results.txt")
    with open(results_path, "w", encoding="utf-8") as f:
        f.writelines(line + "\n" for line in summary_lines)

    return results

In [4]:
# Cell 4: Run Evaluation
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Evaluation inputs and output location
model_path = "results/resdiff_final.pth"  # trained model checkpoint
test_data_path = "datasets/test"  # test dataset location
output_dir = "evaluation_results"  # where metrics and sample figures are written

# Evaluate the checkpoint and keep the averaged metrics for later cells
results = evaluate_model(model_path, test_data_path, output_dir, device=device)

Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]




Loading model from: c:\Users\Dinesh\AppData\Local\Programs\Python\Python311\Lib\site-packages\lpips\weights\v0.1\alex.pth


Evaluating: 100%|██████████| 376/376 [01:10<00:00,  5.35it/s]


Evaluation Results:
Average PSNR: 38.8085
Average SSIM: 0.9049
Average Fourier Spectrum Similarity: 0.9979
Average Phase Consistency: 0.4902
Average LPIPS: 0.1974





```
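Let me analyze these scores for you:
(Note: the values discussed below (PSNR 21.31 dB, SSIM 0.8521, Fourier spectrum similarity 0.9992, phase consistency 0.4556, LPIPS 0.3773) do not match the evaluation output printed above, which reports PSNR 38.8085, SSIM 0.9049, Fourier spectrum similarity 0.9979, phase consistency 0.4902 and LPIPS 0.1974; they appear to come from a different evaluation run.)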
PSNR (Peak Signal-to-Noise Ratio): 21.31 dB
This is a moderate score for super-resolution
Typical ranges:
Excellent: > 30 dB
Good: 25-30 dB
Moderate: 20-25 dB
Poor: < 20 dB
Your score of 21.31 dB indicates moderate performance

SSIM (Structural Similarity Index): 0.8521
This is a good score
SSIM ranges from 0 to 1, where 1 is perfect
Your score of 0.8521 indicates good structural preservation
Typical ranges:
Excellent: > 0.95
Good: 0.85-0.95
Moderate: 0.75-0.85
Poor: < 0.75

Fourier Spectrum Similarity: 0.9992
This is an excellent score
The score of 0.9992 indicates very good preservation of frequency domain information
This suggests your model is doing well at maintaining the frequency characteristics of the images

Phase Consistency: 0.4556
This is a moderate score
Phase consistency ranges from 0 to 1, where 1 is perfect alignment
Your score of 0.4556 indicates room for improvement in phase alignment
This might be an area to focus on for improvement

LPIPS (Learned Perceptual Image Patch Similarity): 0.3773
This is a moderate score
LPIPS is a perceptual distance: 0 means the two images are identical and lower is better (more similar); values typically fall between 0 and 1
Your score of 0.3773 indicates moderate perceptual similarity
Typical ranges:
Excellent: < 0.2
Good: 0.2-0.3
Moderate: 0.3-0.4
Poor: > 0.4
```
