In [2]:
import pandas as pd

# Locations of the metadata tables for the two face generators under study.
path_sg2 = './data/BEST/face/stylegan2/metadata.csv'
path_p = './data/BEST/face/palette/metadata.csv'

# Read both CSVs; the unpacking order follows the order of the paths tuple.
sg2_meta, palette_meta = (
    pd.read_csv(csv_path) for csv_path in (path_sg2, path_p)
)

In [5]:
# Restrict the StyleGAN2 table to rows whose category mentions 'ffhq';
# na=False makes rows with a missing category count as non-matching.
is_ffhq = sg2_meta['category'].str.contains('ffhq', na=False)
sg2_meta = sg2_meta[is_ffhq]

# Quick sanity check of what survived the filter.
remaining_categories = sg2_meta['category']
print(remaining_categories.unique())
print(remaining_categories.value_counts())
sg2_meta.head()





In [6]:
# Preview the first rows of the Palette metadata table (rendered as the cell output).
palette_meta.head()



In [None]:
# Re-read both metadata tables from disk so this cell does not depend on
# whatever an earlier cell left in `sg2_meta` / `palette_meta`.
path_sg2 = './data/BEST/face/stylegan2/metadata.csv'
path_p = './data/BEST/face/palette/metadata.csv'
sg2_meta, palette_meta = (
    pd.read_csv(csv_path) for csv_path in (path_sg2, path_p)
)
# Keep only StyleGAN2 rows whose category contains 'ffhq' (missing values excluded).
sg2_meta = sg2_meta.loc[sg2_meta['category'].str.contains('ffhq', na=False)]

In [16]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

def azimuthal_average(image, center=None):
    """
    Compute the azimuthally averaged radial profile of a 2D array.

    Every pixel is assigned to an integer radius bin (its Euclidean distance
    from ``center``, truncated towards zero) and the values that fall into the
    same bin are averaged.

    Parameters:
        image (2D np.array): The 2D power spectrum.
        center (tuple, optional): The [x, y] center of the image.
                                  If None, uses [width // 2, height // 2],
                                  which is the index where np.fft.fftshift
                                  places the zero-frequency (DC) term.

    Returns:
        radial_prof (1D np.array): The azimuthally averaged power spectrum,
                                   indexed by integer radius. Bins that
                                   contain no pixel are reported as 0.
    """
    y, x = np.indices(image.shape)
    if center is None:
        # Use the fftshift DC location rather than the geometric midpoint: for
        # even sizes the midpoint lies between pixels, which would blend the
        # DC term with its neighbours in bin 0.
        height, width = image.shape
        center = np.array([width // 2, height // 2])
    # Distance of every pixel from the center, truncated to an integer bin index
    r = np.hypot(x - center[0], y - center[1])
    r_int = r.astype(int)

    # Sum of values and number of pixels in each radius bin
    tbin = np.bincount(r_int.ravel(), weights=image.ravel())
    nr = np.bincount(r_int.ravel())

    # Exact mean per bin; empty bins (possible with an off-image center) stay 0
    radial_prof = np.zeros_like(tbin, dtype=np.float64)
    np.divide(tbin, nr, out=radial_prof, where=nr > 0)
    return radial_prof

def process_image(image_path):
    """
    Turn one image file into its azimuthally averaged power spectrum.

    Parameters:
        image_path (str): Path to the image file.

    Returns:
        radial_prof (1D np.array): Azimuthally averaged power spectrum.
    """
    # Grayscale pixel values as a 2D array
    gray_pixels = np.array(Image.open(image_path).convert('L'))

    # 2D FFT with the zero-frequency term moved to the middle of the array
    centered_spectrum = np.fft.fftshift(np.fft.fft2(gray_pixels))

    # Power = squared magnitude of each Fourier coefficient
    power = np.abs(centered_spectrum) ** 2

    # Collapse the 2D power spectrum to a 1D radial profile
    return azimuthal_average(power)

def compute_mean_radial_profile(sample_df):
    """
    Process a DataFrame of image paths, compute each image's radial profile,
    and return the average profile over all images.

    Paths that do not exist on disk are reported with a printed message and
    skipped. Profiles of different lengths are right-padded with NaN so that
    each bin is averaged only over the images that actually have that bin.

    Parameters:
        sample_df (pd.DataFrame): DataFrame with an 'image_path' column.

    Returns:
        mean_profile (1D np.array): Mean azimuthally averaged power spectrum.

    Raises:
        ValueError: If no image could be processed (empty input or every path
                    missing), since there is nothing to average.
    """
    radial_profiles = []

    for idx, row in sample_df.iterrows():
        image_path = row['image_path']
        if not os.path.exists(image_path):
            print(f"File not found: {image_path}")
            continue
        radial_profiles.append(process_image(image_path))

    # Fail with an explicit message instead of the opaque
    # "max() arg is an empty sequence" raised further down.
    if not radial_profiles:
        raise ValueError(
            "No readable images found in sample_df['image_path']; "
            "cannot compute a mean radial profile."
        )

    # Stack the profiles into one NaN-padded matrix (one row per image)
    max_len = max(len(prof) for prof in radial_profiles)
    padded_profiles = np.full((len(radial_profiles), max_len), np.nan)
    for row_idx, prof in enumerate(radial_profiles):
        padded_profiles[row_idx, :len(prof)] = prof

    # Compute the mean, ignoring NaN padding
    mean_profile = np.nanmean(padded_profiles, axis=0)
    return mean_profile

# --- Main Workflow ---

# Load metadata CSVs for each model
path_sg2 = './data/BEST/face/stylegan2/metadata.csv'
path_p = './data/BEST/face/palette/metadata.csv'
sg2_meta = pd.read_csv(path_sg2)
palette_meta = pd.read_csv(path_p)
# Keep only the FFHQ rows. The explicit .copy() detaches the filtered frame
# from the original table so that the column assignment below does not
# trigger pandas' SettingWithCopyWarning.
sg2_meta = sg2_meta[sg2_meta['category'].str.contains('ffhq', na=False)].copy()

# Modify the image paths by prepending the correct base path
palette_meta['image_path'] = './data/BEST/face/palette/' + palette_meta['image_path']
sg2_meta['image_path'] = './data/BEST/face/stylegan2/' + sg2_meta['image_path']

# Randomly sample up to 1000 images from each dataset. The size is capped at
# the table length because DataFrame.sample raises when n exceeds the number
# of rows (sampling without replacement).
sample_size = 1000
palette_sample = palette_meta.sample(n=min(sample_size, len(palette_meta)), random_state=42)
sg2_sample = sg2_meta.sample(n=min(sample_size, len(sg2_meta)), random_state=42)

# Compute mean azimuthal power spectrum for each model
palette_mean_profile = compute_mean_radial_profile(palette_sample)
sg2_mean_profile = compute_mean_radial_profile(sg2_sample)

# Plot the average frequency spectra for comparison
plt.figure(figsize=(10, 6))
plt.plot(palette_mean_profile, label='Palette')
plt.plot(sg2_mean_profile, label='StyleGAN2')
plt.xlabel('Frequency bin')
plt.ylabel('Power')
plt.title('Azimuthally Averaged Power Spectrum Comparison')
plt.legend()
plt.show()



In [17]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from scipy.stats import linregress

def azimuthal_average(image, center=None):
    """
    Compute the azimuthally averaged radial profile of a 2D array.

    Every pixel is assigned to an integer radius bin (its Euclidean distance
    from ``center``, truncated towards zero) and the values that fall into the
    same bin are averaged.

    Parameters:
        image (2D np.array): The 2D array to average (here: a power spectrum).
        center (tuple, optional): The [x, y] center. If None, uses
                                  [width // 2, height // 2], which is the index
                                  where np.fft.fftshift places the
                                  zero-frequency (DC) term.

    Returns:
        radial_prof (1D np.array): Mean value per integer radius. Bins that
                                   contain no pixel are reported as 0.
    """
    y, x = np.indices(image.shape)
    if center is None:
        # Use the fftshift DC location rather than the geometric midpoint: for
        # even sizes the midpoint lies between pixels, which would blend the
        # DC term with its neighbours in bin 0.
        height, width = image.shape
        center = np.array([width // 2, height // 2])
    # Distance of every pixel from the center, truncated to an integer bin index
    r = np.hypot(x - center[0], y - center[1])
    r_int = r.astype(int)

    # Sum of values and number of pixels in each radius bin
    tbin = np.bincount(r_int.ravel(), weights=image.ravel())
    nr = np.bincount(r_int.ravel())

    # Exact mean per bin; empty bins (possible with an off-image center) stay 0
    radial_prof = np.zeros_like(tbin, dtype=np.float64)
    np.divide(tbin, nr, out=radial_prof, where=nr > 0)
    return radial_prof

def process_image(image_path):
    """
    Read an image as grayscale and return the azimuthal average of its
    FFT power spectrum.
    """
    # Grayscale pixel values as a 2D array
    grayscale = Image.open(image_path).convert('L')
    pixels = np.array(grayscale)

    # Fourier transform, re-arranged so the zero frequency sits in the middle
    spectrum = np.fft.fftshift(np.fft.fft2(pixels))

    # Squared magnitude gives the power at each frequency
    power_spectrum = np.abs(spectrum) ** 2

    # Reduce to a 1D profile over radius
    return azimuthal_average(power_spectrum)

def compute_mean_radial_profile(sample_df):
    """
    Process a DataFrame of image paths, compute each image's radial profile,
    and return the average profile over all images.

    Paths that do not exist on disk are reported with a printed message and
    skipped. Profiles of different lengths are right-padded with NaN so that
    each bin is averaged only over the images that actually have that bin.

    Parameters:
        sample_df (pd.DataFrame): DataFrame with an 'image_path' column.

    Returns:
        mean_profile (1D np.array): Mean azimuthally averaged power spectrum.

    Raises:
        ValueError: If no image could be processed (empty input or every path
                    missing), since there is nothing to average.
    """
    radial_profiles = []

    for idx, row in sample_df.iterrows():
        image_path = row['image_path']
        if not os.path.exists(image_path):
            print(f"File not found: {image_path}")
            continue
        radial_profiles.append(process_image(image_path))

    # Fail with an explicit message instead of the opaque
    # "max() arg is an empty sequence" raised further down.
    if not radial_profiles:
        raise ValueError(
            "No readable images found in sample_df['image_path']; "
            "cannot compute a mean radial profile."
        )

    # Stack the profiles into one NaN-padded matrix (one row per image)
    max_len = max(len(prof) for prof in radial_profiles)
    padded_profiles = np.full((len(radial_profiles), max_len), np.nan)
    for row_idx, prof in enumerate(radial_profiles):
        padded_profiles[row_idx, :len(prof)] = prof

    # Compute the mean, ignoring NaN padding
    mean_profile = np.nanmean(padded_profiles, axis=0)
    return mean_profile

def process_image_2d(image_path):
    """
    Return the full 2D power spectrum of an image (no azimuthal averaging).

    The image is read as grayscale, Fourier-transformed, shifted so the zero
    frequency is in the middle, and converted to squared magnitude.
    """
    gray_pixels = np.array(Image.open(image_path).convert('L'))
    centered_spectrum = np.fft.fftshift(np.fft.fft2(gray_pixels))
    return np.abs(centered_spectrum) ** 2

def compute_mean_2d_power(sample_df):
    """
    Average the 2D power spectra of the images listed in ``sample_df``.

    Paths that do not exist on disk are reported with a printed message and
    skipped. The running sum is kept in float64.

    Returns:
        The element-wise mean spectrum, or None when no image was processed.
    """
    accumulator = None
    n_used = 0
    for _, record in sample_df.iterrows():
        image_path = record['image_path']
        if not os.path.exists(image_path):
            print(f"File not found: {image_path}")
            continue
        spectrum = process_image_2d(image_path)
        # Allocate the accumulator lazily, shaped like the first spectrum seen
        if accumulator is None:
            accumulator = np.zeros_like(spectrum, dtype=np.float64)
        accumulator += spectrum
        n_used += 1

    if n_used > 0:
        return accumulator / n_used
    return None

# --- Main Workflow ---

# Load metadata CSVs for each model
path_sg2 = './data/BEST/face/stylegan2/metadata.csv'
path_p   = './data/BEST/face/palette/metadata.csv'
sg2_meta = pd.read_csv(path_sg2)
palette_meta = pd.read_csv(path_p)
# Filter for a specific category if needed. The explicit .copy() detaches the
# filtered frame from the original table so that the column assignment below
# does not trigger pandas' SettingWithCopyWarning.
sg2_meta = sg2_meta[sg2_meta['category'].str.contains('ffhq', na=False)].copy()

# Modify the image paths by prepending the correct base path
palette_meta['image_path'] = './data/BEST/face/palette/' + palette_meta['image_path']
sg2_meta['image_path']     = './data/BEST/face/stylegan2/' + sg2_meta['image_path']

# Randomly sample up to 1000 images from each dataset. The size is capped at
# the table length because DataFrame.sample raises when n exceeds the number
# of rows (sampling without replacement).
sample_size = 1000
palette_sample = palette_meta.sample(n=min(sample_size, len(palette_meta)), random_state=42)
sg2_sample = sg2_meta.sample(n=min(sample_size, len(sg2_meta)), random_state=42)

# Compute mean azimuthal power spectrum for each model (1D radial profile)
palette_mean_profile = compute_mean_radial_profile(palette_sample)
sg2_mean_profile     = compute_mean_radial_profile(sg2_sample)

### 1. LOG-LOG PLOT (DC REMOVED)
# Remove the first bin (DC component). Each profile gets its own x-axis: the
# length of a mean profile follows the size of the images it was computed
# from, so the two profiles may differ in length, and plt.plot raises when
# x and y lengths disagree.
freq_bins = np.arange(len(palette_mean_profile))[1:]
sg2_freq_bins = np.arange(len(sg2_mean_profile))[1:]

plt.figure(figsize=(10, 6))
plt.plot(freq_bins, palette_mean_profile[1:], label='Palette')
plt.plot(sg2_freq_bins, sg2_mean_profile[1:], label='StyleGAN2')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Frequency bin (log scale)')
plt.ylabel('Power (log scale)')
plt.title('Log-Log Azimuthally Averaged Power Spectrum (DC removed)')
plt.legend()
plt.show()

### 2. PLOT OF LOG10 OF THE RADIAL PROFILE
# The 1e-8 offset keeps log10 finite for bins whose mean power is exactly 0.
plt.figure(figsize=(10, 6))
for mean_profile, curve_label in (
    (palette_mean_profile, 'Palette (log10)'),
    (sg2_mean_profile, 'StyleGAN2 (log10)'),
):
    plt.plot(np.log10(mean_profile + 1e-8), label=curve_label)
plt.xlabel('Frequency bin')
plt.ylabel('Log10(Power)')
plt.title('Azimuthally Averaged Power Spectrum (log10 transformed)')
plt.legend()
plt.show()

### 3. SLOPE ESTIMATION IN LOG-LOG SPACE
# Fit a straight line to log10(power) versus log10(frequency bin) over a
# chosen window of bins. Adjust the window as needed.
start_idx, end_idx = 5, 50  # Example indices for mid-range frequencies
fit_window = slice(start_idx, end_idx)
freq = np.arange(start_idx, end_idx)
log_freq = np.log10(freq)

# Same 1e-8 offset as the other log plots, to keep log10 finite at zero power
log_palette = np.log10(palette_mean_profile[fit_window] + 1e-8)
log_sg2     = np.log10(sg2_mean_profile[fit_window] + 1e-8)

# Least-squares line per model; only slope and intercept are used below
slope_palette, intercept_palette, r_value, p_value, std_err = linregress(log_freq, log_palette)
slope_sg2, intercept_sg2, r_value2, p_value2, std_err2 = linregress(log_freq, log_sg2)

palette_fit_line = intercept_palette + slope_palette*log_freq
sg2_fit_line = intercept_sg2 + slope_sg2*log_freq

plt.figure(figsize=(10, 6))
plt.plot(log_freq, log_palette, 'o', label='Palette Data')
plt.plot(log_freq, palette_fit_line, 'r',
         label=f'Palette Fit (slope={slope_palette:.2f})')
plt.plot(log_freq, log_sg2, 'o', label='StyleGAN2 Data')
plt.plot(log_freq, sg2_fit_line, 'g',
         label=f'StyleGAN2 Fit (slope={slope_sg2:.2f})')
plt.xlabel('Log10(Frequency)')
plt.ylabel('Log10(Power)')
plt.title('Slope Estimation in Log-Log Space')
plt.legend()
plt.show()

### 4. 2D AVERAGE POWER SPECTRUM VISUALIZATION
# Average the full 2D power spectra of each sample, then show them side by
# side on a log10 scale.
palette_mean_ps = compute_mean_2d_power(palette_sample)
sg2_mean_ps     = compute_mean_2d_power(sg2_sample)

plt.figure(figsize=(12, 5))

panels = [
    (palette_mean_ps, 'Palette: Average 2D Power Spectrum'),
    (sg2_mean_ps, 'StyleGAN2: Average 2D Power Spectrum'),
]
for panel_no, (mean_ps, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_no)
    plt.imshow(np.log10(mean_ps + 1e-8), cmap='viridis')
    plt.colorbar(label='Log10(Power)')
    plt.title(panel_title)

plt.tight_layout()
plt.show()









In [23]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
from scipy.linalg import sqrtm
import lpips

# Set device: Apple's MPS backend when torch reports it as available, CPU otherwise
_use_mps = torch.backends.mps.is_available()
device = torch.device("mps" if _use_mps else "cpu")

###############################################################################
#                             FID IMPLEMENTATION                              #
###############################################################################

# Preprocessing applied to every image before it is fed to the Inception model:
# resize to 299x299, convert to a tensor, then normalize each channel.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
fid_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

def get_activations(image_paths, model, batch_size=50):
    """
    Run ``model`` over the given images and collect its outputs.

    Images are loaded as RGB, preprocessed with ``fid_transform`` and pushed
    through the model in batches of ``batch_size`` with gradients disabled.
    An image that cannot be opened is reported with a printed message and
    skipped; a batch in which nothing could be opened is skipped entirely.

    Returns:
        np.ndarray: The per-batch model outputs concatenated along axis 0.
    """
    model.eval()
    feature_chunks = []
    with torch.no_grad():
        for start in range(0, len(image_paths), batch_size):
            tensors = []
            for path in image_paths[start:start + batch_size]:
                try:
                    pil_img = Image.open(path).convert('RGB')
                except Exception as e:
                    print(f"Error opening {path}: {e}")
                    continue
                tensors.append(fid_transform(pil_img))
            if len(tensors) == 0:
                continue
            batch = torch.stack(tensors).to(device)
            feature_chunks.append(model(batch).cpu().numpy())
    return np.concatenate(feature_chunks, axis=0)

# Build the pre-trained InceptionV3 network and place it on the selected device.
inception = models.inception_v3(pretrained=True, transform_input=False)
inception = inception.to(device)
# Swap the final fully connected layer for an identity so the model outputs
# the features that would otherwise feed that layer.
inception.fc = torch.nn.Identity()

def calculate_fid(act1, act2, eps=1e-6):
    """
    Calculate the Fréchet Inception Distance given two sets of activations.

    FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2)),
    where mu and sigma are the per-feature mean and covariance of each set.

    Parameters:
        act1, act2 (2D array-like): One row per image, one column per feature.
        eps (float): Diagonal offset added to both covariances, used only
                     when the matrix square root of their product is not
                     finite (near-singular covariances).

    Returns:
        The FID value as a floating-point scalar.
    """
    # Work in float64 so the means are not accumulated in float32
    act1 = np.asarray(act1, dtype=np.float64)
    act2 = np.asarray(act2, dtype=np.float64)

    mu1 = np.mean(act1, axis=0)
    mu2 = np.mean(act2, axis=0)
    # atleast_2d: np.cov returns a 0-d array for a single feature column,
    # which sqrtm cannot process
    sigma1 = np.atleast_2d(np.cov(act1, rowvar=False))
    sigma2 = np.atleast_2d(np.cov(act2, rowvar=False))

    ssdiff = np.sum((mu1 - mu2) ** 2)
    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # The product can be singular; retry with a small ridge on both
        # covariances instead of propagating inf/nan into the score.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # sqrtm may return a complex matrix due to numerical error; keep the real part
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2 * covmean)
    return fid

# Image paths come from the samples drawn earlier in the notebook;
# palette_sample and sg2_sample must each have an 'image_path' column.
palette_image_paths = palette_sample['image_path'].to_list()
sg2_image_paths = sg2_sample['image_path'].to_list()

# Extract activations for both sets (Palette first, then StyleGAN2).
activations_palette, activations_sg2 = (
    get_activations(paths, inception)
    for paths in (palette_image_paths, sg2_image_paths)
)

# Compute and report the FID score.
fid_value = calculate_fid(activations_palette, activations_sg2)
print("FID between Palette and StyleGAN2:", fid_value)

###############################################################################
#                        LPIPS (PERCEPTUAL METRIC)                            #
###############################################################################

# Build the LPIPS model with the 'alex' backbone ('vgg' is the other option
# mentioned for this setting) and place it on the selected device.
lpips_loss_fn = lpips.LPIPS(net='alex')
lpips_loss_fn = lpips_loss_fn.to(device)

# Preprocessing for LPIPS inputs: resize to 256x256, then convert to a tensor.
_lpips_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
lpips_transform = transforms.Compose(_lpips_steps)

def compute_lpips_score(image_path1, image_path2):
    """
    Return the LPIPS distance between two image files as a Python float.

    Both images are loaded as RGB, preprocessed with ``lpips_transform`` and
    rescaled from [0, 1] to [-1, 1] before being passed to ``lpips_loss_fn``.
    If either image cannot be opened, the error is printed and None is
    returned.
    """
    try:
        pil_pair = [Image.open(p).convert('RGB') for p in (image_path1, image_path2)]
    except Exception as e:
        print(f"Error opening images: {e}")
        return None

    # Per image: preprocess, add a batch dimension, move to the device, then
    # map the [0, 1] tensor values to [-1, 1].
    img1, img2 = (
        2 * lpips_transform(pil_img).unsqueeze(0).to(device) - 1
        for pil_img in pil_pair
    )

    with torch.no_grad():
        distance = lpips_loss_fn(img1, img2)
    return distance.item()

# Compute the average LPIPS score for a set of paired images.
# Images are paired by list position (there is no semantic correspondence
# between the two sets; you may also pair them randomly).
num_pairs = 400  # Change as needed.
lpips_scores = []
# Slicing + zip caps the loop at the shorter list, so asking for more pairs
# than are available no longer raises IndexError.
paired_paths = zip(palette_image_paths[:num_pairs], sg2_image_paths[:num_pairs])
for palette_path, sg2_path in paired_paths:
    score = compute_lpips_score(palette_path, sg2_path)
    # None marks a pair that could not be loaded; leave it out of the average
    if score is not None:
        lpips_scores.append(score)

if lpips_scores:
    avg_lpips = np.mean(lpips_scores)
    print("Average LPIPS between Palette and StyleGAN2:", avg_lpips)
else:
    print("No valid LPIPS scores computed.")





In [24]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
from scipy.linalg import sqrtm
import lpips

# Set device: Apple's MPS backend when torch reports it as available, CPU otherwise
_use_mps = torch.backends.mps.is_available()
device = torch.device("mps" if _use_mps else "cpu")

###############################################################################
#                             FID IMPLEMENTATION                              #
###############################################################################

# Preprocessing applied to every image before it is fed to the Inception model:
# resize to 299x299, convert to a tensor, then normalize each channel.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
fid_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

def get_activations(image_paths, model, batch_size=50):
    """
    Run ``model`` over the given images and collect its outputs.

    Images are loaded as RGB, preprocessed with ``fid_transform`` and pushed
    through the model in batches of ``batch_size`` with gradients disabled.
    An image that cannot be opened is reported with a printed message and
    skipped; a batch in which nothing could be opened is skipped entirely.

    Returns:
        np.ndarray: The per-batch model outputs concatenated along axis 0.
    """
    model.eval()
    feature_chunks = []
    with torch.no_grad():
        for start in range(0, len(image_paths), batch_size):
            tensors = []
            for path in image_paths[start:start + batch_size]:
                try:
                    pil_img = Image.open(path).convert('RGB')
                except Exception as e:
                    print(f"Error opening {path}: {e}")
                    continue
                tensors.append(fid_transform(pil_img))
            if len(tensors) == 0:
                continue
            batch = torch.stack(tensors).to(device)
            feature_chunks.append(model(batch).cpu().numpy())
    return np.concatenate(feature_chunks, axis=0)

# Build the pre-trained InceptionV3 network and place it on the selected device.
inception = models.inception_v3(pretrained=True, transform_input=False)
inception = inception.to(device)
# Swap the final fully connected layer for an identity so the model outputs
# the features that would otherwise feed that layer.
inception.fc = torch.nn.Identity()

def calculate_fid(act1, act2, eps=1e-6):
    """
    Calculate the Fréchet Inception Distance given two sets of activations.

    FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2)),
    where mu and sigma are the per-feature mean and covariance of each set.

    Parameters:
        act1, act2 (2D array-like): One row per image, one column per feature.
        eps (float): Diagonal offset added to both covariances, used only
                     when the matrix square root of their product is not
                     finite (near-singular covariances).

    Returns:
        The FID value as a floating-point scalar.
    """
    # Work in float64 so the means are not accumulated in float32
    act1 = np.asarray(act1, dtype=np.float64)
    act2 = np.asarray(act2, dtype=np.float64)

    mu1 = np.mean(act1, axis=0)
    mu2 = np.mean(act2, axis=0)
    # atleast_2d: np.cov returns a 0-d array for a single feature column,
    # which sqrtm cannot process
    sigma1 = np.atleast_2d(np.cov(act1, rowvar=False))
    sigma2 = np.atleast_2d(np.cov(act2, rowvar=False))

    ssdiff = np.sum((mu1 - mu2) ** 2)
    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # The product can be singular; retry with a small ridge on both
        # covariances instead of propagating inf/nan into the score.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # sqrtm may return a complex matrix due to numerical error; keep the real part
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2 * covmean)
    return fid

# Load the metadata tables of the BigGAN and VQ-Diffusion image sets; each
# must have an 'image_path' column holding paths relative to its own folder.
path_gan1 = './data/BEST/everything/big_gan/metadata.csv'
path_d1 = './data/BEST/everything/vq_diffusion/metadata.csv'
big_gan_e, vq_diffusion_e = (
    pd.read_csv(csv_path) for csv_path in (path_gan1, path_d1)
)

# Prepend each dataset's base folder so the paths resolve from the notebook's directory.
big_gan_e['image_path']      = './data/BEST/everything/big_gan/' + big_gan_e['image_path']
vq_diffusion_e['image_path'] = './data/BEST/everything/vq_diffusion/' + vq_diffusion_e['image_path']

big_gan_image_paths = big_gan_e['image_path'].to_list()
vqd_image_paths     = vq_diffusion_e['image_path'].to_list()

# Extract activations for both sets (BigGAN first, then VQ-Diffusion).
activations_bg, activations_vqd = (
    get_activations(paths, inception)
    for paths in (big_gan_image_paths, vqd_image_paths)
)

# Compute and report the FID score.
fid_value = calculate_fid(activations_vqd, activations_bg)
print("FID between diffusion_vq and Big Gan:", fid_value)

###############################################################################
#                        LPIPS (PERCEPTUAL METRIC)                            #
###############################################################################

# Build the LPIPS model with the 'alex' backbone ('vgg' is the other option
# mentioned for this setting) and place it on the selected device.
lpips_loss_fn = lpips.LPIPS(net='alex')
lpips_loss_fn = lpips_loss_fn.to(device)

# Preprocessing for LPIPS inputs: resize to 256x256, then convert to a tensor.
_lpips_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
lpips_transform = transforms.Compose(_lpips_steps)

def compute_lpips_score(image_path1, image_path2):
    """
    Return the LPIPS distance between two image files as a Python float.

    Both images are loaded as RGB, preprocessed with ``lpips_transform`` and
    rescaled from [0, 1] to [-1, 1] before being passed to ``lpips_loss_fn``.
    If either image cannot be opened, the error is printed and None is
    returned.
    """
    try:
        pil_pair = [Image.open(p).convert('RGB') for p in (image_path1, image_path2)]
    except Exception as e:
        print(f"Error opening images: {e}")
        return None

    # Per image: preprocess, add a batch dimension, move to the device, then
    # map the [0, 1] tensor values to [-1, 1].
    img1, img2 = (
        2 * lpips_transform(pil_img).unsqueeze(0).to(device) - 1
        for pil_img in pil_pair
    )

    with torch.no_grad():
        distance = lpips_loss_fn(img1, img2)
    return distance.item()

# Compute the average LPIPS score for a set of paired images.
# Images are paired by list position (there is no semantic correspondence
# between the two sets; you may also pair them randomly).
num_pairs = 400  # Change as needed.
lpips_scores = []
# Slicing + zip caps the loop at the shorter list, so asking for more pairs
# than are available no longer raises IndexError.
paired_paths = zip(big_gan_image_paths[:num_pairs], vqd_image_paths[:num_pairs])
for big_gan_path, vqd_path in paired_paths:
    score = compute_lpips_score(big_gan_path, vqd_path)
    # None marks a pair that could not be loaded; leave it out of the average
    if score is not None:
        lpips_scores.append(score)

if lpips_scores:
    avg_lpips = np.mean(lpips_scores)
    print("Average LPIPS between diffusion_vq and Big Gan:", avg_lpips)
else:
    print("No valid LPIPS scores computed.")







In [25]:
import os
import numpy as np
import pandas as pd
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
from scipy.linalg import sqrtm
import lpips

# Set device: Apple's MPS backend when torch reports it as available, CPU otherwise
_use_mps = torch.backends.mps.is_available()
device = torch.device("mps" if _use_mps else "cpu")

###############################################################################
#                             FID IMPLEMENTATION                              #
###############################################################################

# Preprocessing applied to every image before it is fed to the Inception model:
# resize to 299x299, convert to a tensor, then normalize each channel.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]
fid_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
])

def get_activations(image_paths, model, batch_size=50):
    """
    Run ``model`` over the given images and collect its outputs.

    Images are loaded as RGB, preprocessed with ``fid_transform`` and pushed
    through the model in batches of ``batch_size`` with gradients disabled.
    An image that cannot be opened is reported with a printed message and
    skipped; a batch in which nothing could be opened is skipped entirely.

    Returns:
        np.ndarray: The per-batch model outputs concatenated along axis 0.
    """
    model.eval()
    feature_chunks = []
    with torch.no_grad():
        for start in range(0, len(image_paths), batch_size):
            tensors = []
            for path in image_paths[start:start + batch_size]:
                try:
                    pil_img = Image.open(path).convert('RGB')
                except Exception as e:
                    print(f"Error opening {path}: {e}")
                    continue
                tensors.append(fid_transform(pil_img))
            if len(tensors) == 0:
                continue
            batch = torch.stack(tensors).to(device)
            feature_chunks.append(model(batch).cpu().numpy())
    return np.concatenate(feature_chunks, axis=0)

# Build the pre-trained InceptionV3 network and place it on the selected device.
inception = models.inception_v3(pretrained=True, transform_input=False)
inception = inception.to(device)
# Swap the final fully connected layer for an identity so the model outputs
# the features that would otherwise feed that layer.
inception.fc = torch.nn.Identity()

def calculate_fid(act1, act2, eps=1e-6):
    """
    Calculate the Fréchet Inception Distance given two sets of activations.

    FID = ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2)),
    where mu and sigma are the per-feature mean and covariance of each set.

    Parameters:
        act1, act2 (2D array-like): One row per image, one column per feature.
        eps (float): Diagonal offset added to both covariances, used only
                     when the matrix square root of their product is not
                     finite (near-singular covariances).

    Returns:
        The FID value as a floating-point scalar.
    """
    # Work in float64 so the means are not accumulated in float32
    act1 = np.asarray(act1, dtype=np.float64)
    act2 = np.asarray(act2, dtype=np.float64)

    mu1 = np.mean(act1, axis=0)
    mu2 = np.mean(act2, axis=0)
    # atleast_2d: np.cov returns a 0-d array for a single feature column,
    # which sqrtm cannot process
    sigma1 = np.atleast_2d(np.cov(act1, rowvar=False))
    sigma2 = np.atleast_2d(np.cov(act2, rowvar=False))

    ssdiff = np.sum((mu1 - mu2) ** 2)
    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # The product can be singular; retry with a small ridge on both
        # covariances instead of propagating inf/nan into the score.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # sqrtm may return a complex matrix due to numerical error; keep the real part
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = ssdiff + np.trace(sigma1 + sigma2 - 2 * covmean)
    return fid

# Load the metadata tables of the Gansformer and DDPM bedroom image sets; each
# must have an 'image_path' column holding paths relative to its own folder.
path_gan2 = './data/BEST/bedroom_comparison/gansformer/metadata.csv'
path_d2 = './data/BEST/bedroom_comparison/ddpm/metadata.csv'

gansformer_b, ddpm_b = (
    pd.read_csv(csv_path) for csv_path in (path_gan2, path_d2)
)

# Prepend each dataset's base folder so the paths resolve from the notebook's directory.
gansformer_b['image_path'] = './data/BEST/bedroom_comparison/gansformer/' + gansformer_b['image_path']
ddpm_b['image_path']       = './data/BEST/bedroom_comparison/ddpm/' + ddpm_b['image_path']

gansformer_image_paths = gansformer_b['image_path'].to_list()
ddpm_image_paths       = ddpm_b['image_path'].to_list()

# Extract activations for both sets (Gansformer first, then DDPM).
activations_g, activations_ddpm = (
    get_activations(paths, inception)
    for paths in (gansformer_image_paths, ddpm_image_paths)
)

# Compute and report the FID score.
fid_value = calculate_fid(activations_ddpm, activations_g)
print("FID between DDPM and Gansformer:", fid_value)

###############################################################################
#                        LPIPS (PERCEPTUAL METRIC)                            #
###############################################################################

# Build the LPIPS model with the 'alex' backbone ('vgg' is the other option
# mentioned for this setting) and place it on the selected device.
lpips_loss_fn = lpips.LPIPS(net='alex')
lpips_loss_fn = lpips_loss_fn.to(device)

# Preprocessing for LPIPS inputs: resize to 256x256, then convert to a tensor.
_lpips_steps = [
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
]
lpips_transform = transforms.Compose(_lpips_steps)

def compute_lpips_score(image_path1, image_path2):
    """
    Return the LPIPS distance between two image files as a Python float.

    Both images are loaded as RGB, preprocessed with ``lpips_transform`` and
    rescaled from [0, 1] to [-1, 1] before being passed to ``lpips_loss_fn``.
    If either image cannot be opened, the error is printed and None is
    returned.
    """
    try:
        pil_pair = [Image.open(p).convert('RGB') for p in (image_path1, image_path2)]
    except Exception as e:
        print(f"Error opening images: {e}")
        return None

    # Per image: preprocess, add a batch dimension, move to the device, then
    # map the [0, 1] tensor values to [-1, 1].
    img1, img2 = (
        2 * lpips_transform(pil_img).unsqueeze(0).to(device) - 1
        for pil_img in pil_pair
    )

    with torch.no_grad():
        distance = lpips_loss_fn(img1, img2)
    return distance.item()

# Compute the average LPIPS score for a set of paired images.
# Images are paired by list position (there is no semantic correspondence
# between the two sets; you may also pair them randomly).
num_pairs = 400  # Change as needed.
lpips_scores = []
# Slicing + zip caps the loop at the shorter list, so asking for more pairs
# than are available no longer raises IndexError.
paired_paths = zip(gansformer_image_paths[:num_pairs], ddpm_image_paths[:num_pairs])
for gansformer_path, ddpm_path in paired_paths:
    score = compute_lpips_score(gansformer_path, ddpm_path)
    # None marks a pair that could not be loaded; leave it out of the average
    if score is not None:
        lpips_scores.append(score)

if lpips_scores:
    avg_lpips = np.mean(lpips_scores)
    print("Average LPIPS between DDPM and Gansformer:", avg_lpips)
else:
    print("No valid LPIPS scores computed.")









In the Algorithm section (C1), you should explain the principles behind deep neural networks, the structure of U-Net, the purpose of skip connections, the mathematical definition of the cross-entropy loss, and any other concepts the method relies on.

In the Network Design section (C2), you can discuss how many layers you used to build your U-Net, how you defined the weights for the weighted CE loss, and other design choices.

TODO: Review the diagrams (check the tensor dimensions, etc.).
TODO: Add the mathematical definitions (cross-entropy loss, etc.).
TODO: Explain the purpose of skip connections.
