In [5]:
import torch
from torchvision.models import inception_v3
import numpy as np
from scipy.linalg import sqrtm

import torch
from torchvision.models import inception_v3

def get_features(images, model, device='cpu'):
    # Ensure the model is set to evaluation mode
    model.eval()
    
    # Move the model to the specified device (CPU or GPU)
    model.to(device)
    
    with torch.no_grad():  # Ensure no gradients are computed
        # Check if images need to be unsqueezed (add batch dimension)
        if images.ndim == 3:
            images = images.unsqueeze(0)
        images = images.to(device)
        
        # Extract features using the model
        features = model(images)
        
        # Convert features to NumPy array after detaching from the current graph
        features = features.detach().cpu().numpy()
    
    return features

# Load the Inception v3 model
model = inception_v3(pretrained=True, transform_input=False)
model.aux_logits = False  # Disable the auxiliary classifier
model.fc = torch.nn.Identity()  # Modify the model to output features from the last but one layer (pooling layer)

# Now, you can use the `get_features` function to extract features from your images


def calculate_fid(real_features, gen_features):
    # Ensure inputs are numpy arrays
    real_features = np.array(real_features)
    gen_features = np.array(gen_features)
    
    # Calculate the mean and covariance of the real and generated features
    mu_real = np.mean(real_features, axis=0)
    mu_gen = np.mean(gen_features, axis=0)
    cov_real = np.cov(real_features, rowvar=False)
    cov_gen = np.cov(gen_features, rowvar=False)
    
    # Check the shapes of the covariance matrices
    if cov_real.shape != cov_gen.shape or cov_real.ndim != 2:
        raise ValueError("Covariance matrices should be square and of the same size.")
    
    # Compute the squared difference of the means
    mean_diff = np.sum((mu_real - mu_gen)**2)
    
    # Compute the square root of the product of covariances
    covmean = sqrtm(cov_real.dot(cov_gen), disp=False)[0]
    
    # Handle possible complex numbers due to numerical errors
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    
    # Compute the Fréchet Inception Distance
    fid = mean_diff + np.trace(cov_real + cov_gen - 2 * covmean)
    return fid


# Load images
from torchvision import transforms
from PIL import Image

def load_and_transform_image(image_path):
    # Define the transformation
    transform = transforms.Compose([
        transforms.Resize((299, 299)),      # Resize the image to 299x299 for Inception v3
        transforms.ToTensor(),              # Convert the image to a tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for Inception v3
    ])
    
    # Load the image
    image = Image.open(image_path).convert('RGB')  # Convert image to RGB, in case it's not
    # Apply the transformation
    tensor_image = transform(image)
    return tensor_image

# Example usage:
real_image_path = 'C:\\Users\\User\\Desktop\\Aryan\\Western University Work\\9873 - BIAI\\pytorch-stable-diffusion\\images\\cat_actual.jpg'
real_image_tensor = load_and_transform_image(real_image_path)

generated_image_path = 'C:\\Users\\User\\Desktop\\Aryan\\Western University Work\\9873 - BIAI\\pytorch-stable-diffusion\\images\\cat.jpg'
generated_image_tensor = load_and_transform_image(generated_image_path)

# If you need to handle multiple images, you can batch them:
# tensor_image = tensor_image.unsqueeze(0)  # Add a batch dimension if processing a single image

real_image_tensor = real_image_tensor.unsqueeze(0)
generated_image_tensor = generated_image_tensor.unsqueeze(0)

real_features = get_features(real_image_tensor, model)
gen_features = get_features(generated_image_tensor, model)

fid_score = calculate_fid(real_features, gen_features)
print('FID score:', fid_score)


ValueError: Covariance matrices should be square and of the same size.