In [None]:
%pip install lpips

In [None]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

import torch
import lpips
from torchvision import transforms
from diffusers import AutoencoderKL
from transformers import CLIPFeatureExtractor
from torchvision.transforms import functional as TF

In [None]:
# Pretrained VAE checkpoint "stabilityai/sd-vae-ft-mse", loaded through diffusers' AutoencoderKL.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
# CLIP image-preprocessing configuration for "openai/clip-vit-base-patch32".
# NOTE(review): CLIPFeatureExtractor is presumably deprecated in recent
# transformers releases in favour of CLIPImageProcessor -- confirm before upgrading.
feature_extractor = CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32")
# LPIPS perceptual-distance model built with net="alex".
lpips_model = lpips.LPIPS(net="alex")

In [None]:
def load_image(path):
    """Read an image file and turn it into a normalized, batched tensor.

    The image is converted to RGB, resized to 256x256, converted to a tensor
    and normalized per channel with mean 0.5 and std 0.5.

    Args:
        path: Location of the image file on disk.

    Returns:
        The preprocessed image with an extra leading batch dimension.
    """
    half = [0.5] * 3
    stages = (
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=half, std=half),
    )

    pixels = Image.open(path).convert("RGB")
    # Apply the stages one after another (what Compose does internally).
    for stage in stages:
        pixels = stage(pixels)

    # Indexing with None prepends the batch axis.
    return pixels[None, ...]

In [None]:
def reconstruct_image(image_path):
    """Round-trip an image through the VAE and return its reconstruction.

    The input is preprocessed with ``load_image`` so that the tensor fed to
    the VAE is exactly the tensor later used as the LPIPS reference (same
    256x256 resize, same [-1, 1] normalization). Preprocessing the VAE input
    with a different pipeline would make the perceptual distance measure the
    preprocessing mismatch instead of the reconstruction error.

    Args:
        image_path: Path of the image file to reconstruct.

    Returns:
        torch.Tensor: The reconstruction without batch dimension, i.e. of
        shape (3, 256, 256), with values clamped to [-1, 1].
    """
    # Batched (1, 3, 256, 256) tensor in [-1, 1], identical to the reference image.
    input_tensor = load_image(image_path)

    with torch.no_grad():
        # Take the mode of the latent distribution rather than a random
        # sample so that repeated runs yield the same reconstruction/score.
        latent = vae.encode(input_tensor).latent_dist.mode()
        reconstructed_image = vae.decode(latent).sample

    # The decoder output may slightly overshoot the valid range; LPIPS is
    # fed images in [-1, 1], so clamp before comparing.
    reconstructed_image = reconstructed_image.squeeze(0).clamp(-1.0, 1.0)

    # The decoder is expected to return the input resolution; resize only as
    # a safeguard so the documented output shape always holds.
    if tuple(reconstructed_image.shape[-2:]) != (256, 256):
        reconstructed_image = TF.resize(reconstructed_image, (256, 256))

    return reconstructed_image

In [None]:
def calculate_lpips(image_path):
    """Compute the LPIPS distance between an image and its VAE reconstruction.

    This is deliberately best-effort: any failure (unreadable file, model
    error, ...) is reported on stdout and signalled with ``None`` so that a
    single bad image does not abort a whole dataset run.

    Args:
        image_path: Path of the image file to score.

    Returns:
        float | None: The LPIPS distance as a Python float, or ``None`` when
        the image could not be processed.
    """
    try:
        original_image = load_image(image_path)
        # reconstruct_image returns an unbatched tensor; add the batch axis
        # back so both LPIPS inputs have the same rank.
        reconstructed_image = reconstruct_image(image_path).unsqueeze(0)

        # Pure inference: disable autograd so no graph is built per image.
        with torch.no_grad():
            lpips_distance = lpips_model(original_image, reconstructed_image)

        return lpips_distance.item()

    except Exception as e:
        print(f"Error calculating LPIPS for {image_path}: {e}")

        return None

In [None]:
def process_dataset(
    base_path,
    output_csv,
    max_samples_per_class=1000,
    class_folders=("REAL_temp", "FAKE_temp"),
    distance_fn=None,
):
    """Score the images of every class folder and store the results as CSV.

    Args:
        base_path: Directory that contains one sub-folder per class.
        output_csv: Path of the CSV file to write.
        max_samples_per_class: Maximum number of successfully scored images
            kept per class.
        class_folders: Sub-folder names; the position of a name in this
            sequence is used as its integer class label (by default
            real: 0, fake: 1).
        distance_fn: Callable mapping an image path to a distance, or to
            ``None`` when the image should be skipped. Defaults to
            ``calculate_lpips``.

    Returns:
        pandas.DataFrame: One row per scored image with the columns
        ``Image``, ``LPIPS_Distance`` and ``Class``. The same table is
        written to ``output_csv``.

    Raises:
        OSError: If a class folder cannot be listed (e.g. it does not exist).
    """
    if distance_fn is None:
        distance_fn = calculate_lpips

    columns = ["Image", "LPIPS_Distance", "Class"]
    image_extensions = (".jpg", ".png", ".jpeg")
    data = []

    for class_label, class_folder in enumerate(class_folders):
        folder_path = os.path.join(base_path, class_folder)
        samples_kept = 0

        # os.listdir returns entries in arbitrary order; sorting makes the
        # capped subset the same on every run and every filesystem.
        for filename in sorted(os.listdir(folder_path)):
            if samples_kept >= max_samples_per_class:
                break  # Stop processing this class if the limit is reached

            # Compare in lower case so ".JPG" / ".PNG" files are not skipped.
            if not filename.lower().endswith(image_extensions):
                continue

            lpips_distance = distance_fn(os.path.join(folder_path, filename))
            if lpips_distance is None:
                continue  # Scoring failed; do not count it towards the limit

            data.append(
                {
                    "Image": filename,
                    "LPIPS_Distance": lpips_distance,
                    "Class": class_label,
                }
            )
            samples_kept += 1

    # Explicit columns keep the CSV header intact even when nothing was
    # scored, so the file can still be read back with pandas.
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_csv, index=False)
    return df

In [None]:
def perform_logistic_regression(csv_file):
    """Fit and evaluate a logistic regression on the LPIPS scores in a CSV.

    The ``LPIPS_Distance`` column is the single feature and ``Class`` is the
    target. 20% of the rows are held out (fixed random state 42) and used to
    print a classification report and the accuracy.

    Args:
        csv_file: Path of a CSV file with ``LPIPS_Distance`` and ``Class``
            columns.
    """
    table = pd.read_csv(csv_file)

    # A single feature has to be shaped as a column vector for scikit-learn.
    features = table["LPIPS_Distance"].to_numpy().reshape(-1, 1)
    labels = table["Class"].to_numpy()

    split = train_test_split(features, labels, test_size=0.2, random_state=42)
    train_features, test_features, train_labels, test_labels = split

    # fit() returns the estimator itself, so construction and training chain.
    classifier = LogisticRegression().fit(train_features, train_labels)
    predictions = classifier.predict(test_features)

    print("Classification Report:\n", classification_report(test_labels, predictions))
    print("Accuracy:", accuracy_score(test_labels, predictions))


In [None]:
# Run configuration for the end-to-end pipeline.
# base_path is the directory whose class sub-folders process_dataset scans.
# NOTE(review): hard-coded absolute path, presumably a Colab location --
# adjust when running in another environment.
base_path = "/content/train"
# File that receives the per-image scores and is read back for the classifier.
output_csv = "lpips_results.csv"
# Upper bound on the number of scored images kept per class.
max_samples_per_class = 1000


# Score the dataset and persist the results.
print("Processing dataset...")
df = process_dataset(base_path, output_csv, max_samples_per_class)
print(f"CSV saved at {output_csv}")

# Perform logistic regression
# The classifier reads the scores from the CSV written above rather than from df.
print("Performing Logistic Regression...")
perform_logistic_regression(output_csv)