# **Image Processing**

import packages

In [None]:
from PIL import Image
import os

Resizing each image to fit within 224x224 pixels (aspect ratio preserved) and padding it to exactly 224x224.

In [None]:

def resize_and_pad(image, size=(224, 224), color=(0, 0, 0)):
    """Fit an image inside ``size`` and centre it on a solid-colour canvas.

    The aspect ratio is preserved. ``Image.thumbnail`` only shrinks, so an
    image that is already smaller than ``size`` keeps its resolution and is
    simply padded.

    Args:
        image: PIL image to process. It is left unmodified.
        size: Target ``(width, height)`` of the returned image.
        color: Fill colour used for the padding.

    Returns:
        A new RGB PIL image whose dimensions are exactly ``size``.
    """
    # thumbnail() resizes in place; operate on a copy so the caller's image
    # is not silently shrunk as a side effect.
    scaled = image.copy()
    scaled.thumbnail(size, Image.LANCZOS)

    canvas = Image.new("RGB", size, color)
    # Centre the scaled image; integer division biases odd gaps to the top/left.
    offset = ((size[0] - scaled.width) // 2, (size[1] - scaled.height) // 2)
    canvas.paste(scaled, offset)
    return canvas

looping through all images to process them and save them to a new directory

In [None]:
def process_images(input_dir, output_dir, size=(224, 224)):
    """Resize and pad every image under ``input_dir`` into ``output_dir``.

    The directory layout below ``input_dir`` is mirrored below ``output_dir``.
    Only files ending in .png, .jpg or .jpeg (case-insensitive) are processed.

    Args:
        input_dir: Root directory that is walked recursively.
        output_dir: Root directory for the processed images; created if missing.
        size: Target ``(width, height)`` forwarded to ``resize_and_pad``.

    Note:
        Output files keep their original file name but are always encoded as
        JPEG, so a ``.png`` input yields a ``.png``-named file with JPEG data.
    """
    os.makedirs(output_dir, exist_ok=True)
    output_root = os.path.abspath(output_dir)

    for root, dirs, files in os.walk(input_dir):
        # If the output tree lives inside the input tree, do not descend into
        # it, otherwise already-processed images would be processed again.
        dirs[:] = [
            d for d in dirs
            if os.path.abspath(os.path.join(root, d)) != output_root
        ]

        rel_path = os.path.relpath(root, input_dir)
        output_subdir = os.path.join(output_dir, rel_path)
        os.makedirs(output_subdir, exist_ok=True)

        for file in files:
            if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            input_path = os.path.join(root, file)
            output_path = os.path.join(output_subdir, file)

            # Guard the opened file itself (not the converted copy) so the
            # underlying handle is released deterministically.
            with Image.open(input_path) as source:
                rgb_image = source.convert("RGB")

            resized_img = resize_and_pad(rgb_image, size)
            resized_img.save(output_path, format="JPEG", quality=95)
            print(f"Processed: {output_path}")

**Run Function**

In [None]:
# Both paths are left blank on purpose: fill them in before running this cell.
input_dir= # TODO: replace with your input directory
output_dir= # TODO: replace with your output directory for images
size = (224, 224)  # target (width, height) passed through to process_images

process_images(input_dir, output_dir, size)

# **Creating Dataset**

import packages

In [1]:
import torch
from torch import nn
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from collections import defaultdict
import seaborn as sns
import matplotlib.pyplot as plt
from collections import defaultdict
import random
from shutil import copy2
from pathlib import Path
import torchvision.models as models
from torchvision.models.vision_transformer import EncoderBlock

KeyboardInterrupt: 

classify all the images as either 0 (real) or 1 (fake)

In [None]:
class RealFakeDataset(Dataset):
    """Binary image dataset read from ``real_images`` / ``fake_images`` folders.

    Every .jpg/.jpeg/.png file found (recursively) under
    ``<root_dir>/real_images`` gets label 0 and every one under
    ``<root_dir>/fake_images`` gets label 1.

    Attributes are plain lists/callables:
        transform: optional callable applied to each loaded PIL image.
        samples:   list of ``(image_path, label)`` tuples in a stable order.
    """

    def __init__(self, root_dir, transform=None):
        """Index all image files below ``root_dir``.

        Args:
            root_dir: Directory containing the two label folders.
            transform: Optional callable applied to the RGB image in
                ``__getitem__``.
        """
        self.transform = transform
        self.samples = []

        for label_dir in ["real_images", "fake_images"]:
            label = 0 if label_dir == "real_images" else 1
            label_path = os.path.join(root_dir, label_dir)

            for root, dirs, files in os.walk(label_path):
                # os.walk yields entries in arbitrary, platform-dependent
                # order. Sorting makes sample indices stable, so index-based
                # splits driven by a fixed random seed are reproducible.
                dirs.sort()
                for img_file in sorted(files):
                    if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                        img_path = os.path.join(root, img_file)
                        self.samples.append((img_path, label))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is converted to RGB and, if a transform was supplied,
        passed through it.
        """
        path, label = self.samples[idx]
        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(path) as source:
            image = source.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

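Define the basic transform, which only converts each image to a tensor (`ToTensor` scales pixel values to the [0, 1] range; no mean/std normalization is applied here).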

In [None]:
# Minimal preprocessing pipeline: the only step is PIL image -> tensor.
basic_steps = [transforms.ToTensor()]
transform = transforms.Compose(basic_steps)

transformation for ViT

In [None]:
# ViT preprocessing: force a 224x224 input, convert to a tensor, then
# standardise each channel with the per-channel mean/std below
# (presumably the usual ImageNet statistics -- confirm against the weights used).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

vit_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
transform = transforms.Compose(vit_steps)

load the dataset

In [None]:

# Root folder of the dataset. RealFakeDataset looks for "real_images" and
# "fake_images" sub-folders inside it. Fill this in before running the cell.
data_dir = # TODO: replace with the directory of the dataset
dataset = RealFakeDataset(data_dir, transform=transform)

Split data into train/test sets

In [None]:
def _source_and_category(path):
    """Parse ``(source, category)`` from an image path, or return ``None``.

    ``category`` is the path component directly after ``fake_images`` /
    ``real_images`` (original casing). ``source`` is "real" for real images,
    or "chatGPT" / "gemeni" for fake images depending on which substring
    occurs in the lower-cased path. Fake images from an unrecognised source
    and paths containing neither marker folder yield ``None``.
    """
    parts = path.replace("\\", "/").split("/")

    if "fake_images" in parts:
        # NOTE(review): the substring test runs on the whole path, so a parent
        # directory containing "chat" would also match -- confirm the layout.
        lowered = path.lower()
        if "chat" in lowered:
            source = "chatGPT"
        elif "gemeni" in lowered:  # spelling must match the on-disk folder name
            source = "gemeni"
        else:
            return None
        return source, parts[parts.index("fake_images") + 1]

    if "real_images" in parts:
        return "real", parts[parts.index("real_images") + 1]

    return None


random.seed(42)

# Group all sample indices by (source, lower-cased category).
grouped = defaultdict(list)
for idx, (path, _label) in enumerate(dataset.samples):
    parsed = _source_and_category(path)
    if parsed is None:
        continue
    source, category = parsed
    grouped[(source, category.lower())].append(idx)

# From each group, select 5 for test and the rest for train. Groups with
# fewer than 6 images are left out of both splits.
test_idx = []
train_idx = []
for idxs in grouped.values():
    if len(idxs) < 6:
        continue
    random.shuffle(idxs)
    test_idx.extend(idxs[:5])
    train_idx.extend(idxs[5:])

# Create Subsets
train_ds = Subset(dataset, train_idx)
test_ds = Subset(dataset, test_idx)

# Keep a copy of the held-out images on disk, laid out as
# saved_test_set/<source>/<category>/<file name>.
save_root = Path("saved_test_set")
save_root.mkdir(parents=True, exist_ok=True)

for idx in test_idx:
    path, _label = dataset.samples[idx]
    # Same parser as the grouping step, so the on-disk group always agrees
    # with the group the sample was drawn from. Never None here: only parsed
    # samples can reach test_idx.
    source, category = _source_and_category(path)

    dest_dir = save_root / source / category
    dest_dir.mkdir(parents=True, exist_ok=True)
    copy2(path, dest_dir / os.path.basename(path))

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
# No shuffling: later evaluation relies on test batches following test_idx order.
test_loader = DataLoader(test_ds, batch_size=32)

# **Loading Different Models**

ResForm Model Class

In [None]:
class FrozenResNet50Transformer(nn.Module):
    """ResNet-50 feature extractor followed by a Transformer encoder classifier.

    The ResNet-50 convolutional trunk (everything except its last two child
    modules) produces a spatial feature map. Each spatial position becomes
    one token, is projected from 2048 to ``embed_dim`` channels, receives a
    learned positional embedding, and is passed through ``num_layers``
    Transformer encoder blocks. Tokens are mean-pooled and classified.

    Only the backbone parameters whose name contains ``layer4`` remain
    trainable; all other backbone parameters are frozen.

    Args:
        num_classes: Number of output logits.
        embed_dim: Token width used by the Transformer.
        num_heads: Attention heads per encoder block.
        num_layers: Number of encoder blocks.
    """

    def __init__(self, num_classes=2, embed_dim=512, num_heads=4, num_layers=4):
        super().__init__()

        # `weights=` is the current torchvision API; "IMAGENET1K_V1" is the
        # checkpoint the deprecated `pretrained=True` flag selected.
        resnet = models.resnet50(weights="IMAGENET1K_V1")
        # Drop the last two children so the output is a spatial feature map.
        self.cnn_backbone = nn.Sequential(*list(resnet.children())[:-2])

        # Freeze everything except layer4. The backbone shares these
        # parameter objects with `resnet`, so the flags carry over.
        for name, param in resnet.named_parameters():
            param.requires_grad = "layer4" in name

        self.flatten = nn.Flatten(2)

        # One learned embedding per token. 49 tokens corresponds to a 7x7
        # feature map -- assumes 224x224 inputs; other sizes will not broadcast.
        self.pos_embed = nn.Parameter(torch.randn(1, 49, embed_dim))

        self.transformer = nn.Sequential(
            *[
                EncoderBlock(
                    num_heads=num_heads,
                    hidden_dim=embed_dim,
                    mlp_dim=embed_dim * 4,
                    dropout=0.1,
                    attention_dropout=0.1,
                )
                for _ in range(num_layers)
            ]
        )

        self.proj = nn.Linear(2048, embed_dim)

        self.cls_head = nn.Sequential(
            nn.LayerNorm(embed_dim),
            nn.Linear(embed_dim, num_classes),
        )

    def forward(self, x):
        """Return class logits for a batch of images."""
        x = self.cnn_backbone(x)
        # (batch, channels, h, w) -> (batch, h*w, channels): one token per position.
        x = self.flatten(x).transpose(1, 2)
        x = self.proj(x)
        x = x + self.pos_embed
        x = self.transformer(x)
        x = x.mean(dim=1)  # average over tokens
        return self.cls_head(x)

set the device to GPU if possible

In [None]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

load the ResNet18 model

In [None]:
from torchvision.models import resnet18

# ImageNet-pretrained ResNet-18 whose final fully connected layer is replaced
# by a 2-way head. `weights=` supersedes the deprecated `pretrained=True`;
# "IMAGENET1K_V1" is the checkpoint that flag used to load.
model = resnet18(weights="IMAGENET1K_V1")
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(device)

load the ResNet50 model

In [None]:
from torchvision.models import resnet50

# ImageNet-pretrained ResNet-50 whose final fully connected layer is replaced
# by a 2-way head. `weights=` supersedes the deprecated `pretrained=True`;
# "IMAGENET1K_V1" is the checkpoint that flag used to load.
model = resnet50(weights="IMAGENET1K_V1")
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(device)

load the EfficientNetV2 Model

In [None]:
from torchvision.models import efficientnet_v2_s

# ImageNet-pretrained EfficientNetV2-S; the last classifier layer is swapped
# for a 2-way head. `weights=` supersedes the deprecated `pretrained=True`.
model = efficientnet_v2_s(weights="IMAGENET1K_V1")
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
model = model.to(device)

load the EfficientNet-b0 Model

In [None]:
from torchvision.models import efficientnet_b0

# ImageNet-pretrained EfficientNet-B0; the last classifier layer is swapped
# for a 2-way head. `weights=` supersedes the deprecated `pretrained=True`.
model = efficientnet_b0(weights="IMAGENET1K_V1")
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
model = model.to(device)

load EfficientNet-b5 Model

In [None]:
from torchvision.models import efficientnet_b5

# ImageNet-pretrained EfficientNet-B5; the last classifier layer is swapped
# for a 2-way head. `weights=` supersedes the deprecated `pretrained=True`.
model = efficientnet_b5(weights="IMAGENET1K_V1")
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
model = model.to(device)

load VGGnet Model

In [None]:
from torchvision.models import vgg16

# ImageNet-pretrained VGG-16; the final classifier layer (index 6) is swapped
# for a 2-way head. `weights=` supersedes the deprecated `pretrained=True`.
model = vgg16(weights="IMAGENET1K_V1")
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 2)
model = model.to(device)

load DenseNet Model

In [None]:
from torchvision.models import densenet121

# ImageNet-pretrained DenseNet-121; its classifier is replaced by a 2-way
# head. `weights=` supersedes the deprecated `pretrained=True`.
model = densenet121(weights="IMAGENET1K_V1")
model.classifier = nn.Linear(model.classifier.in_features, 2)
model = model.to(device)

load ViT Model

In [None]:
from torchvision.models import vit_b_16

# ImageNet-pretrained ViT-B/16 with its classification head replaced by a
# 2-way head.
model = vit_b_16(weights="IMAGENET1K_V1")
model.heads.head = nn.Linear(model.heads.head.in_features, 2)
# The training loop moves every batch to `device`, so the model must live
# there as well; otherwise the forward pass fails with a device mismatch.
model = model.to(device)

load ResForm Model

In [None]:
# Build the ResNet-50 + Transformer hybrid with a two-class head, then move
# it onto the selected device.
model = FrozenResNet50Transformer(num_classes=2)
model = model.to(device)

# **Training the Model**

using cross entropy for our loss function and AdamW as our optimizer

In [None]:
# Default training setup: AdamW over every model parameter with a single
# learning rate, and cross-entropy over the class logits.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    weight_decay=1e-2,
    lr=1e-4,
)
criterion = nn.CrossEntropyLoss()

Adjusted optimizer only if using __ResForm__

In [None]:
# ResForm-specific optimizer: the pretrained CNN backbone gets a 10x smaller
# learning rate than the freshly initialised parts. Every trainable piece of
# the model has to appear in a group -- anything left out is never updated by
# optimizer.step() -- so the token projection and the positional embedding
# are listed alongside the transformer and the classification head.
optimizer = torch.optim.AdamW(
    [
        {"params": model.cnn_backbone.parameters(), "lr": 1e-5},
        {"params": model.proj.parameters(), "lr": 1e-4},
        # pos_embed is a bare nn.Parameter, so wrap it in a list.
        {"params": [model.pos_embed], "lr": 1e-4},
        {"params": model.transformer.parameters(), "lr": 1e-4},
        {"params": model.cls_head.parameters(), "lr": 1e-4},
    ],
    weight_decay=1e-2,
)

train the model while logging accuracy and loss per epoch

In [None]:
epochs = 10
epoch_accuracies = []  # test-set accuracy after each epoch
epoch_losses = []      # mean training loss per batch for each epoch

for epoch in range(epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Report the same quantity that is logged (mean loss per batch) so the
    # printed value does not scale with the number of batches. The max()
    # guard avoids a ZeroDivisionError on an empty loader.
    mean_loss = running_loss / max(len(train_loader), 1)

    # ---- evaluation pass on the held-out split ----
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    accuracy = correct / total if total else 0.0

    epoch_accuracies.append(accuracy)
    epoch_losses.append(mean_loss)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}, Accuracy: {accuracy:.2%}")

# Save the model (ResForm only).
# NOTE(review): despite the file name, this stores the weights from the last
# epoch, not the best-scoring one.
if isinstance(model, FrozenResNet50Transformer):
    model_save_path = "resform_best.pt"
    torch.save(model.state_dict(), model_save_path)

# **Generate Output**

In [None]:
def _category_after(parts, marker):
    """Return the lower-cased path component following ``marker``, or "unknown"."""
    try:
        return parts[parts.index(marker) + 1].lower()
    except IndexError:
        return "unknown"


def _new_stats():
    """Return a fresh correct/total counter for one accuracy bucket."""
    return {"correct": 0, "total": 0}


def _tally(bucket, pred, label):
    """Count one prediction in ``bucket``."""
    bucket["total"] += 1
    if pred == label:
        bucket["correct"] += 1


def _print_accuracy_table(title, stats_by_key):
    """Print ``title`` followed by one accuracy line per bucket."""
    print(title)
    for key, stats in stats_by_key.items():
        acc = stats["correct"] / stats["total"] if stats["total"] > 0 else 0
        print(f"{key:20s}: {acc:.2%} ({stats['correct']} / {stats['total']})")


# Label plots and output files with the model that was actually trained.
model_name = type(model).__name__
# Derive the x-axis from the recorded history so an interrupted run still plots.
accuracy_epochs = list(range(1, len(epoch_accuracies) + 1))
loss_epochs = list(range(1, len(epoch_losses) + 1))

# plot: Accuracy over epochs
plt.figure(figsize=(8, 5))
sns.lineplot(x=accuracy_epochs, y=epoch_accuracies, marker="o")
plt.title(f"{model_name} Accuracy per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.ylim(0, 1)
plt.grid(True)
plt.savefig(f"{model_name}_accuracy_per_epoch.png", dpi=300, bbox_inches="tight")

# plot: Loss over epochs
plt.figure(figsize=(8, 5))
sns.lineplot(x=loss_epochs, y=epoch_losses, marker="x", color="red")
plt.title(f"{model_name} Loss per Epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.savefig(f"{model_name}_loss_per_epoch.png", dpi=300, bbox_inches="tight")

# Evaluate on the held-out split
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images.to(device))
        all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
        all_labels.extend(labels.tolist())

# test_loader walks test_ds in order (it is built without shuffling), so the
# i-th prediction belongs to the sample at test_idx[i].
all_paths = [dataset.samples[idx][0] for idx in test_idx]

# Print overall classification report
print(classification_report(
    all_labels,
    all_preds,
    labels=[0, 1],
    target_names=["real_images", "fake_images"]
))

cross_stats = defaultdict(_new_stats)  # fake images, keyed "<category>-<source>"
real_stats = defaultdict(_new_stats)   # real images, keyed by category

for path, pred, label in zip(all_paths, all_preds, all_labels):
    parts = path.replace("\\", "/").split("/")

    if "fake_images" in parts:
        path_lower = path.lower()
        if "chat" in path_lower:
            source = "chatGPT"
        elif "gemeni" in path_lower:  # spelling must match the folder name
            source = "gemeni"
        else:
            source = None  # no known generator: not counted

        if source is not None:
            category = _category_after(parts, "fake_images")
            _tally(cross_stats[f"{category}-{source}"], pred, label)

    if "real_images" in parts:
        _tally(real_stats[_category_after(parts, "real_images")], pred, label)

_print_accuracy_table("\nAccuracy by Category + LLM:", cross_stats)
_print_accuracy_table("\nAccuracy by Category (Real Images):", real_stats)