In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
import cv2
import os
import numpy as np
from scipy.ndimage import uniform_filter
import matplotlib.pyplot as plt

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Implementation of BiHPF and PRNU filters

In [3]:
def bilateral_high_pass_filter(image_path, d=9, sigma_color=75, sigma_space=75):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # img should be a grayscale or single-channel image
    smooth = cv2.bilateralFilter(img, d, sigma_color, sigma_space)
    high_pass = cv2.subtract(img, smooth)
    return high_pass

In [4]:
def extract_prnu_sprectrum(image_path, gaussian_kernel_size=3, gaussian_sigma=1.0, rms_window_size=3):
    img_bgr = cv2.imread(image_path)

    img_ycbcr = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2YCrCb)
    img_y = img_ycbcr[:, :, 0].astype(np.float32)

    img_y /= 255.

    img_blur = cv2.GaussianBlur(img_y, (gaussian_kernel_size, gaussian_kernel_size), gaussian_sigma)

    residual = img_y - img_blur

    squared = residual ** 2
    mean_squared = uniform_filter(squared, size=rms_window_size)
    rms_residual = np.sqrt(mean_squared)

    mean_rms = np.mean(rms_residual)
    prnu_spectrum = rms_residual / (mean_rms + 1e-8)  # evita divisão por zero

    return prnu_spectrum

## Creating the custom dataset

In [None]:
image_dir = "/kaggle/input/real-and-fake-images/dataset/train"


class CustomDataset(Dataset):
    def __init__(self, image_dir, device='cuda', transform=None):
        self.image_dir = image_dir
        self.image_paths = []
        self.labels = []
        
        for label in ['fake_images', 'real_images']:
            label_dir = os.path.join(self.image_dir, label)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(1 if label == 'fake_images' else 0)
        
        self.transform = transform
        
    def __len__(self):
        return len(os.listdir(self.image_dir))
    
    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        
        BiHPF_img = bilateral_high_pass_filter(img_path)
        PRNU_img = extract_prnu_sprectrum(img_path)
    
        feature_map = np.stack([BiHPF_img, PRNU_img], axis=-1)  
    
        label = self.labels[idx]
        
        if self.transform:
            feature_map = self.transform(feature_map)
    
        return feature_map, label

## Implementation of the path embedding

In [None]:
class PatchEmbedding(nn.Module):
    def __init__(self, img_size=224, patch_size=16, in_channels=2, embed_dim=768, use_cls_token=True):
        super().__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.in_channels = in_channels
        self.embed_dim = embed_dim
        self.num_patches = (img_size // patch_size) ** 2
        self.use_cls_token = use_cls_token

        self.projection = nn.Linear(in_channels * patch_size * patch_size, embed_dim)
        if use_cls_token:
            self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
            self.num_patches += 1
        self.pos_embedding = nn.Parameter(torch.zeros(1, self.num_patches, embed_dim))

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == self.img_size and W == self.img_size, f"Esperado {self.img_size}×{self.img_size}, mas recebeu {H}×{W}"
        assert C == self.in_channels

        patches = x.unfold(2, self.patch_size, self.patch_size) \
                   .unfold(3, self.patch_size, self.patch_size)
        patches = patches.contiguous().view(B, C, -1, self.patch_size, self.patch_size)
        patches = patches.permute(0, 2, 1, 3, 4)
        patches = patches.flatten(2)

        patch_embeddings = self.projection(patches)  

        if self.use_cls_token:
            cls_tokens = self.cls_token.expand(B, -1, -1) 
            patch_embeddings = torch.cat([cls_tokens, patch_embeddings], dim=1)  


        patch_embeddings = patch_embeddings + self.pos_embedding

        return patch_embeddings  


## Implementation of the attention mechanism

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class DualHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim=768, num_heads=2):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        assert embed_dim % num_heads == 0, "embed_dim deve ser divisível por num_heads"

        self.q_proj = nn.Linear(embed_dim, embed_dim)
        self.k_proj = nn.Linear(embed_dim, embed_dim)
        self.v_proj = nn.Linear(embed_dim, embed_dim)

        self.out_proj = nn.Linear(embed_dim, embed_dim)

        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)

    def forward(self, x):
        B, N, E = x.shape

        x_norm = self.norm1(x)

        Q = self.q_proj(x_norm)
        K = self.k_proj(x_norm)
        V = self.v_proj(x_norm)

        Q = Q.view(B, N, self.num_heads, self.head_dim).transpose(1, 2)  
        K = K.view(B, N, self.num_heads, self.head_dim).transpose(1, 2)
        V = V.view(B, N, self.num_heads, self.head_dim).transpose(1, 2)

        attn_scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.head_dim)
        attn_probs = F.softmax(attn_scores, dim=-1)

        attn_output = torch.matmul(attn_probs, V)

        attn_output = attn_output.transpose(1, 2).contiguous().view(B, N, E)

        out = self.out_proj(attn_output)
        out = self.norm2(out + x)  

        return out  


## Classification model

In [None]:
class Classifier(nn.Module):
    def __init__(self, embed_dim=768):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(embed_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 1) 
        )

    def forward(self, x):
        cls_token = x[:, 0, :] 
        logits = self.mlp(cls_token)
        return logits

## Proposed model

In [None]:
class GANDetectionModel(nn.Module):
    def __init__(self, img_size=224, patch_size=16, embed_dim=768):
        super().__init__()
        self.patch_embed = PatchEmbedding(img_size, patch_size, in_channels=2, embed_dim=embed_dim)
        self.attn_block = DualHeadSelfAttention(embed_dim=embed_dim)
        self.classifier = Classifier(embed_dim=embed_dim)

    def forward(self, x):
        # x: [B, H, W, 2]
        x = self.patch_embed(x)         
        x = self.attn_block(x)           
        logits = self.classifier(x)      
        return logits

In [10]:
transform = v2.Compose([
    v2.ToPILImage(),
    v2.Resize((224, 224)),
    v2.ToTensor(),
])



## Training process

In [11]:
dataset = CustomDataset(image_dir=image_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=128, shuffle=True)

In [12]:
model = GANDetectionModel()
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
model.to(device)

epochs = 100
for epoch in range(1, epochs+1):
    epoch_loss = 0
    model.train()
    for images, labels in dataloader:
        images = images.to(device)   
        labels = labels.to(device).float()

        out = model(images)          
        loss = criterion(out.squeeze(), labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"[{epoch}/{epochs}] Loss: {epoch_loss}")


[1/100] Loss: 0.725012481212616
[2/100] Loss: 0.2652965188026428
[3/100] Loss: 0.11526736617088318
[4/100] Loss: 0.082882821559906
[5/100] Loss: 0.04764764755964279
[6/100] Loss: 0.019114738330245018
[7/100] Loss: 0.023878809064626694
[8/100] Loss: 0.016685351729393005
[9/100] Loss: 0.007581563666462898
[10/100] Loss: 0.008214038796722889
[11/100] Loss: 0.004565594717860222
[12/100] Loss: 0.004019990097731352
[13/100] Loss: 0.006741341203451157
[14/100] Loss: 0.0050885374657809734
[15/100] Loss: 0.002784386742860079
[16/100] Loss: 0.0023856949992477894
[17/100] Loss: 0.002550278790295124
[18/100] Loss: 0.0029604299925267696
[19/100] Loss: 0.0031932496931403875
[20/100] Loss: 0.003320696298032999
[21/100] Loss: 0.0018338726367801428
[22/100] Loss: 0.003983508795499802
[23/100] Loss: 0.0023817517794668674
[24/100] Loss: 0.0018352956976741552
[25/100] Loss: 0.0026498246006667614
[26/100] Loss: 0.0020438856445252895
[27/100] Loss: 0.0022020828910171986
[28/100] Loss: 0.0014692828990519047


## Validation process

In [14]:
val_image_dir = "/kaggle/input/real-and-fake-images/dataset/validation"
val_dataset = CustomDataset(image_dir=val_image_dir, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=True)

In [None]:
model.eval()
running_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device)    
        labels = labels.to(device).float()

        logits = model(images)        
        loss   = criterion(logits.squeeze(), labels)

        running_loss += loss.item() * images.size(0)

        probs     = torch.sigmoid(logits).squeeze()
        preds     = (probs > 0.5).long()
        correct  += (preds == labels.long()).sum().item()
        total    += labels.size(0)

    avg_loss = running_loss / total
    accuracy = correct / total * 100

In [17]:
print(f"Validation Loss: {avg_loss}")
print(f"Accuracy: {accuracy}")

Validation Loss: 0.00017810953431762755
Accuracy: 100.0


In [18]:
torch.save(model.state_dict(), "model.pth")