In [20]:
"""
Importing dataset. 
"""
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import random
from PIL import Image 
from torch.utils.data import DataLoader, Dataset
import os
import torchvision.transforms as transforms
import torch.nn as nn
import torchvision.transforms.functional as TF
import torch.nn.functional as F
from tqdm import tqdm

In [3]:
# Setting up the root folder
root_folder = "Dataset/"

In [4]:
if torch.backends.mps.is_available():
    device = torch.device("mps")  # Apple Silicon GPU
elif torch.cuda.is_available():
    device = torch.device("cuda")  # NVIDIA GPU
else:
    device = torch.device("cpu")  # if no GPU, then CPU

print("Device Running on:", device)

Device Running on: mps


In [5]:
# importing dataset 
df = pd.read_csv(root_folder+"subset_train.csv")

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,file_name,label
0,7230,subset_train_data/872586102a8e44738ca8fa97046c...,1
1,3021,subset_train_data/5ba3fef1f8cb42cea0d3f43b98e6...,0
2,49745,subset_train_data/0f1ed6e90603411a89122c6de6e9...,0
3,60079,subset_train_data/f6422ac852aa4ab2a30ef9db7196...,0
4,3465,subset_train_data/0a7b016a42f14e1d9ba7fa98953c...,0


In [7]:
print(df['label'].value_counts())

label
0    6000
1    4000
Name: count, dtype: int64


This dataset contains 6000 real images and 4000 AI genertaed images. 

In [8]:
def show_random_image(df):
    """
    Display a random image from the dataset with its label.
    """

    # Choosing the random index
    index = random.randint(0, len(df) - 1)

    # Loading path and label
    file_name = df.iloc[index]['file_name']
    label = df.iloc[index]['label']

    # Load the image 
    image = Image.open(root_folder + file_name).convert('RGB')

    # Convert label
    label_text = "Real" if label == 0 else "AI-Generated"

    # Visualization
    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"Label: {label_text}", fontsize=14, color="red")
    plt.show()

In [None]:
show_random_image(df)

In [10]:
""" 
Splitting dataset for training and testing
 - test_size = 20% 
"""
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [11]:
print(train_df['label'].value_counts())

label
0    4815
1    3185
Name: count, dtype: int64


In [12]:
print(val_df['label'].value_counts())

label
0    1185
1     815
Name: count, dtype: int64


# CNN with Frequency Domain - Model 1

In [13]:
"""
Preprocessing and argumentation for the training and testing dataset. 
"""

transform_train = transforms.Compose([
    
    # Resize the image to 224 * 224  
    transforms.Resize((224, 224)),  

    # Randomly flip dataset  
    transforms.RandomHorizontalFlip(),

    # Convert the image to PyTorch Tensor
    transforms.ToTensor(),

    # Normalizing the image, using imagenet mean and standard deviation
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


transform_val = transforms.Compose([

    # Resize the image to 224 * 224
    transforms.Resize((224, 224)),

    # Convert the image to PyTorch Tensor
    transforms.ToTensor(),

    # Normalizing the image, using imagenet mean and standard deviation
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [14]:
class CNN_FD_Dataset(Dataset):
    """  
        A Custom PyTorch Dataset to load images and labels from the dataset. 
        Converting the images to the frequency domain using FFT. 
    """
    def __init__(self, dataframe, image_dir, transform=None, file_name_col='file_name', label_col='label'):
        """ 
            Arguments : 
                dataframe - imported dataset. 
                image_dir - directory of image dataset
                transform - transformation for image
                file_name_col - column name of the image path from dataset
                label_col - column name of the labels (real or fake) from dataset
        """
        self.dataframe = dataframe
        self.transform = transform
        self.image_dir = image_dir
        self.file_name_col = file_name_col
        self.label_col = label_col

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """ 
            Processing one sample using image and corresponing labels. 
        """

        # Retriving the image name and forming the image path. 
        img_file = os.path.basename(
            self.dataframe.iloc[idx][self.file_name_col])
        img_path = os.path.join(self.image_dir, img_file)

        # Retriving the label. If not present assigning -1. 
        label = self.dataframe.iloc[idx][self.label_col] if self.label_col in self.dataframe.columns else -1

        image = Image.open(img_path).convert('RGB')
        image = TF.to_tensor(image)

        # Applying the transformation for the images before FFT converstion (Optional). 
        if self.transform:
            image = self.transform(image)

        # Transforming image to FFT
        freq_image = self._get_magnitude_spectrum(image)

        return freq_image, label

    def _get_magnitude_spectrum(self, img_tensor):
        """  
        Converting image as frequency domain represtation using 2D FFT. 
        """
        freq = torch.fft.fft2(img_tensor)
        freq_shift = torch.fft.fftshift(freq)
        magnitude = torch.abs(freq_shift)
        magnitude = torch.log1p(magnitude)  # log scale for stability
        return magnitude

In [15]:
# Setting up batch size
batch_size = 32

In [16]:
"""
Initialization of training and testing dataset using custom dataset (CNN_FD_Dataset)
"""

# Training dataset initialization 
train_dataset = CNN_FD_Dataset(train_df,
                              image_dir=root_folder + "/subset_train_data",
                              transform=transform_train)

# Testing dataset initialization
val_dataset = CNN_FD_Dataset(val_df,
                            image_dir=root_folder + "/subset_train_data",
                            transform=transform_val)

In [17]:
""" 
Dataloader for training and validation 
""" 

train_loader = DataLoader(train_dataset, 
                          batch_size=batch_size, 
                          shuffle=True,
                          num_workers=4)

val_loader = DataLoader(val_dataset, 
                        batch_size=batch_size, 
                        shuffle=False, 
                        num_workers=4)

In [18]:
print("Training set size : ", len(train_dataset), "samples")
print("Validation set size:", len(val_dataset), "samples")

Training set size :  8000 samples
Validation set size: 2000 samples


In [None]:
class CNN_FD_Classifier(nn.Module):
    """  
    CNN Model with Architecture : 
        -  3 convolution layer with ReLu activation function. 
        -  2 max pooling layers
        -  2 fully connected layers with 0.5 dropout
    """
    
    def __init__(self, num_classes=2):
        super(CNN_FD_Classifier, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, 
                               out_channels=32, 
                               kernel_size=3, 
                               stride=1, 
                               padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, 
                               out_channels=64, 
                               kernel_size=3, 
                               stride=1, 
                               padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, 
                                 padding=2)
        self.conv3 = nn.Conv2d(64, 128, 
                               kernel_size=3, 
                               stride=1, 
                               padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, 
                                  padding=2)

        self.flatten_dim = 128 * 56 * 56  # assuming 224x224 input

        self.fc1 = nn.Linear(self.flatten_dim, 512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool2(F.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)


# Initialize the CNN model and move it to the designated device
cnn_fd_model = CNN_FD_Classifier(num_classes=2).to(device)

# Print the model architecture
print(cnn_fd_model)

CNN_FD_Classifier(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=2, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=2, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=401408, out_features=512, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
)


In [21]:
def train_model(model, train_loader, criterion, optimizer, device, num_epochs):
    """ 
    Trainer function to train the CNN_FD_Classifier PyTorch model
    """
    
    # Setting up the Pytorch model in training mode
    model.train() 

    for epoch in range(num_epochs):
        running_loss, correct, total = 0.0, 0, 0
        # Progress bar for each epochs
        progress_bar = tqdm(
            train_loader, desc=f'Training Epoch {epoch+1}/{num_epochs}', leave=False)
        
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)
            # making gradients as zero before backpropagation
            optimizer.zero_grad()
            # Forward pass outpt
            outputs = model(images)
            # predicted value and true value loss calculations
            loss = criterion(outputs, labels)
            # Backward propergation 
            loss.backward()
            optimizer.step()
            # Update loss
            running_loss += loss.item()
            # Calculating the accuracy 
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            progress_bar.set_postfix(
                loss=loss.item(), acc=f"{100 * correct / total:.2f}%")
        # Printing summary of epocs
        print(
            f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

# SWIN Transformer - Model 2

# Hybrid Model (Model 1 + Model 2)