In [27]:
"""
Importing dataset. 
"""
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
import random
from PIL import Image 
from torch.utils.data import DataLoader, Dataset
import os
import torchvision.transforms as transforms

In [None]:
# Setting up the root folder
root_folder = "Dataset/"

In [None]:
if torch.backends.mps.is_available():
    device = torch.device("mps")  # Apple Silicon GPU
elif torch.cuda.is_available():
    device = torch.device("cuda")  # NVIDIA GPU
else:
    device = torch.device("cpu")  # if no GPU, then CPU

print("Device Running on:", device)

Device Running on: mps


In [None]:
# importing dataset 
df = pd.read_csv(root_folder+"subset_train.csv")

In [None]:
df.head()

Unnamed: 0.1,Unnamed: 0,file_name,label
0,7230,subset_train_data/872586102a8e44738ca8fa97046c...,1
1,3021,subset_train_data/5ba3fef1f8cb42cea0d3f43b98e6...,0
2,49745,subset_train_data/0f1ed6e90603411a89122c6de6e9...,0
3,60079,subset_train_data/f6422ac852aa4ab2a30ef9db7196...,0
4,3465,subset_train_data/0a7b016a42f14e1d9ba7fa98953c...,0


In [10]:
print(df['label'].value_counts())

label
0    6000
1    4000
Name: count, dtype: int64


This dataset contains 6000 real images and 4000 AI genertaed images. 

In [18]:
def show_random_image(df):
    """
    Display a random image from the dataset with its label.
    """

    # Choosing the random index
    index = random.randint(0, len(df) - 1)

    # Loading path and label
    file_name = df.iloc[index]['file_name']
    label = df.iloc[index]['label']

    # Load the image 
    image = Image.open(root_folder + file_name).convert('RGB')

    # Convert label
    label_text = "Real" if label == 0 else "AI-Generated"

    # Visualization
    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"Label: {label_text}", fontsize=14, color="red")
    plt.show()

In [None]:
show_random_image(df)

In [23]:
""" 
Splitting dataset for training and testing
 - test_size = 20% 
"""
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [21]:
print(train_df['label'].value_counts())

label
0    4815
1    3185
Name: count, dtype: int64


In [22]:
print(val_df['label'].value_counts())

label
0    1185
1     815
Name: count, dtype: int64


# CNN with Frequency Domain - Model 1

In [28]:
"""
Preprocessing and argumentation for the training and testing dataset. 
"""

transform_train = transforms.Compose([
    
    # Resize the image to 224 * 224  
    transforms.Resize((224, 224)),  

    # Randomly flip dataset  
    transforms.RandomHorizontalFlip(),

    # Convert the image to PyTorch Tensor
    transforms.ToTensor(),

    # Normalizing the image, using imagenet mean and standard deviation
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


transform_val = transforms.Compose([

    # Resize the image to 224 * 224
    transforms.Resize((224, 224)),

    # Convert the image to PyTorch Tensor
    transforms.ToTensor(),

    # Normalizing the image, using imagenet mean and standard deviation
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [25]:
"""  
A Custom PyTorch Dataset to load images and labels from the dataset. 

Arguments : 
    dataframe - imported dataset. 
    image_dir - directory of image dataset
    transform - transformation for image
    file_name_col - column name of the image path from dataset
    label_col - column name of the labels (real or fake) from dataset
"""

class CNN_FD_Dataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None, file_name_col='file_name', label_col='label'):
      self.dataframe = dataframe
      self.transform = transform
      self.image_dir = image_dir
      self.file_name_col = file_name_col
      self.label_col = label_col

    def __len__(self):
      return len(self.dataframe)

    def __getitem__(self, idx):

      # File name from dataset 
      img_file = self.dataframe.iloc[idx][self.file_name_col]
      
      # Setting up the image path
      img_file = os.path.basename(img_file)
      img_path = os.path.join(self.image_dir, img_file)

      # Check the label, if not there assign -1
      if self.label_col is not None and self.label_col in self.dataframe.columns:
          label = self.dataframe.iloc[idx][self.label_col]
      else:
          label = -1

      # if no image in directory, warning message
      if not os.path.exists(img_path):
          print("Warning: Image path does not exist : ", img_path)

      # Apply transformation for the image
      image = Image.open(img_path).convert('RGB')
      if self.transform:
          image = self.transform(image)
      return image, label

In [26]:
# Setting up batch size
batch_size = 32

In [29]:
"""
Initialization of training and testing dataset using custom dataset (CNN_FD_Dataset)
"""

# Training dataset initialization 
train_dataset = CNN_FD_Dataset(train_df,
                              image_dir=root_folder + "/subset_train_data",
                              transform=transform_train)

# Testing dataset initialization
val_dataset = CNN_FD_Dataset(val_df,
                            image_dir=root_folder + "/subset_train_data",
                            transform=transform_val)

# SWIN Transformer - Model 2

# Hybrid Model (Model 1 + Model 2)