In [4]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [5]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix
import os
from PIL import Image, ImageFile
import warnings

with warnings.catch_warnings():
    warnings.simplefilter("ignore", Image.DecompressionBombWarning)



# Define dataset path
data_dir = '/kaggle/input/ai-generated-images-vs-real-images/train'


# Set a higher limit if you know your dataset has large images
Image.MAX_IMAGE_PIXELS = None  # Disable the check completely (use with caution)
# or
Image.MAX_IMAGE_PIXELS = 200000000  # Set to a specific safe value (e.g., 200M pixels)


def convert_to_rgb(image):
    """
    Ensures the input image is in RGB mode. Converts images with a palette
    (e.g., PNG or GIF) and transparency to RGBA, then to RGB by blending with a white background.
    """
    if image.mode == 'P':  # Palette-based images
        image = image.convert("RGBA")  # Convert to RGBA
    if image.mode == "RGBA":  # Images with transparency
        # Blend with a white background to remove transparency
        background = Image.new("RGB", image.size, (255, 255, 255))
        image = Image.alpha_composite(background, image).convert("RGB")
    elif image.mode != "RGB":
        image = image.convert("RGB")  # Convert other modes directly to RGB
    return image



def resize_large_image(image, max_size):
    """
    Resize images that exceed a certain size.
    Args:
        image (PIL.Image.Image): Input image.
        max_size (tuple): Maximum allowed dimensions (width, height).
    Returns:
        PIL.Image.Image: Resized image if necessary.
    """
    if image.size[0] > max_size[0] or image.size[1] > max_size[1]:
        image.thumbnail(max_size, Image.Resampling.LANCZOS)  # Use LANCZOS for high-quality downscaling
    return image


train_transform = transforms.Compose([
    transforms.Lambda(lambda img: resize_large_image(img, max_size=(5000, 5000))),  # Ensure large images are resized
    transforms.Lambda(convert_to_rgb),               # Convert to RGB
    transforms.RandomResizedCrop(224),               # Randomly crop and resize to 224x224
    transforms.RandomHorizontalFlip(p=0.5),          # Randomly flip horizontally with 50% probability
    transforms.RandomVerticalFlip(p=0.2),            # Randomly flip vertically with 20% probability
    transforms.RandomRotation(degrees=15),           # Random rotation within ±15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.ToTensor(),                           # Convert to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize using ImageNet stats
])



val_transform = transforms.Compose([
    transforms.Lambda(convert_to_rgb),  # Convert to RGB
    transforms.Resize((224, 224)),      # Resize images to 224x224
    transforms.ToTensor(),              # Convert to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize with ImageNet mean and std
])

# Allow loading of truncated images
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Minimum file size in bytes for an image to be considered valid
MIN_IMAGE_FILE_SIZE = 3284  # 1 KB (adjust as needed)

# Function to validate images based on size and format
def is_valid_image(file_path):
    """
    Checks if the image file is valid, has enough bytes, and can be opened.
    Args:
        file_path (str): Path to the image file.
    Returns:
        bool: True if valid, False otherwise.
    """
    try:
        # Check file size
        if os.path.getsize(file_path) < MIN_IMAGE_FILE_SIZE:
            return False
        
        # Verify image integrity
        with Image.open(file_path) as img:
            img.verify()  # Verify it's a valid image
        return True
    except (OSError, Image.DecompressionBombError):
        return False

# Custom ImageFolder class that filters invalid images
class ValidImageFolder(datasets.ImageFolder):
    def __init__(self, root, transform=None):
        super().__init__(root, transform)
        # Filter out invalid images
        self.samples = [(path, label) for path, label in self.samples if is_valid_image(path)]
        self.targets = [label for _, label in self.samples]

# Load dataset and validate images
dataset = ValidImageFolder(root=data_dir, transform=train_transform)

In [6]:
from torch.utils.data import Subset
# Separate indices for "fake" and "real" images
fake_indices = [i for i, (_, label) in enumerate(dataset.samples) if label == dataset.class_to_idx['fake']]
real_indices = [i for i, (_, label) in enumerate(dataset.samples) if label == dataset.class_to_idx['real']]

# Take 50% from each class
fake_subset_size = int(0.4 * len(fake_indices))
real_subset_size = int(0.4 * len(real_indices))

fake_subset_indices = np.random.choice(fake_indices, fake_subset_size, replace=False)
real_subset_indices = np.random.choice(real_indices, real_subset_size, replace=False)

# Combine the indices for the balanced subset
balanced_subset_indices = np.concatenate((fake_subset_indices, real_subset_indices))

# Create the subset
balanced_subset = Subset(dataset, balanced_subset_indices)

# Print the sizes of the original and subset datasets
print(f"Original Dataset Size: {len(dataset)}")
print(f"Balanced Subset Size: {len(balanced_subset)} (Fake: {fake_subset_size}, Real: {real_subset_size})")


Original Dataset Size: 47998
Balanced Subset Size: 19199 (Fake: 9600, Real: 9599)


In [7]:
# Split the balanced subset into training and validation sets
train_size = int(0.8 * len(balanced_subset))
val_size = len(balanced_subset) - train_size
train_dataset, val_dataset = random_split(balanced_subset, [train_size, val_size])

# Apply validation transforms to validation set
val_dataset.dataset.transform = val_transform

accumulation_steps = 4  # Gradient accumulation steps

# Create DataLoader for training and validation sets
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=4)

In [8]:
# Print the sizes of the training and validation datasets
print(f"Train Dataset Size: {len(train_dataset)}")
print(f"Validation Dataset Size: {len(val_dataset)}")

Train Dataset Size: 15359
Validation Dataset Size: 3840


In [None]:
import matplotlib.pyplot as plt

# Calculate dataset sizes
train_size = len(train_dataset)
val_size = len(val_dataset)

# Dataset labels and sizes
labels = ['Training Dataset', 'Validation Dataset']
sizes = [train_size, val_size]

# Create bar chart
plt.figure(figsize=(8, 6))
plt.bar(labels, sizes, color=['blue', 'orange'], alpha=0.7)

# Add annotations to the bars
for i, size in enumerate(sizes):
    plt.text(i, size + 5, str(size), ha='center', fontsize=12)

# Add chart details
plt.title('Dataset Split Distribution')
plt.ylabel('Number of Samples')
plt.xlabel('Dataset')
plt.ylim(0, max(sizes) + 50)
plt.show()

In [9]:
# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [10]:
from torchvision.models import resnet152
import torch.nn as nn

# Load a pre-trained ResNet18
model = resnet152(pretrained=True)

# Modify the last fully connected layer with dropout
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(p=0.5),  # 50% dropout
    nn.Linear(num_ftrs, 2)  # 2 output classes
)

model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:01<00:00, 206MB/s]  


In [11]:
from torch.optim.lr_scheduler import StepLR

# Mixed precision scaler
scaler = torch.cuda.amp.GradScaler()

# Define the optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Define a scheduler that reduces the learning rate every 3 epochs
scheduler = StepLR(optimizer, step_size=3, gamma=0.1)

  scaler = torch.cuda.amp.GradScaler()


In [12]:
# Early stopping parameters
patience = 3
trigger_times = 0
best_val_loss = float('inf')


In [13]:
# Training and validation loop
epochs = 10
best_val_acc = 0.0

# Initialize lists to store metrics
train_losses = []
train_accuracies = []
val_losses = []
val_accuracies = []

In [11]:
for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")

    # Training phase
    model.train()
    running_loss = 0.0
    running_corrects = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Track loss and accuracy
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)

    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc.item())
    
    print(f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

    # Validation phase
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            val_corrects += torch.sum(preds == labels.data)

            # Store predictions and labels for metrics
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_loss = val_loss / len(val_dataset)
    val_acc = val_corrects.double() / len(val_dataset)

    val_losses.append(val_loss)
    val_accuracies.append(val_acc.item())

    print(f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    # Precision, Recall, F1-Score
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=dataset.classes))

    if val_acc > best_val_acc:
        best_val_acc = val_acc
    
    # Save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), f'best_model{epoch}.pth')
        trigger_times = 0  # Reset early stopping counter
    else:
        trigger_times += 1
        print(f"Early stopping trigger count: {trigger_times}/{patience}")
        if trigger_times >= patience:
            print("Early stopping...")
            break

    # Step the scheduler
    scheduler.step()

print(f'Best Validation Accuracy: {best_val_acc:.4f}')
print(f'Best Validation loss: {best_val_loss:.4f}')


Epoch 1/10


OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacity of 15.89 GiB of which 101.12 MiB is free. Process 2515 has 15.79 GiB memory in use. Of the allocated memory 14.98 GiB is allocated by PyTorch, and 536.27 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
for epoch in range(epochs):
    epochs = 15
    print(f"Epoch {epoch+1}/{epochs}")
    model.train()
    running_loss = 0.0
    running_corrects = 0

    optimizer.zero_grad()
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Mixed precision forward pass
        with torch.cuda.amp.autocast():
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        # Backward pass with gradient scaling
        loss = loss / accumulation_steps
        scaler.scale(loss).backward()

        if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Track training metrics
        _, preds = torch.max(outputs, 1)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    epoch_loss = running_loss / len(train_dataset)
    epoch_acc = running_corrects.double() / len(train_dataset)

    print(f"Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

    # Validation Phase
    model.eval()
    val_loss = 0.0
    val_corrects = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            with torch.cuda.amp.autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            val_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            val_corrects += torch.sum(preds == labels.data)

            # Collect predictions and labels for metrics
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    val_loss /= len(val_dataset)
    val_acc = val_corrects.double() / len(val_dataset)

    print(f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=dataset.classes))

    # Early Stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), f'best_model_epoch_{epoch+1}.pth')
        trigger_times = 0
    else:
        trigger_times += 1
        print(f"Early stopping trigger count: {trigger_times}/{patience}")
        if trigger_times >= patience:
            print("Early stopping...")
            break

    # Step the scheduler
    scheduler.step()

print(f"Best Validation Accuracy: {best_val_acc:.4f}")
print(f"Best Validation Loss: {best_val_loss:.4f}")

Epoch 1/10


  with torch.cuda.amp.autocast():


Train Loss: 0.1359 Acc: 0.7440


  with torch.cuda.amp.autocast():


Val Loss: 0.4400 Acc: 0.7948

Classification Report:
              precision    recall  f1-score   support

        fake       0.82      0.74      0.78      1886
        real       0.77      0.85      0.81      1954

    accuracy                           0.79      3840
   macro avg       0.80      0.79      0.79      3840
weighted avg       0.80      0.79      0.79      3840

Epoch 2/10


  with torch.cuda.amp.autocast():


Train Loss: 0.1064 Acc: 0.8071


  with torch.cuda.amp.autocast():


Val Loss: 0.4948 Acc: 0.7956

Classification Report:
              precision    recall  f1-score   support

        fake       0.81      0.76      0.78      1886
        real       0.78      0.83      0.81      1954

    accuracy                           0.80      3840
   macro avg       0.80      0.79      0.80      3840
weighted avg       0.80      0.80      0.80      3840

Early stopping trigger count: 1/3
Epoch 3/10


  with torch.cuda.amp.autocast():


Train Loss: 0.1007 Acc: 0.8217


  with torch.cuda.amp.autocast():


Val Loss: 0.4148 Acc: 0.8193

Classification Report:
              precision    recall  f1-score   support

        fake       0.87      0.74      0.80      1886
        real       0.78      0.90      0.83      1954

    accuracy                           0.82      3840
   macro avg       0.83      0.82      0.82      3840
weighted avg       0.83      0.82      0.82      3840

Epoch 4/10


  with torch.cuda.amp.autocast():


Train Loss: 0.0904 Acc: 0.8452


  with torch.cuda.amp.autocast():


Val Loss: 0.3498 Acc: 0.8471

Classification Report:
              precision    recall  f1-score   support

        fake       0.84      0.85      0.85      1886
        real       0.85      0.85      0.85      1954

    accuracy                           0.85      3840
   macro avg       0.85      0.85      0.85      3840
weighted avg       0.85      0.85      0.85      3840





Train Loss: 0.0840 Acc: 0.8530


  with torch.cuda.amp.autocast():


Val Loss: 0.3493 Acc: 0.8628

Classification Report:
              precision    recall  f1-score   support

        fake       0.87      0.85      0.86      1886
        real       0.86      0.88      0.87      1954

    accuracy                           0.86      3840
   macro avg       0.86      0.86      0.86      3840
weighted avg       0.86      0.86      0.86      3840

Epoch 6/10


In [None]:
# Plot Loss
plt.figure(figsize=(10, 5))
plt.plot(range(1, epochs + 1), train_losses, label='Training Loss')
plt.plot(range(1, epochs + 1), val_losses, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot Accuracy
plt.figure(figsize=(10, 5))
plt.plot(range(1, epochs + 1), train_accuracies, label='Training Accuracy')
plt.plot(range(1, epochs + 1), val_accuracies, label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()


In [None]:
kklkjmklmnbhgjkjhk