In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import timm
import matplotlib.pyplot as plt
import numpy as np

# Path to your dataset
train_path = '/kaggle/input/cifake-real-and-ai-generated-synthetic-images/train'
test_path = '/kaggle/input/cifake-real-and-ai-generated-synthetic-images/test'

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to ViT input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Load datasets
train_dataset = datasets.ImageFolder(train_path, transform=transform)
test_dataset = datasets.ImageFolder(test_path, transform=transform)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Check classes
print("Classes:", train_dataset.classes)
num_classes = len(train_dataset.classes)

# Load Vision Transformer model
model = timm.create_model('vit_base_patch16_224', pretrained=True)
model.head = nn.Linear(model.head.in_features, num_classes)  # Update final layer
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4)

# Training the model
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

# Save the trained model
torch.save(model.state_dict(), 'vit_custom_dataset.pth')
print("Model saved as 'vit_custom_dataset.pth'.")

# Testing the model
model.eval()
correct = 0
total = 0
all_predictions = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        all_predictions.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")

# Confusion matrix and classification report
from sklearn.metrics import classification_report, confusion_matrix

print("Classification Report:")
print(classification_report(all_labels, all_predictions, target_names=train_dataset.classes))

print("Confusion Matrix:")
print(confusion_matrix(all_labels, all_predictions))

Classes: ['FAKE', 'REAL']


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Epoch 1/10, Loss: 0.3118
Epoch 2/10, Loss: 0.2014
Epoch 3/10, Loss: 0.1782
Epoch 4/10, Loss: 0.4120
Epoch 5/10, Loss: 0.3175
Epoch 6/10, Loss: 0.2804
Epoch 7/10, Loss: 0.2764
Epoch 8/10, Loss: 0.2593
Epoch 9/10, Loss: 0.2699
Epoch 10/10, Loss: 0.2645
Model saved as 'vit_custom_dataset.pth'.
Test Accuracy: 88.14%
Classification Report:
              precision    recall  f1-score   support

        FAKE       0.87      0.89      0.88     10000
        REAL       0.89      0.87      0.88     10000

    accuracy                           0.88     20000
   macro avg       0.88      0.88      0.88     20000
weighted avg       0.88      0.88      0.88     20000

Confusion Matrix:
[[8947 1053]
 [1319 8681]]
