In [56]:
import torch
from PIL import Image, ImageFile
import torch
import torchvision
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision.models import convnext_tiny
import os
import random
import shutil
from torchvision import models
import kagglehub

In [57]:
Image.MAX_IMAGE_PIXELS = None
ImageFile.LOAD_TRUNCATED_IMAGES = True
torch.backends.cudnn.benchmark = True

In [64]:
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

transform_val = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [8]:
path = kagglehub.dataset_download("tristanzhang32/ai-generated-images-vs-real-images")
print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/ai-generated-images-vs-real-images


In [63]:
!echo "Train AI:" && ls /kaggle/working/subset_dataset/val/fake | wc -l


Train AI:
2955


In [65]:
train_dataset = datasets.ImageFolder(root='/kaggle/working/subset_dataset/train', transform=transform_train)
validation_dataset = datasets.ImageFolder(root='/kaggle/working/subset_dataset/val', transform=transform_val)
test_dataset = datasets.ImageFolder(root='/kaggle/working/subset_dataset/test', transform=transform_test)

In [66]:
test_dataset.class_to_idx

{'fake': 0, 'real': 1}

In [67]:
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True)
validation_loader = DataLoader(validation_dataset, batch_size=128, shuffle=False, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=4, pin_memory=True)

In [68]:
images, labels = next(iter(train_loader))
print(images.shape)
print(labels)

torch.Size([128, 3, 224, 224])
tensor([1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
        0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
        0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
        1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 0])


In [69]:
device=''
if torch.cuda.is_available():
    device='cuda'
else:
    device='cpu'

print(device)

cuda


In [70]:
model = convnext_tiny(weights=None)
in_features = model.classifier[2].in_features
model.classifier[2] = nn.Linear(in_features, 1)
model = model.to(device, memory_format=torch.channels_last)

In [71]:
ckpt_path = '/kaggle/input/convnext/pytorch/default/1/convNext_final.pth'
model.load_state_dict(torch.load(ckpt_path, map_location=device))

<All keys matched successfully>

In [72]:
for param in model.parameters():
    param.requires_grad = False

for param in model.classifier[2].parameters():
    param.requires_grad = True

In [73]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.classifier[2].parameters(), lr=1e-4)

In [74]:
scaler = torch.amp.GradScaler()

In [75]:
def train(model, loader, optimizer, criterion):

    model.train()
    train_loss = 0.0

    for i, (inputs, labels) in enumerate(loader):

        inputs = inputs.to(device)
        labels = labels.float().unsqueeze(1).to(device, non_blocking=True)

        optimizer.zero_grad()

        with torch.amp.autocast(device_type=device):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        train_loss += loss.item() * inputs.size(0)

    return train_loss / len(loader.dataset)

In [76]:
def validate(model, loader, criterion):

    model.eval()
    running_loss  = 0.0
    correct = 0
    total = 0

    with torch.no_grad():

        for i, (inputs, labels) in enumerate(loader):

            inputs = inputs.to(device)
            labels = labels.float().unsqueeze(1).to(device, non_blocking=True)

            outputs = model(inputs)
            loss = criterion(outputs, labels)


            probs = torch.sigmoid(outputs)
            preds = (probs > 0.5).float()

            correct += (preds.squeeze(1) == labels.squeeze(1)).sum().item()
            total += labels.size(0)
            running_loss += loss.item() * inputs.size(0)


    avg_loss = running_loss / total
    accuracy = correct / total
    return avg_loss, accuracy

In [77]:
epochs = 5
PATIENCE = 2
best_val_acc = 0.0
patience_counter = 0

for epoch in range(epochs):

    train_loss = train(model, train_loader, optimizer, criterion)
    val_loss, val_acc = validate(model, validation_loader, criterion)

    print(f"""Epoch [{epoch+1}/{epochs}]
            Train Loss: {train_loss:.4f}
            Val Loss:   {val_loss:.4f}
            Val Acc:    {val_acc:.4f}
    """)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= PATIENCE:
        print("Early stopping triggered")
        break



Epoch [1/5]
            Train Loss: 2.4002
            Val Loss:   2.3269
            Val Acc:    0.5920
    




Epoch [2/5]
            Train Loss: 2.1461
            Val Loss:   2.0765
            Val Acc:    0.6072
    




Epoch [3/5]
            Train Loss: 1.9135
            Val Loss:   1.8440
            Val Acc:    0.6226
    




Epoch [4/5]
            Train Loss: 1.6785
            Val Loss:   1.6288
            Val Acc:    0.6347
    




Epoch [5/5]
            Train Loss: 1.4895
            Val Loss:   1.4299
            Val Acc:    0.6476
    


In [53]:
import shutil
import os

# Define the paths to remove
subset_folder = '/kaggle/working/subset_dataset'
zip_file = '/kaggle/working/subset_dataset.zip'

# Remove the unzipped folder
if os.path.exists(subset_folder):
    shutil.rmtree(subset_folder)
    print(f"Deleted folder: {subset_folder}")

# Remove the zip file (only do this after you have downloaded it!)
if os.path.exists(zip_file):
    os.remove(zip_file)
    print(f"Deleted zip: {zip_file}")

# Optional: Clear everything in working directory (be careful!)
# for filename in os.listdir('/kaggle/working'):
#     file_path = os.path.join('/kaggle/working', filename)
#     if os.path.isfile(file_path) or os.path.islink(file_path):
#         os.unlink(file_path)
#     elif os.path.isdir(file_path):
#         shutil.rmtree(file_path)


Deleted folder: /kaggle/working/subset_dataset


In [78]:
for p in model.parameters():
    p.requires_grad = False
for p in model.features[-1].parameters():
    p.requires_grad = True
for p in model.classifier[2].parameters():
    p.requires_grad = True

In [79]:
optimizer = torch.optim.AdamW(
    [
        {"params": model.features[-1].parameters(), "lr": 1e-5},
        {"params": model.classifier[2].parameters(),     "lr": 1e-4},
    ],
    weight_decay=1e-4
)

criterion = nn.BCEWithLogitsLoss()


In [None]:
EPOCHS = 14
PATIENCE = 3
best_val_acc = 0.0
patience_counter = 0


for epoch in range(EPOCHS):
    train_loss = train(model, train_loader, optimizer, criterion)
    val_loss, val_acc = validate(model, validation_loader, criterion)

    print(
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"Val Acc: {val_acc:.4f}"
    )

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= PATIENCE:
        print("Early stopping triggered")
        break

In [81]:
test_loss, test_acc = validate(
    model,
    test_loader,
    criterion
)



In [82]:
print(f"TEST Loss: {test_loss:.4f}")
print(f"TEST Accuracy: {test_acc:.4f}")

TEST Loss: 0.2070
TEST Accuracy: 0.9158


In [83]:
import shutil
import os

# Define the paths to remove
subset_folder = '/kaggle/working/subset_dataset'
zip_file = '/kaggle/working/subset_dataset.zip'

# Remove the unzipped folder
if os.path.exists(subset_folder):
    shutil.rmtree(subset_folder)
    print(f"Deleted folder: {subset_folder}")

# Remove the zip file (only do this after you have downloaded it!)
if os.path.exists(zip_file):
    os.remove(zip_file)
    print(f"Deleted zip: {zip_file}")

# Optional: Clear everything in working directory (be careful!)
# for filename in os.listdir('/kaggle/working'):
#     file_path = os.path.join('/kaggle/working', filename)
#     if os.path.isfile(file_path) or os.path.islink(file_path):
#         os.unlink(file_path)
#     elif os.path.isdir(file_path):
#         shutil.rmtree(file_path)

Deleted folder: /kaggle/working/subset_dataset
Deleted zip: /kaggle/working/subset_dataset.zip


In [84]:
torch.save(
    model.state_dict(),
    "/kaggle/working/convnext_tiny_final.pth"
)