In [52]:
import torch
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        print(f"CUDA device {i}: {torch.cuda.get_device_name(i)}")
else:
    print("No CUDA devices available.")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

CUDA device 0: NVIDIA GeForce GTX 970


In [53]:
import os
from dotenv import load_dotenv

load_dotenv()

KAGGLE_USERNAME = os.getenv('KAGGLE_USERNAME')
KAGGLE_KEY = os.getenv("KAGGLE_KEY")

os.environ['KAGGLEHUB_CACHE'] = os.path.join(os.getcwd(), "data")
os.getenv("KAGGLEHUB_CACHE")

'/home/ratattwg/Desktop/testing_baseline_model/data'

In [54]:
import kagglehub

path = kagglehub.dataset_download("tristanzhang32/ai-generated-images-vs-real-images")


In [55]:
print(path)

/home/ratattwg/Desktop/testing_baseline_model/data/datasets/tristanzhang32/ai-generated-images-vs-real-images/versions/2


In [56]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as dsets

train_transform = transforms.Compose([  
    transforms.RandomResizedCrop(128),  # Randomly crop to 128x128
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])  # Normalize to [-1, 1]
])
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])  # Normalize to [-1, 1]
])

train_dataset = dsets.ImageFolder(root=os.path.join(path, "train"), transform=train_transform)
test_dataset = dsets.ImageFolder(root=os.path.join(path, "test"), transform=test_transform)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=12,
    pin_memory=True,
    pin_memory_device="cuda",
    persistent_workers=True,
    prefetch_factor=2
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=12,
    pin_memory=True,
    pin_memory_device="cuda",
    prefetch_factor=2
)


In [57]:
print(f"Dataloaders: {train_loader, test_loader}") 
print(f"Length of train dataloader: {len(train_loader)} batches of {128}")
print(f"Length of test dataloader: {len(test_loader)} batches of {128}")

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x754d04ed5fa0>, <torch.utils.data.dataloader.DataLoader object at 0x754d04ed5190>)
Length of train dataloader: 375 batches of 128
Length of test dataloader: 94 batches of 128


In [58]:
from torch import nn
from torch.nn import functional as F
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score

class ResidualBlock(nn.Module):
    """
    Two 3×3 convs with batchnorm and ReLU, plus skip connection.
    """
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_ch)
        self.skip = nn.Conv2d(in_ch, out_ch, kernel_size=1) if in_ch != out_ch else nn.Identity()
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        skip = self.skip(x)
        return self.relu(out + skip)

class CustomBinaryCNN(nn.Module):
    """
    Custom CNN for AI vs. natural image classification.
    - 4 residual convolutional stages
    - SpatialDropout2d for regularization
    - Global average pooling
    - Small classification head
    """
    def __init__(self):
        super().__init__()
        self.stage1 = nn.Sequential(
            ResidualBlock(3, 32),
            nn.MaxPool2d(2),
            nn.Dropout2d(0.2)
        )
        self.stage2 = nn.Sequential(
            ResidualBlock(32, 64),
            nn.MaxPool2d(2),
            nn.Dropout2d(0.2)
        )
        self.stage3 = nn.Sequential(
            ResidualBlock(64, 128),
            nn.MaxPool2d(2),
            nn.Dropout2d(0.2)
        )
        self.stage4 = nn.Sequential(
            ResidualBlock(128, 256),
            nn.MaxPool2d(2),
            nn.Dropout2d(0.2)
        )

        self.global_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(256, 128)
        self.dropout = nn.Dropout(0.5)
        self.classifier = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.stage1(x)    # [B,32,H/2,W/2]
        x = self.stage2(x)    # [B,64,H/4,W/4]
        x = self.stage3(x)    # [B,128,H/8,W/8]
        x = self.stage4(x)    # [B,256,H/16,W/16]
        x = self.global_pool(x)  # [B,256,1,1]
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        logits = self.classifier(x)
        return logits
    
    def predict(self, x):
        self.eval()
        with torch.no_grad():
            x = self(x)
            return self.sigmoid(x)
    


In [59]:
def test_model(model, test_loader, loss_fn):
    model.eval()
    total_loss = 0.0
    all_labels = []
    all_outputs = []
    
    with torch.no_grad():
        for i, (features, labels) in enumerate(test_loader):
            features, labels = features.to(device), labels.to(device)
            outputs = model.predict(features)
            
            all_labels.extend(labels.cpu().numpy())
            all_outputs.extend(outputs.view(-1).cpu().numpy())

            #batch_accuracy = np.mean((outputs.view(-1).numpy() > 0.5) == labels.numpy())
            #batch_auc = roc_auc_score(labels.numpy(), outputs.view(-1).numpy())
            #print(f"Batch {i+1}/{len(test_loader)}, Loss: {loss_value.item():.4f}, Accuracy: {batch_accuracy:.4f}, AUC: {batch_auc:.4f}")
    
    accuracy = np.mean((np.array(all_outputs) > 0.5) == np.array(all_labels))
    
    auc_score = roc_auc_score(all_labels, all_outputs)
    precision = precision_score(all_labels, (np.array(all_outputs) > 0.5).astype(int))
    recall = recall_score(all_labels, (np.array(all_outputs) > 0.5).astype(int))
    
    print(f"Test Accuracy: {accuracy:.4f}, AUC: {auc_score:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

In [60]:
loss = nn.BCEWithLogitsLoss()
model = CustomBinaryCNN().to(device)
model.load_state_dict(torch.load("./model_epoch_25.pth", map_location=device))
print(next(model.parameters()).is_cuda)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

epochs = 50

from tqdm.auto import tqdm

for epoch in range(25, epochs):
    model.train()
    correct = 0
    total = 0
    for features, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{epochs}"):
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(features)
        loss_value = loss(outputs.view(-1), labels.float())
        loss_value.backward()
        optimizer.step()
        preds = (outputs.view(-1) > 0.0)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    accuracy = correct / total
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss_value.item():.4f}, Accuracy: {accuracy:.4f}")

    if (epoch + 1) % 5 == 0:
        test_model(model, test_loader, loss)
        torch.save(model.state_dict(), f"./model_epoch_{epoch+1}.pth")


True


Training Epoch 26/50:   0%|          | 0/375 [00:00<?, ?it/s]

Training Epoch 26/50: 100%|██████████| 375/375 [07:30<00:00,  1.20s/it]


Epoch 26/50, Loss: 0.3805, Accuracy: 0.8202


Training Epoch 27/50: 100%|██████████| 375/375 [07:36<00:00,  1.22s/it]


Epoch 27/50, Loss: 0.3831, Accuracy: 0.8214


Training Epoch 28/50: 100%|██████████| 375/375 [07:34<00:00,  1.21s/it]


Epoch 28/50, Loss: 0.3978, Accuracy: 0.8182


Training Epoch 29/50: 100%|██████████| 375/375 [07:35<00:00,  1.22s/it]


Epoch 29/50, Loss: 0.3372, Accuracy: 0.8229


Training Epoch 30/50: 100%|██████████| 375/375 [07:34<00:00,  1.21s/it]

Epoch 30/50, Loss: 0.3745, Accuracy: 0.8243





Test Accuracy: 0.8438, AUC: 0.9222, Precision: 0.8404, Recall: 0.8488


Training Epoch 31/50: 100%|██████████| 375/375 [07:38<00:00,  1.22s/it]


Epoch 31/50, Loss: 0.4389, Accuracy: 0.8209


Training Epoch 32/50: 100%|██████████| 375/375 [07:35<00:00,  1.21s/it]


Epoch 32/50, Loss: 0.3264, Accuracy: 0.8226


Training Epoch 33/50: 100%|██████████| 375/375 [07:38<00:00,  1.22s/it]


Epoch 33/50, Loss: 0.4566, Accuracy: 0.8219


Training Epoch 34/50: 100%|██████████| 375/375 [07:37<00:00,  1.22s/it]


Epoch 34/50, Loss: 0.3272, Accuracy: 0.8263


Training Epoch 35/50: 100%|██████████| 375/375 [07:40<00:00,  1.23s/it]

Epoch 35/50, Loss: 0.5002, Accuracy: 0.8257





Test Accuracy: 0.8488, AUC: 0.9264, Precision: 0.8695, Recall: 0.8207


Training Epoch 36/50: 100%|██████████| 375/375 [07:36<00:00,  1.22s/it]


Epoch 36/50, Loss: 0.3976, Accuracy: 0.8244


Training Epoch 37/50: 100%|██████████| 375/375 [07:37<00:00,  1.22s/it]


Epoch 37/50, Loss: 0.3994, Accuracy: 0.8250


Training Epoch 38/50: 100%|██████████| 375/375 [07:36<00:00,  1.22s/it]


Epoch 38/50, Loss: 0.3812, Accuracy: 0.8252


Training Epoch 39/50: 100%|██████████| 375/375 [07:34<00:00,  1.21s/it]


Epoch 39/50, Loss: 0.4909, Accuracy: 0.8251


Training Epoch 40/50: 100%|██████████| 375/375 [07:38<00:00,  1.22s/it]

Epoch 40/50, Loss: 0.3540, Accuracy: 0.8244





Test Accuracy: 0.8516, AUC: 0.9282, Precision: 0.8535, Recall: 0.8488


Training Epoch 41/50:  32%|███▏      | 121/375 [03:06<06:32,  1.54s/it]


KeyboardInterrupt: 

In [61]:
model = CustomBinaryCNN().to(device)
model.load_state_dict(torch.load("./model_epoch_40.pth", map_location=torch.device('cuda')))

<All keys matched successfully>

In [10]:
loss = nn.BCEWithLogitsLoss()
model.eval()
test_model(model, test_loader, loss)

  return F.conv2d(input, weight, bias, self.stride,


Test Accuracy: 0.8403, AUC: 0.9219, Precision: 0.8138, Recall: 0.8823


In [65]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

print("Path to dataset files:", path)


test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])  # Normalize to [-1, 1]
])
test_dataset = dsets.ImageFolder(root=os.path.join(path, "test"), transform=test_transform)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    pin_memory_device="cuda",
    prefetch_factor=2
)

Path to dataset files: /home/ratattwg/Desktop/testing_baseline_model/data/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3


In [66]:
model.eval()
test_model(model, test_loader, loss)

Test Accuracy: 0.5148, AUC: 0.4800, Precision: 0.5095, Recall: 0.7917
