# Baseline model

Poniżej znajduje się skrypt do trenowania modelu. W katalogu znajduje się również checkpoint aktualnej wersji modelu, gotowy do załadowania. Model jest jeszcze niedopracowany i wymaga dalszych badań nad architekturą sieci oraz zbiorem danych.

Użyte dane: 
1. https://www.kaggle.com/datasets/tristanzhang32/ai-generated-images-vs-real-images - Cały dataset treningowy
2. https://www.kaggle.com/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images - po 10000 zdjęć z każdej klasy (wygenerowane przez AI i naturalne) dodano do podzbioru treningowego oraz po 3000 do testowego (w celu uzupełnienia danych o zdjęcia w niskiej rozdzielczości)

In [1]:
import torch
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Report every visible CUDA device, then select the compute device used by
# the rest of the notebook: the default CUDA device when present, else CPU.
cuda_ready = torch.cuda.is_available()

if not cuda_ready:
    print("No CUDA devices available.")
else:
    for i in range(torch.cuda.device_count()):
        print(f"CUDA device {i}: {torch.cuda.get_device_name(i)}")

device = torch.device("cuda") if cuda_ready else torch.device("cpu")

CUDA device 0: NVIDIA GeForce GTX 970


In [12]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Kaggle credentials; each is None when the variable is not set.
KAGGLE_USERNAME = os.environ.get('KAGGLE_USERNAME')
KAGGLE_KEY = os.environ.get("KAGGLE_KEY")

# Set KAGGLEHUB_CACHE to ./data under the current working directory.
cache_dir = os.path.join(os.getcwd(), "data")
os.environ['KAGGLEHUB_CACHE'] = cache_dir

In [None]:
import kagglehub

# Download the latest version of the dataset; `path` is the local directory
# reported by kagglehub and is used below to locate the train/test folders.
path = kagglehub.dataset_download(
    "tristanzhang32/ai-generated-images-vs-real-images"
)
print("Path to dataset files:", path)


Path to dataset files: /home/ratattwg/Desktop/testing_baseline_model/data/datasets/tristanzhang32/ai-generated-images-vs-real-images/versions/2


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Per-channel mean/std used for input standardization (the widely used
# ImageNet statistics).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

BATCH_SIZE = 128
NUM_WORKERS = 12  # Set this to the number of CPU threads available

# Training pipeline: light augmentation, then tensor conversion + standardization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(128),  # Random-area crop, resized to 128x128
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    # Per-channel (x - mean) / std; the result is NOT confined to [-1, 1].
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])
# Evaluation pipeline: deterministic resize, same standardization as training.
test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# ImageFolder derives the class label of each image from its sub-directory.
train_dataset = dsets.ImageFolder(root=os.path.join(path, "train"), transform=train_transform)
test_dataset = dsets.ImageFolder(root=os.path.join(path, "test"), transform=test_transform)

# Page-locked host memory only helps when batches are copied to a CUDA device.
use_pinned_memory = device.type == "cuda"
# `pin_memory_device` is documented as a string, not a torch.device object;
# it is left empty when pinning is disabled.
pin_device = device.type if use_pinned_memory else ""

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
    pin_memory_device=pin_device,
    persistent_workers=True,  # keep the workers alive between epochs
    prefetch_factor=2,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,  # evaluation metrics do not depend on sample order
    num_workers=NUM_WORKERS,
    pin_memory=use_pinned_memory,
    pin_memory_device=pin_device,
    prefetch_factor=2,
)


In [7]:
# Summarize both loaders. The batch size is read from the loaders themselves
# so the message cannot drift from their actual configuration.
print(f"Dataloaders: {train_loader, test_loader}")
print(f"Length of train dataloader: {len(train_loader)} batches of {train_loader.batch_size}")
print(f"Length of test dataloader: {len(test_loader)} batches of {test_loader.batch_size}")

Dataloaders: (<torch.utils.data.dataloader.DataLoader object at 0x761670f400e0>, <torch.utils.data.dataloader.DataLoader object at 0x761668a30b00>)
Length of train dataloader: 532 batches of 128
Length of test dataloader: 141 batches of 128


In [8]:
from torch import nn
from torch.nn import functional as F
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score

class ResidualBlock(nn.Module):
    """
    Residual unit: conv3x3 -> BN -> ReLU -> conv3x3 -> BN, added to a
    shortcut branch and passed through a final ReLU.

    The shortcut is a 1x1 convolution when the channel count changes
    (in_ch != out_ch) and the identity otherwise. Every convolution keeps
    the spatial size (3x3 with padding=1, 1x1 without padding).
    """
    def __init__(self, in_ch, out_ch):
        super().__init__()
        # Main branch.
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_ch)
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_ch)
        # Shortcut branch: project only when the channel counts differ.
        if in_ch == out_ch:
            self.skip = nn.Identity()
        else:
            self.skip = nn.Conv2d(in_ch, out_ch, kernel_size=1)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        shortcut = self.skip(x)

        hidden = self.conv1(x)
        hidden = self.bn1(hidden)
        hidden = self.relu(hidden)

        hidden = self.conv2(hidden)
        hidden = self.bn2(hidden)

        merged = hidden + shortcut
        return self.relu(merged)

class CustomBinaryCNN(nn.Module):
    """
    Custom CNN for AI vs. natural image classification.

    Architecture:
    - 4 residual convolutional stages (3 -> 32 -> 64 -> 128 -> 256 channels),
      each followed by 2x2 max pooling and channel-wise dropout (p=0.2)
    - Global average pooling
    - Small classification head (256 -> 128 -> 1) with dropout (p=0.5)

    ``forward`` returns raw logits (one per sample), which is what
    ``nn.BCEWithLogitsLoss`` expects; ``predict`` returns sigmoid
    probabilities.
    """
    def __init__(self):
        super().__init__()
        # Attribute names and the layout of each stage determine the
        # state_dict keys, so they must stay stable for saved checkpoints.
        self.stage1 = self._make_stage(3, 32)
        self.stage2 = self._make_stage(32, 64)
        self.stage3 = self._make_stage(64, 128)
        self.stage4 = self._make_stage(128, 256)

        self.global_pool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(256, 128)
        self.dropout = nn.Dropout(0.5)
        self.classifier = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()

    @staticmethod
    def _make_stage(in_ch, out_ch):
        """Build one stage: residual block, 2x2 max pool, channel dropout."""
        return nn.Sequential(
            ResidualBlock(in_ch, out_ch),
            nn.MaxPool2d(2),
            nn.Dropout2d(0.2)
        )

    def forward(self, x):
        """Return logits of shape [B, 1] for an input batch of shape [B, 3, H, W]."""
        x = self.stage1(x)    # [B,32,H/2,W/2]
        x = self.stage2(x)    # [B,64,H/4,W/4]
        x = self.stage3(x)    # [B,128,H/8,W/8]
        x = self.stage4(x)    # [B,256,H/16,W/16]
        x = self.global_pool(x)  # [B,256,1,1]
        x = x.view(x.size(0), -1)  # [B,256]
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        logits = self.classifier(x)
        return logits

    def predict(self, x):
        """
        Return sigmoid probabilities of shape [B, 1] for the batch ``x``.

        Inference runs in eval mode with gradient tracking disabled. The
        module's previous train/eval mode is restored afterwards, so calling
        this in the middle of training does not silently disable dropout and
        batch-norm updates for the steps that follow.
        """
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                return self.sigmoid(self(x))
        finally:
            self.train(was_training)
    


In [9]:
from torch import nn
from torch.nn import functional as F
import numpy as np
from sklearn.metrics import roc_auc_score, precision_score, recall_score


def test_model(model, test_loader, loss_fn, threshold=0.5):
    """
    Evaluate a binary classifier on ``test_loader`` and print its metrics.

    Args:
        model: module exposing ``predict(features)`` that returns one
            probability per sample.
        test_loader: iterable yielding ``(features, labels)`` batches.
        loss_fn: currently unused; kept so existing call sites keep working.
        threshold: probability above which a sample is assigned to the
            positive class (label 1).

    Returns:
        dict with the keys ``accuracy``, ``auc``, ``precision`` and ``recall``.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    model.eval()

    all_labels = []
    all_outputs = []

    with torch.no_grad():
        for features, labels in test_loader:
            outputs = model.predict(features.to(device))

            # Labels are only consumed on the host, so they are never moved
            # to the compute device.
            all_labels.extend(labels.cpu().numpy())
            all_outputs.extend(outputs.view(-1).cpu().numpy())

    if not all_labels:
        raise ValueError("test_loader produced no batches to evaluate")

    y_true = np.array(all_labels)
    y_score = np.array(all_outputs)
    # Threshold once and reuse the hard predictions for every metric.
    y_pred = (y_score > threshold).astype(int)

    accuracy = float(np.mean(y_pred == y_true))
    auc_score = roc_auc_score(y_true, y_score)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)

    print(f"Test Accuracy: {accuracy:.4f}, AUC: {auc_score:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

    return {
        "accuracy": accuracy,
        "auc": auc_score,
        "precision": precision,
        "recall": recall,
    }

In [None]:
# Model training

from tqdm.auto import tqdm

loss = nn.BCEWithLogitsLoss()
model = CustomBinaryCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

epochs = 50

for epoch in range(epochs):
    model.train()
    correct = 0
    total = 0
    running_loss = 0.0  # sum of per-sample losses over the epoch
    for features, labels in tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{epochs}"):
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(features)
        logits = outputs.view(-1)
        loss_value = loss(logits, labels.float())
        loss_value.backward()
        optimizer.step()

        batch_count = labels.size(0)
        # The criterion returns the batch mean; weight it by the batch size
        # so a smaller final batch does not skew the epoch average.
        running_loss += loss_value.item() * batch_count
        preds = (logits > 0.0)  # logit > 0  <=>  sigmoid probability > 0.5
        correct += (preds == labels).sum().item()
        total += batch_count
    accuracy = correct / total
    epoch_loss = running_loss / total
    # Report the mean loss over the whole epoch rather than the loss of
    # whichever mini-batch happened to come last.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.4f}")

    # Evaluate on the test split and save a checkpoint every 5 epochs.
    if (epoch + 1) % 5 == 0:
        test_model(model, test_loader, loss)
        torch.save(model.state_dict(), f"./model_epoch_{epoch+1}.pth")


In [10]:
# Validation of the loaded model

model = CustomBinaryCNN().to(device)
# Map the checkpoint onto the device selected earlier instead of a hard-coded
# 'cuda', so this cell also works on machines without a GPU.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
model.load_state_dict(torch.load("./baseline_model.pth", map_location=device))

loss = nn.BCEWithLogitsLoss()
test_model(model, test_loader, loss)

  return F.conv2d(input, weight, bias, self.stride,


Test Accuracy: 0.8327, AUC: 0.9156, Precision: 0.8101, Recall: 0.8696
