In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the local `dataloader` and `model` modules imported below) are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import kagglehub
import polars as pl
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
from PIL import Image
import os
from torch.utils.data import Dataset, DataLoader
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
import time

import dataloader as dl
from model import BinaryViT

  from .autonotebook import tqdm as notebook_tqdm


# Fetch the dataset from Kaggle

In [4]:

# Fetch the most recent version of the dataset through kagglehub; the call
# gives back the directory that holds the dataset files.
path = kagglehub.dataset_download(
    "shreyasraghav/shutterstock-dataset-for-ai-vs-human-gen-image"
)
# Location of the CSV index that is read into a dataframe in the next section.
train_data_path = os.path.join(path, "train.csv")

print("Path to dataset files:", path)

Path to dataset files: /home/dan/.cache/kagglehub/datasets/shreyasraghav/shutterstock-dataset-for-ai-vs-human-gen-image/versions/2


# Set up the data loader

In [5]:
# Set up the polars dataframe containing the path and label of the images.
train_data = pl.read_csv(train_data_path)
# Prepend the dataset directory to every file name with a native polars
# expression instead of a per-row Python lambda (`map_elements`), so the column
# is built inside the polars engine rather than by calling Python once per row.
# `os.path.join(path, "")` gives the directory with a trailing path separator.
# NOTE(review): assumes `file_name` holds paths relative to the dataset
# directory (an absolute entry would previously have been kept as-is by
# os.path.join) — confirm against train.csv.
train_data = train_data.with_columns(
    pl.concat_str([pl.lit(os.path.join(path, "")), pl.col("file_name")]).alias("path")
)

In [6]:
# Build the loader from the dataframe using the project-local `dataloader`
# module (imported as `dl`).
# NOTE(review): presumably returns a torch DataLoader — its `.dataset`
# attribute is used by the training cell; confirm in dataloader.py.
dataloader = dl.load_data(train_data)
# Optional sanity check, left disabled: fetch the first batch and display its
# first image.
# img, label = next(iter(dataloader))
# print(img.shape, label.shape)
# plt.imshow(img[0].permute(1, 2, 0))

# Training

In [9]:
from dataclasses import dataclass


@dataclass
class TrainReport:
    """Per-step training metrics collected by `train`.

    Attributes:
        accuracies: Fraction of correct predictions for each processed
            batch, in processing order across all epochs.
        losses: Loss value for each processed batch, in the same order.
    """
    accuracies  : list[float]
    losses      : list[float]


def train(model: nn.Module, dataloader, criterion, optimizer, epochs, device) -> TrainReport:
    """Train `model` for `epochs` passes over `dataloader` and report metrics.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Iterable yielding `(images, labels)` pairs. It is
            re-iterated once per epoch and does not need to define `__len__`.
        criterion: Callable `criterion(outputs, labels)` returning the loss
            tensor to back-propagate.
        optimizer: Optimiser that updates the parameters of `model`.
        epochs: Number of passes over `dataloader`.
        device: Device the images and labels are moved to before the forward
            pass.

    Returns:
        A `TrainReport` holding one loss and one accuracy entry per processed
        batch. Both lists are empty when `dataloader` yields nothing.
    """
    report = TrainReport([], [])
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Read the scalar once; each .item() call copies it back to the host.
            batch_loss = loss.item()
            running_loss += batch_loss
            num_batches += 1
            report.losses.append(batch_loss)
            # Predicted class is the index of the largest output along dim 1.
            report.accuracies.append((outputs.argmax(1) == labels).float().mean().item())

        # Average over the batches actually seen; max(..., 1) avoids a
        # ZeroDivisionError when the loader is empty.
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / max(num_batches, 1)}")

    # Hand the collected metrics back to the caller (the declared return value).
    return report

In [10]:
# Quick trial run: train on only the first 10 entries of the underlying dataset.
# NOTE(review): `data_for_train` is a Subset of `dataloader.dataset`, not a
# DataLoader, so train() walks it item by item — confirm this is intended.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
data_for_train = torch.utils.data.Subset(dataloader.dataset, range(10))

# Model, loss and optimiser; the model is moved to the device before Adam
# collects its parameters.
model = BinaryViT()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Run 10 epochs and report the wall-clock time spent.
print('Training...')
start = time.time()
train(model, data_for_train, criterion, optimizer, epochs=10, device=device)
print(f"Training took {time.time()-start:.2f} seconds")


Training...
Epoch 1/10, Loss: 2.7824938570149245
Epoch 2/10, Loss: 0.7428660750389099
Epoch 3/10, Loss: 0.680300161242485
Epoch 4/10, Loss: 0.5195346131920815
Epoch 5/10, Loss: 0.4087089017033577
Epoch 6/10, Loss: 0.26569681577384474
Epoch 7/10, Loss: 0.30899502013344315
Epoch 8/10, Loss: 0.3363210613839328
Epoch 9/10, Loss: 0.7763480836525559
Epoch 10/10, Loss: 0.10216161543503403
Training took 281.40 seconds
