<a href="https://colab.research.google.com/github/Nico25041/AI-Image-Detector/blob/main/AI_finder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import kagglehub

# Fetch the CIFAKE dataset through kagglehub; the call hands back the
# local directory that holds the downloaded files.
DATASET_HANDLE = "birdy654/cifake-real-and-ai-generated-synthetic-images"
path = kagglehub.dataset_download(DATASET_HANDLE)

# Show where the files were placed locally.
print("Path to dataset files:", path)

In [None]:

# Load train and test datasets
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

import os

import torch

# Every image is resized to 128x128 and converted to a tensor; 128x128 is the
# input size the CNN's first fully connected layer (128 * 16 * 16 features
# after three 2x2 poolings) is dimensioned for.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# Use the directory reported by kagglehub (`path`, set in the download cell)
# instead of a hard-coded "/kaggle/input/..." location, which only exists on
# hosts that mount datasets there.
train_dir = os.path.join(path, "train")
test_dir = os.path.join(path, "test")

# Load entire training set
full_train_dataset = datasets.ImageFolder(train_dir, transform=transform)

# Split the given training set into 90% training, 10% validation.
# A seeded generator keeps the split identical across runs, so validation
# numbers from different sessions are comparable.
train_size = int(0.9 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(
    full_train_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

# Create test data set
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

# Never request more loader workers than the machine has CPUs (8 at most,
# matching the original setting); oversubscription only adds overhead.
num_workers = min(8, os.cpu_count() or 1)

# Set up data loaders. Only the training loader shuffles.
train_dataloader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=num_workers
)
test_dataloader = DataLoader(
    test_dataset, batch_size=64, shuffle=False, num_workers=num_workers
)
val_loader = DataLoader(
    val_dataset, batch_size=32, shuffle=False, num_workers=num_workers
)


In [None]:
import torch.nn.functional as F
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from io import BytesIO
import pandas as pd
import numpy as np

class CNN(nn.Module):
    """Small three-stage convolutional classifier for 3-channel 128x128 images.

    Each stage is Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> 2x2 max-pool,
    so the spatial size is halved three times (128 -> 16) while the channel
    count grows 3 -> 32 -> 64 -> 128. The flattened features go through a
    512-unit hidden layer with dropout (p=0.5) and a final linear layer that
    emits one raw logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 2.
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Attribute names and creation order are unchanged so that state dicts
        # saved from earlier versions of this class still load.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)
        self.fc1 = nn.Linear(128 * 16 * 16, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch ``x``.

        Raises:
            RuntimeError: If the input's spatial size does not yield the
                128 * 16 * 16 features expected by ``fc1`` (i.e. the images
                are not 128x128).
        """
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        # Flatten per sample. Keeping the batch dimension fixed means a
        # wrongly sized input fails loudly in fc1 instead of being silently
        # reinterpreted as a larger batch.
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        return self.fc2(x)

In [None]:
import os

def save_checkpoint(model, epoch, model_folder="model/checkpoint/"):
    """Save the model's weights for one epoch and return the file path.

    The checkpoint is a dict with the keys ``"epoch"`` and
    ``"model_state_dict"``, written to ``<model_folder>/model_<epoch>.pth``.
    The folder is created when it does not exist yet.

    Args:
        model: Module whose ``state_dict()`` is stored.
        epoch: Value recorded in the checkpoint and used in the file name.
        model_folder: Directory that receives the checkpoint file.

    Returns:
        The path of the file that was written.
    """
    if model_folder:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(model_folder, exist_ok=True)

    model_out_path = os.path.join(model_folder, f"model_{epoch}.pth")
    state = {
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
    }
    torch.save(state, model_out_path)

    # This message used to be built but never shown; report it so the
    # training log records where each checkpoint went.
    print(f"Checkpoint saved to {model_out_path}")
    return model_out_path

In [None]:
#Two different models can be trained through this cell
# 1)Pretrained ResNet50 whose weights are fine-tuned on this dataset
# 2)Custom CNN

from types import CellType
from torchvision import datasets, transforms, models
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Default choice: the custom CNN defined earlier in this notebook.
model = CNN()
model = model.to(device)

# Alternative: pretrained ResNet50 with a replaced final layer.
# Uncomment the four lines below (and drop the CNN lines above) to use it.
# NOTE(review): this head has 3 outputs while the custom CNN has 2 - confirm
# the intended class count before enabling.

#model = models.resnet50(pretrained=True)
#num_features = model.fc.in_features
#model.fc = nn.Linear(num_features, 3)
#model = model.to(device)

# Adam over every parameter of the selected model, learning rate 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Training loop: each epoch runs one optimisation pass over the training set,
# measures accuracy on the validation set, and writes a checkpoint.
epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for imgs, labels in train_dataloader:
        # The loader already yields label tensors, so move them directly.
        # Re-wrapping with torch.tensor() made a needless copy and emitted a
        # UserWarning on every batch.
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(imgs)
        ce_loss = F.cross_entropy(outputs, labels)
        ce_loss.backward()
        optimizer.step()

        # Accumulates the per-batch losses; the value printed below is their
        # sum over the epoch, not an average.
        total_loss += ce_loss.item()

    # Validation accuracy (no gradients, eval mode for BatchNorm/Dropout)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    val_acc = 100 * correct / total
    print(f"Validation Accuracy: {val_acc:.2f}%")
    # Checkpoints are numbered with the 0-based epoch index, i.e. one less
    # than the epoch number printed on the next line.
    save_checkpoint(model, epoch)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

In [None]:
#Accuracy Testing
# Restore the weights from a saved checkpoint. File names carry the 0-based
# epoch index, so model_14.pth is the checkpoint written after the 15th epoch.
# map_location lets a checkpoint saved on a GPU load on a CPU-only host.
myModel = torch.load("model/checkpoint/model_14.pth", map_location=device)
model.load_state_dict(myModel["model_state_dict"])
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for imgs, labels in test_dataloader:
        # Labels arrive as tensors from the loader; move them directly
        # instead of re-wrapping with torch.tensor(), which copies and warns.
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        # Predicted class = index of the largest logit in each row.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")

In [None]:
#Testing for the confusion matrix, where all predictions and labels are put into lists for the graph
import torch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Run the model over the whole test set and gather, batch by batch, the
# predicted class index and the true label of every sample.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for batch_imgs, batch_labels in test_dataloader:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)
        logits = model(batch_imgs)
        # Index of the largest logit per row is the predicted class.
        batch_preds = logits.argmax(dim=1)
        # Bring results back to the CPU as NumPy values for scikit-learn.
        all_preds.extend(batch_preds.cpu().numpy())
        all_labels.extend(batch_labels.cpu().numpy())

In [None]:
#Displaying confusion matrix
# ImageFolder assigns class indices from the sorted folder names, so the tick
# labels are derived from the dataset rather than hard-coded; a fixed
# ["Real", "AI"] list mislabels the axes whenever index 0 is not the real class.
class_names = test_dataset.classes
# Friendlier captions for the expected folder names.
# NOTE(review): assumes the folders are called FAKE / REAL - any other name is
# shown unchanged.
friendly_names = {"FAKE": "AI", "REAL": "Real"}
display_labels = [friendly_names.get(name.upper(), name) for name in class_names]

# Passing `labels` pins the row/column order to the class indices and keeps
# the matrix square even if some class never appears in the predictions.
cm = confusion_matrix(all_labels, all_preds, labels=list(range(len(class_names))))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()