<a href="https://colab.research.google.com/github/Htets-Corner/SYNTHBUSTER_RAISE-1k/blob/main/syn32_real_Mobilenetv2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Step 0: Mount Drive and Import Libraries

# Mount Google Drive at /content/drive; later cells read the real images
# from MyDrive/RAISE/PNG under this mount point.
from google.colab import drive
drive.mount('/content/drive')

# Import core libraries
import os
import numpy as np
import matplotlib.pyplot as plt

# Torch and torchvision
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Utilities
import kagglehub
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns


Mounted at /content/drive


In [None]:
# List the top level of the mounted Drive to confirm the mount worked
# (the path is relative to the current working directory).
!ls drive/MyDrive/

In [5]:
# Folder on the mounted Drive that holds the real images
folder_path = "drive/MyDrive/RAISE/PNG"

# Keep only entries whose name ends in .png (case-insensitive), then count them
png_names = [name for name in os.listdir(folder_path) if name.lower().endswith(".png")]
png_count = len(png_names)

print(f"Number of PNG files in '{folder_path}': {png_count}")

Number of PNG files in 'drive/MyDrive/RAISE/PNG': 999


In [6]:
# Step 1: Load Datasets and Split into Train/Test

# Download Synthbuster (JPEG resized version) from Kaggle.
# `kagglehub` is imported in the imports cell at the top of the notebook so
# that every dependency is visible in one place.
# `dataset_download` returns the local directory of the downloaded dataset
# version; it is used below as the source of the AI-generated class.
synth_path = kagglehub.dataset_download("devpatel484/synthbuster-32")
print("Path to Synthbuster dataset:", synth_path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/devpatel484/synthbuster-32?dataset_version_number=1...


100%|██████████| 9.04M/9.04M [00:00<00:00, 106MB/s]

Extracting files...





Path to Synthbuster dataset: /root/.cache/kagglehub/datasets/devpatel484/synthbuster-32/versions/1


In [None]:
# Paths
real_path = "/content/drive/MyDrive/RAISE/PNG"   # Real images
ai_path   = synth_path   # AI images (JPEGs)
dataset_root = "/content/dataset"   # ImageFolder root: one subfolder per class

# Fixed seed so train/test membership is identical on every run
SPLIT_SEED = 42

# Channel statistics used to normalise inputs; shared by both transforms so
# the train and test pipelines cannot drift apart.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

# Define transforms (resize, normalize, augment for train)
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),  # MobileNetV2 expects 224x224
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Evaluation pipeline: same preprocessing, no random augmentation
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])


def _link_children(src_dir, dst_dir):
    """Symlink every non-hidden entry of ``src_dir`` into ``dst_dir``.

    Symlinking instead of copying saves disk space. ``dst_dir`` is created if
    needed, and entries whose link already exists are skipped, so re-running
    the cell does not fail with "File exists".
    """
    os.makedirs(dst_dir, exist_ok=True)
    for entry in sorted(os.listdir(src_dir)):
        if entry.startswith("."):
            continue  # same behaviour as the shell glob `*`, which skips dotfiles
        link_path = os.path.join(dst_dir, entry)
        if not os.path.lexists(link_path):
            os.symlink(os.path.join(src_dir, entry), link_path)


# Combine both sources under one ImageFolder layout:
#   /content/dataset/real/...  and  /content/dataset/ai/...
_link_children(real_path, os.path.join(dataset_root, "real"))
_link_children(ai_path, os.path.join(dataset_root, "ai"))

# Two views over the SAME files. A Subset only stores indices and forwards to
# its parent dataset, so assigning `.dataset.transform` on one split would also
# change the other. Separate ImageFolder objects keep augmentation on the
# training split only.
full_dataset = datasets.ImageFolder(root=dataset_root, transform=transform_train)
eval_dataset = datasets.ImageFolder(root=dataset_root, transform=transform_test)
assert full_dataset.samples == eval_dataset.samples, "dataset views are misaligned"

# Train/Test split (80/20), reproducible through the seeded generator
train_size = int(0.8 * len(full_dataset))
test_size  = len(full_dataset) - train_size
train_dataset, _test_split = torch.utils.data.random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Re-point the held-out indices at the un-augmented view
test_dataset = torch.utils.data.Subset(eval_dataset, _test_split.indices)

# Dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader  = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Total images: {len(full_dataset)}")
print(f"Train: {len(train_dataset)}, Test: {len(test_dataset)}")
print(f"Classes: {full_dataset.classes}")

In [None]:
# Sanity check: list the entries under dataset/real (the class folder that the
# previous cell fills with symlinks). The path is relative to the current
# working directory.
#!ls dataset/ai/resized_data_Synthbuster/Synthbuster_Dataset/
!ls dataset/real

In [8]:
# Step 2: Develop MobileNetV2 Model for Binary Classification

# Load pretrained MobileNetV2. The `weights=` enum replaces the deprecated
# `pretrained=True` flag; IMAGENET1K_V1 is the checkpoint that flag loaded.
mobilenet_v2 = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

# Freeze feature extractor (optional: comment out if you want full fine-tuning)
for param in mobilenet_v2.features.parameters():
    param.requires_grad = False

# Modify classifier for 2 classes (real vs AI); the new layer is trainable
mobilenet_v2.classifier[1] = nn.Linear(mobilenet_v2.last_channel, 2)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mobilenet_v2 = mobilenet_v2.to(device)

# Define loss function & optimizer.
# Only parameters that still require gradients are handed to Adam, so frozen
# backbone weights are not tracked by the optimizer. If the freeze loop above
# is commented out, every parameter is picked up automatically.
criterion = nn.CrossEntropyLoss()
trainable_params = [p for p in mobilenet_v2.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

#print(mobilenet_v2)




Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 143MB/s]


In [None]:
# Step 3: Training Loop with Accuracy, Loss Tracking & Saving Best Model

num_epochs = 20
best_acc = 0.0

# Per-epoch history, kept for later visualization
train_losses, test_losses = [], []
train_accs, test_accs = [], []

# Split sizes are the denominators for the per-epoch averages
n_train = len(train_dataset)
n_test = len(test_dataset)

for epoch in range(num_epochs):
    # ==================== Training pass ====================
    mobilenet_v2.train()
    running_loss = 0.0
    running_corrects = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear grads, forward, loss, backward, update
        optimizer.zero_grad()
        outputs = mobilenet_v2(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Predicted class = index of the largest logit per sample
        preds = outputs.max(dim=1).indices
        # The loss is a batch mean; scale by batch size to accumulate a sum
        running_loss += loss.item() * images.size(0)
        running_corrects += (preds == labels).sum()

    epoch_train_loss = running_loss / n_train
    epoch_train_acc = running_corrects.double() / n_train

    # ==================== Evaluation pass ====================
    mobilenet_v2.eval()
    test_loss = 0.0
    test_corrects = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = mobilenet_v2(images)
            loss = criterion(outputs, labels)

            preds = outputs.max(dim=1).indices
            test_loss += loss.item() * images.size(0)
            test_corrects += (preds == labels).sum()

    epoch_test_loss = test_loss / n_test
    epoch_test_acc = test_corrects.double() / n_test

    # Record this epoch's metrics (accuracies stored as plain Python floats)
    train_losses.append(epoch_train_loss)
    test_losses.append(epoch_test_loss)
    train_accs.append(epoch_train_acc.item())
    test_accs.append(epoch_test_acc.item())

    # Checkpoint whenever the held-out accuracy improves on the best so far
    if epoch_test_acc > best_acc:
        best_acc = epoch_test_acc
        torch.save(mobilenet_v2.state_dict(), "best_mobilenetv2.pth")
        print("💾 Best model saved")

    # One-line progress summary for the epoch
    progress = (
        f"Epoch [{epoch+1}/{num_epochs}] "
        f"Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.2%} "
        f"Test Loss: {epoch_test_loss:.4f}, Test Acc: {epoch_test_acc:.2%}"
    )
    print(progress)
