<a href="https://colab.research.google.com/github/Htets-Corner/SYNTHBUSTER_RAISE-1k/blob/main/synthbuster_real.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Step 0: Mount Drive and Import Libraries

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Import core libraries
import os
import numpy as np
import matplotlib.pyplot as plt

# Torch and torchvision
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Utilities
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Step 1: Prepare dataset loaders and save structured dataset into Drive (resumable)

import os
import shutil
import random
from tqdm import tqdm
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

# Paths to original datasets in your Drive
real_path = "/content/drive/MyDrive/RAISE/PNG"
ai_path = "/content/drive/MyDrive/SYNTHBUSTER_32"

# Destination structured dataset root (inside Drive)
dataset_root = "/content/drive/MyDrive/Binary_Dataset"
train_dir = os.path.join(dataset_root, "train")
test_dir = os.path.join(dataset_root, "test")

# Create folders
for split in ["train", "test"]:
    for cls in ["real", "ai"]:
        os.makedirs(os.path.join(dataset_root, split, cls), exist_ok=True)

# --- Handle real dataset (flat folder of PNGs) ---
real_images = os.listdir(real_path)
random.shuffle(real_images)

split_idx = int(0.8 * len(real_images))
train_real, test_real = real_images[:split_idx], real_images[split_idx:]

# Copy real train images (skip if exists)
for img in tqdm(train_real, desc="Copying Real Train"):
    dst = os.path.join(train_dir, "real", img)
    if not os.path.exists(dst):
        shutil.copy(os.path.join(real_path, img), dst)

# Copy real test images (skip if exists)
for img in tqdm(test_real, desc="Copying Real Test"):
    dst = os.path.join(test_dir, "real", img)
    if not os.path.exists(dst):
        shutil.copy(os.path.join(real_path, img), dst)

# --- Handle AI dataset (9 subfolders with JPGs) ---
ai_folders = [os.path.join(ai_path, f) for f in os.listdir(ai_path) if os.path.isdir(os.path.join(ai_path, f))]
ai_images = []
for folder in ai_folders:
    imgs = [os.path.join(folder, x) for x in os.listdir(folder)]
    ai_images.extend(imgs)

random.shuffle(ai_images)

split_idx = int(0.8 * len(ai_images))
train_ai, test_ai = ai_images[:split_idx], ai_images[split_idx:]

# Copy AI train images (skip if exists)
for img in tqdm(train_ai, desc="Copying AI Train"):
    dst = os.path.join(train_dir, "ai", os.path.basename(img))
    if not os.path.exists(dst):
        shutil.copy(img, dst)

# Copy AI test images (skip if exists)
for img in tqdm(test_ai, desc="Copying AI Test"):
    dst = os.path.join(test_dir, "ai", os.path.basename(img))
    if not os.path.exists(dst):
        shutil.copy(img, dst)

print("✅ Dataset structured successfully and saved in Google Drive at:", dataset_root)

# --- Define transforms ---
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# --- Create datasets & loaders ---
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
test_dataset = datasets.ImageFolder(test_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"Train set size: {len(train_dataset)} images")
print(f"Test set size: {len(test_dataset)} images")
print(f"Classes: {train_dataset.classes}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Copying Real Train: 100%|██████████| 799/799 [10:00<00:00,  1.33it/s]
Copying Real Test: 100%|██████████| 200/200 [02:37<00:00,  1.27it/s]
Copying AI Train: 100%|██████████| 7200/7200 [06:41<00:00, 17.93it/s]  
Copying AI Test: 100%|██████████| 1800/1800 [05:53<00:00,  5.09it/s]

✅ Dataset structured successfully and saved in Google Drive at: /content/drive/MyDrive/Binary_Dataset
Train set size: 1799 images
Test set size: 1071 images
Classes: ['ai', 'real']





In [6]:
# Step 2: Define MobileNetV2 model for Binary Classification

import torch
import torch.nn as nn
import torchvision.models as models

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Load pretrained MobileNetV2
model = models.mobilenet_v2(pretrained=True)

# Freeze feature extractor (optional, speeds up training if dataset is small)
for param in model.features.parameters():
    param.requires_grad = False

# Replace classifier for binary classification
model.classifier[1] = nn.Linear(model.last_channel, 2)

model = model.to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

print("✅ MobileNetV2 ready for binary classification (real vs AI)")


Using device: cpu




Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 35.5MB/s]


✅ MobileNetV2 ready for binary classification (real vs AI)
