<a href="https://colab.research.google.com/github/Htets-Corner/CIFAKE/blob/main/CIFAKE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install kaggle




In [5]:
from google.colab import files
files.upload()   # upload kaggle.json


Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"htetnaychiwin","key":"da94e4e262fe768cf88491fcc2233731"}'}

In [6]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [12]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("birdy654/cifake-real-and-ai-generated-synthetic-images")

print("Path to dataset files:", path)


Downloading from https://www.kaggle.com/api/v1/datasets/download/birdy654/cifake-real-and-ai-generated-synthetic-images?dataset_version_number=3...


100%|██████████| 105M/105M [00:03<00:00, 32.8MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3


In [13]:
import os

print("Dataset is stored at:", path)
print("Files:", os.listdir(path))


Dataset is stored at: /root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3
Files: ['train', 'test']


In [24]:
#Load with PyTorch
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

# Path to dataset
data_path = "/root/.cache/kagglehub/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images/versions/3"

# Transformations
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # CIFAR size
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Train & Test
train_data = datasets.ImageFolder(root=os.path.join(data_path, "train"), transform=transform)
test_data = datasets.ImageFolder(root=os.path.join(data_path, "test"), transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)

print("Train size:", len(train_data))   # expect 100000
print("Test size:", len(test_data))     # expect 20000
print("Classes:", train_data.classes)   # ['FAKE', 'REAL']


Train size: 100000
Test size: 20000
Classes: ['FAKE', 'REAL']


In [16]:
import torch.nn as nn
import torch.nn.functional as F

class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)   # (3,32,32) → (32,32,32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)  # (32,32,32) → (64,32,32)
        self.pool = nn.MaxPool2d(2, 2)                # halves H,W
        self.fc1 = nn.Linear(64*8*8, 128)             # after 2 pools → 8x8
        self.fc2 = nn.Linear(128, 2)                  # 2 classes (FAKE, REAL)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # (32,32,32) → (32,16,16)
        x = self.pool(F.relu(self.conv2(x)))   # (64,16,16) → (64,8,8)
        x = x.view(-1, 64*8*8)                 # flatten
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [17]:
import torch.optim as optim
import time
start_time = time.time()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNClassifier().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")


current_time = time.time()
total = current_time - start_time
print(f'Training Took: {total/60} minutes!')

Epoch 1/10, Loss: 0.2547
Epoch 2/10, Loss: 0.1651
Epoch 3/10, Loss: 0.1335
Epoch 4/10, Loss: 0.1133
Epoch 5/10, Loss: 0.0918
Epoch 6/10, Loss: 0.0742
Epoch 7/10, Loss: 0.0598
Epoch 8/10, Loss: 0.0491
Epoch 9/10, Loss: 0.0385
Epoch 10/10, Loss: 0.0339
Training Took: 8.63235346476237 minutes!


In [19]:
correct, total = 0, 0
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 94.59%


In [21]:
test_data[1978]

(tensor([[[ 0.0824,  0.0824,  0.0824,  ...,  0.1608,  0.0275, -0.0824],
          [ 0.0824,  0.0824,  0.0824,  ...,  0.0196,  0.1137,  0.2392],
          [ 0.0824,  0.0824,  0.0824,  ...,  0.0902, -0.0039, -0.0980],
          ...,
          [-0.5373, -0.5373, -0.5373,  ..., -0.5059, -0.5216, -0.5529],
          [-0.4902, -0.4902, -0.4902,  ..., -0.4588, -0.4588, -0.4588],
          [-0.4118, -0.4118, -0.4118,  ..., -0.4275, -0.3961, -0.3725]],
 
         [[ 0.3020,  0.3020,  0.3020,  ...,  0.4353,  0.3176,  0.2078],
          [ 0.3020,  0.3020,  0.3020,  ...,  0.2784,  0.3882,  0.5137],
          [ 0.3020,  0.3020,  0.3020,  ...,  0.3255,  0.2314,  0.1373],
          ...,
          [-0.3569, -0.3569, -0.3569,  ..., -0.4039, -0.4196, -0.4510],
          [-0.3333, -0.3333, -0.3333,  ..., -0.3569, -0.3569, -0.3569],
          [-0.2549, -0.2549, -0.2549,  ..., -0.3255, -0.2941, -0.2706]],
 
         [[ 0.6078,  0.6078,  0.6078,  ...,  0.7647,  0.6627,  0.5529],
          [ 0.6078,  0.6078,