In [1]:
# Install required libraries
!pip install tensorflow kaggle

# Download the dataset (you'll need to upload your kaggle.json file first)
# Upload your kaggle.json file using the file upload button in Colab
from google.colab import files
files.upload()  # Upload your kaggle.json file

# Setup Kaggle credentials
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the CIFAKE dataset
!kaggle datasets download -d birdy654/cifake-real-and-ai-generated-synthetic-images

# Unzip the dataset
!unzip -q cifake-real-and-ai-generated-synthetic-images.zip



Saving kaggle.json to kaggle.json
Dataset URL: https://www.kaggle.com/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images
License(s): other
Downloading cifake-real-and-ai-generated-synthetic-images.zip to /content
  0% 0.00/105M [00:00<?, ?B/s]
100% 105M/105M [00:00<00:00, 1.79GB/s]


In [2]:
import os
import shutil
import random
from pathlib import Path

# Number of images per class held out of train/ as the validation set
VALIDATION_IMAGES_PER_CLASS = 10000


def move_to_validation(class_name, n_images=VALIDATION_IMAGES_PER_CLASS):
    """Move a random sample of one class's training images into validation/.

    The function is safe to call repeatedly: it only moves as many images as
    are still missing from validation/<class_name>, so re-running the cell
    does not shrink the training set a second time.

    Args:
        class_name: Name of the class sub-directory ('REAL' or 'FAKE').
        n_images: Target number of images in validation/<class_name>.

    Returns:
        The number of images moved by this call (0 if nothing was missing).
    """
    train_dir = os.path.join('train', class_name)
    val_dir = os.path.join('validation', class_name)
    os.makedirs(val_dir, exist_ok=True)

    missing = n_images - len(os.listdir(val_dir))
    if missing <= 0:
        return 0

    # os.listdir returns entries in arbitrary order, so the listing is sorted
    # first; otherwise the seeded shuffle would not select the same files on
    # every machine / run.
    candidates = sorted(os.listdir(train_dir))
    random.shuffle(candidates)
    selected = candidates[:missing]

    for img in selected:
        shutil.move(os.path.join(train_dir, img), os.path.join(val_dir, img))
    return len(selected)


# Set seed for reproducibility
random.seed(42)

# Move 10,000 REAL and 10,000 FAKE images from train to validation
for class_name in ('REAL', 'FAKE'):
    moved = move_to_validation(class_name)
    print(f"Moved {moved} {class_name} images to validation")

# Verify the split
print()
for split in ('train', 'validation', 'test'):
    for class_name in ('REAL', 'FAKE'):
        count = len(os.listdir(os.path.join(split, class_name)))
        print(f"{split.capitalize()} {class_name} images: {count}")

# Guard against leakage: no file name may be present in both train/ and
# validation/ for the same class.
for class_name in ('REAL', 'FAKE'):
    train_names = set(os.listdir(os.path.join('train', class_name)))
    val_names = set(os.listdir(os.path.join('validation', class_name)))
    assert not (train_names & val_names), (
        f"{class_name}: some images are in both train/ and validation/"
    )

Moved 10000 REAL images to validation
Moved 10000 FAKE images to validation

Train REAL images: 40000
Train FAKE images: 40000
Validation REAL images: 10000
Validation FAKE images: 10000
Test REAL images: 10000
Test FAKE images: 10000


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
import matplotlib.pyplot as plt

# Set seed for reproducibility
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Dataset configuration
img_height = 32
img_width = 32
batch_size = 32
# Number of cached batches held in the training shuffle buffer
SHUFFLE_BUFFER_BATCHES = 1000


def load_split(directory, shuffle):
    """Load one dataset split with image_dataset_from_directory.

    Class indices follow the alphanumeric order of the class sub-directory
    names (the Keras default when class_names is not given), so with
    label_mode='binary' FAKE maps to 0 and REAL maps to 1.

    Args:
        directory: Root directory of the split; one sub-directory per class.
        shuffle: Whether the loader shuffles the files. Use True for training
            and False for evaluation splits so their order is deterministic.

    Returns:
        A batched tf.data.Dataset yielding (images, labels) pairs.
    """
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        image_size=(img_height, img_width),
        batch_size=batch_size,
        label_mode='binary',
        shuffle=shuffle,
        seed=SEED,
    )


# Training files are shuffled by the loader so each batch mixes both classes.
train_ds = load_split('train', shuffle=True)
# Evaluation splits keep a fixed order: metrics do not depend on order, and
# predictions can be matched to labels collected in a separate pass.
val_ds = load_split('validation', shuffle=False)
test_ds = load_split('test', shuffle=False)

# Optimize dataset performance
AUTOTUNE = tf.data.AUTOTUNE
# cache() replays exactly the same elements in the same order on every
# iteration, which would freeze the training order across epochs. Shuffling
# after the cache reorders the cached batches at each epoch.
train_ds = (
    train_ds
    .cache()
    .shuffle(buffer_size=SHUFFLE_BUFFER_BATCHES, seed=SEED)
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

Found 80000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
