# AI Image Detection - ResNet-50 Training on CIFAKE
**Final Year Project - IS Project 2**

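This notebook trains a binary classifier on the CIFAKE dataset to distinguish real photographs from AI-generated images. It uses transfer learning: a ResNet-50 backbone pre-trained on ImageNet is kept frozen, and a small dense classification head is trained on top of it.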

**Final Results:**
- Training Accuracy: 76.4%
- Validation Accuracy: 78.4%
- Test Accuracy: 80.3%
- Precision: 77.8%
- Recall: 84.8%

In [None]:
# CELL 1: Setup and Download Dataset
!pip install -q kaggle

import os
os.environ['KAGGLE_USERNAME'] = 'fake'
os.environ['KAGGLE_KEY'] = 'fake'
os.environ['KAGGLE_API_TOKEN'] = 'KGAT_976505f28bb394d7b30a11fdfb1d8126'

!kaggle datasets download -d birdy654/cifake-real-and-ai-generated-synthetic-images
!unzip -q cifake-real-and-ai-generated-synthetic-images.zip
print("Dataset downloaded and extracted")

In [None]:
# CELL 2: Imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

# Report the TensorFlow version in use and the GPU devices TensorFlow can see.
tf_version = tf.__version__
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"TensorFlow: {tf_version}")
print(f"GPU: {gpu_devices}")

In [None]:
# CELL 3: Configuration
# Every tunable value used by the later cells is defined here.
TRAIN_DIR = '/content/train'
TEST_DIR = '/content/test'

IMG_SIZE = (224, 224)      # (height, width) passed to the generators as target_size
BATCH_SIZE = 32
EPOCHS = 15                # upper bound; the training cell also uses early stopping
LEARNING_RATE = 0.0001
SEED = 42

# Seed the Python, NumPy and TensorFlow random number generators in one call so
# that shuffling, augmentation and weight initialisation vary less between runs.
# This alone does not guarantee bit-identical results on GPU.
keras.utils.set_random_seed(SEED)

print(f"Train directory: {TRAIN_DIR}")
print(f"Test directory: {TEST_DIR}")
print(f"Image size: {IMG_SIZE}")
print(f"Batch size: {BATCH_SIZE}")
print(f"Epochs: {EPOCHS}")
print(f"Learning rate: {LEARNING_RATE}")
print(f"Seed: {SEED}")

In [None]:
# CELL 4: Data Generators
# Builds three directory iterators: augmented training data, un-augmented
# validation data (both carved out of TRAIN_DIR) and the held-out test data.

# The ImageNet ResNet-50 weights were trained on inputs prepared by
# resnet50.preprocess_input (RGB -> BGR plus per-channel ImageNet mean
# subtraction, no scaling to [0, 1]). Feeding 1/255-rescaled pixels to the
# frozen backbone gives it inputs outside the range it was trained on, so the
# same preprocessing function is applied to every split instead of `rescale`.
# Any code that later serves the saved model must apply it as well.
resnet_preprocess = keras.applications.resnet50.preprocess_input

train_datagen = ImageDataGenerator(
    preprocessing_function=resnet_preprocess,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Validation images must not be randomly rotated/shifted/flipped, otherwise the
# validation metrics are noisy and not comparable with the test metrics. This
# generator uses the same validation_split as train_datagen so that the
# 'training' and 'validation' subsets stay complementary.
val_datagen = ImageDataGenerator(
    preprocessing_function=resnet_preprocess,
    validation_split=0.2
)

test_datagen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)

# shuffle=False keeps the prediction order aligned with test_generator.classes,
# which the confusion-matrix cell relies on.
test_generator = test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

print(f"Training samples: {train_generator.samples}")
print(f"Validation samples: {validation_generator.samples}")
print(f"Test samples: {test_generator.samples}")
print(f"Classes: {train_generator.class_indices}")

In [None]:
# CELL 5: Build Model
# Frozen ImageNet ResNet-50 feature extractor followed by a small trainable
# head that outputs a single sigmoid probability for the class with label 1.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    # Derived from IMG_SIZE so the model input cannot drift from the size the
    # data generators resize images to.
    input_shape=(*IMG_SIZE, 3)
)

# Transfer learning: only the layers added below are trained.
base_model.trainable = False

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss='binary_crossentropy',
    # Explicit names keep the history/log keys stable ('precision', 'recall')
    # when this cell is re-run; unnamed metric objects get a numeric suffix on
    # every new instantiation. The order here is the order model.evaluate()
    # returns the values in.
    metrics=[
        'accuracy',
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall')
    ]
)

print(" Model built and compiled")
model.summary()

In [None]:
# CELL 6: Train with Auto-Save
# Trains the classification head, checkpointing the best epoch to disk,
# stopping early when validation accuracy stalls and halving the learning rate
# when validation loss plateaus.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        'resnet50_cifake.h5',
        save_best_only=True,
        monitor='val_accuracy',
        mode='max',
        verbose=1
    ),
    # Monitors the same quantity as the checkpoint. Previously this callback
    # used its default (val_loss), so the weights restored into `model` - the
    # ones the evaluation cells measure - could come from a different epoch
    # than the file that gets exported.
    # NOTE(review): depending on the Keras version, best weights may only be
    # restored when early stopping actually triggers - confirm for the
    # installed version.
    keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        mode='max',
        patience=3,
        restore_best_weights=True
    ),
    # Same behaviour as before; the default monitor is only made explicit.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=2
    )
]

# The emoji in this message had been corrupted by a wrong text encoding.
print("🚀 Starting training...")
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=EPOCHS,
    callbacks=callbacks
)
print(" Training complete!")

In [None]:
# CELL 7: Plot Training History
# One figure with two side-by-side panels (accuracy, then loss); each panel
# overlays the training curve and the validation curve per epoch.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (train key, validation key, train label, validation label, title, y label)
panel_specs = (
    ('accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss',
     'Training Loss', 'Validation Loss', 'Model Loss', 'Loss'),
)

for ax, spec in zip(axes, panel_specs):
    train_key, val_key, train_label, val_label, title, y_label = spec
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()

In [None]:
# CELL 8: Evaluate on Test Set
# model.evaluate returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy, precision, recall.
test_loss, test_acc, test_precision, test_recall = model.evaluate(test_generator)

# F1 is the harmonic mean of precision and recall. If the model predicts no
# positives at all, both are 0 and the formula would divide by zero, so F1 is
# reported as 0.0 in that case.
precision_recall_sum = test_precision + test_recall
if precision_recall_sum > 0:
    test_f1 = 2 * (test_precision * test_recall) / precision_recall_sum
else:
    test_f1 = 0.0

print(f"\nFINAL TEST ACCURACY: {test_acc:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall: {test_recall:.4f}")
print(f"F1-Score: {test_f1:.4f}")

In [None]:
# CELL 9: Confusion Matrix
# Predicts every test image, thresholds the sigmoid output at 0.5 and compares
# the result with the labels the generator derived from the directory names.
predictions = model.predict(test_generator)
predicted_classes = (predictions > 0.5).astype(int).flatten()
true_classes = test_generator.classes

# The comparison below is only meaningful if there is exactly one prediction
# per labelled test image.
assert len(predicted_classes) == len(true_classes), (
    f"{len(predicted_classes)} predictions for {len(true_classes)} test labels"
)

# Take the label names from the generator, ordered by their integer index,
# instead of hardcoding them: a hardcoded list silently mislabels the report
# if the class directories are named or ordered differently.
class_names = [
    name
    for name, _ in sorted(test_generator.class_indices.items(), key=lambda item: item[1])
]

cm = confusion_matrix(true_classes, predicted_classes)
print("\nConfusion Matrix:")
print(cm)

print("\nClassification Report:")
print(classification_report(
    true_classes,
    predicted_classes,
    target_names=class_names
))

In [None]:
# CELL 10: Download Model
# Sends the checkpoint written during training to the browser when running in
# Google Colab. Outside Colab the google.colab package is not importable, so
# the cell reports where the file is instead of failing at the very end of a
# long run.
MODEL_PATH = 'resnet50_cifake.h5'

try:
    # Imported here rather than with the other imports because the package
    # only exists inside the Colab runtime.
    from google.colab import files
except ImportError:
    print(f"Not running in Google Colab; the trained model is at {os.path.abspath(MODEL_PATH)}")
else:
    files.download(MODEL_PATH)
    print("Model downloading to your computer...")