1. Setup + Install Packages

In [13]:
# Mount Google Drive in Colab
# -------------------------------
# This allows your notebook to access files stored in your Google Drive.
# After running this cell, a link will appear.
# Click the link → choose your Google account → copy the authorization code → paste it back.
# Once authenticated, your Drive will be mounted under: /content/drive
from google.colab import drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
DATA_DIR = "/content/drive/MyDrive/Faces_10percent_resized"


In [3]:
# Install Required Libraries
# ----------------------------------------------------------
# - tensorflow==2.12.0 : Stable TensorFlow version for model training/inference
# - efficientnet        : EfficientNet model architecture
# - gradio              : For creating a web UI (interface for predictions)
# - piexif              : For handling EXIF data in images
# - pillow              : Image processing library (PIL)
# - opencv-python-headless : OpenCV without GUI (recommended for Colab)
#
# The "-q" flag keeps output quiet and clean.
!pip install -q tensorflow==2.12.0 efficientnet gradio piexif pillow opencv-python-headless


[31mERROR: Could not find a version that satisfies the requirement tensorflow==2.12.0 (from versions: 2.16.0rc0, 2.16.1, 2.16.2, 2.17.0rc0, 2.17.0rc1, 2.17.0, 2.17.1, 2.18.0rc0, 2.18.0rc1, 2.18.0rc2, 2.18.0, 2.18.1, 2.19.0rc0, 2.19.0, 2.19.1, 2.20.0rc0, 2.20.0)[0m[31m
[0m[31mERROR: No matching distribution found for tensorflow==2.12.0[0m[31m
[0m

2. Create 45% Subset

In [4]:
# ----------------------------------------------------------
# Create a 45% Subset of Your Dataset
# ----------------------------------------------------------
# This script copies 45% of images from each class folder
# (Real / Fake) and each split (Train / Val / Test)
# into a new directory called dataset_45.
#
# Purpose:
# - Reduce dataset size for faster experiments.
# - Keep class balance the same.
# ----------------------------------------------------------

subset_dir = "/content/dataset_45" # Output folder for the subset dataset
splits = ["Train", "Val", "Test"]   # Dataset splits
classes = ["Real", "Fake"]           # Two target classes

for split in splits:
    for cls in classes:
      # Source folder of full dataset
        src = Path(f"{DATA_DIR}/{split}/{cls}")

        # Destination folder for subset
        dst = Path(f"{subset_dir}/{split}/{cls}")
        dst.mkdir(parents=True, exist_ok=True)
         # List of image files in the source directory
        imgs = [p for p in src.iterdir() if p.suffix.lower() in [".jpg",".jpeg",".png"]]

        # Shuffle images to ensure random distribution
        random.shuffle(imgs)
 # Select 45% of images
        take = int(len(imgs) * 0.45)
        selected = imgs[:take]
        # Copy selected images to subset directory

        for img in selected:
            shutil.copy(img, dst)

print("✔ 45% subset created at:", subset_dir)


NameError: name 'Path' is not defined

3.Data Generators

In [5]:
# ----------------------------------------------------------
# Import Required Libraries
# ----------------------------------------------------------
# os, random, shutil, io, json  → System utilities (file paths, copying, randomness)
# pathlib.Path                  → Modern and clean file path handling
# numpy                         → Numerical operations (arrays, preprocessing)
# tensorflow / keras            → Deep learning framework and model building
# ImageDataGenerator            → Efficient loading & augmentation of images
# EfficientNetB0                → Pretrained CNN model used for feature extraction / fine-tuning
# models, layers                → Building neural network architectures
# PIL (Pillow)                  → Image handling (open, resize, enhance)
# gradio                        → Web-based UI to run the model in an interface
# ----------------------------------------------------------

import os, random, shutil, io, json
from pathlib import Path
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import models, layers
from PIL import Image, ImageChops, ImageEnhance, ExifTags
import gradio as gr

# ----------------------------------------------------------
# Global Configuration
# ----------------------------------------------------------
# IMG_SIZE → Input image size for the model (224x224 required by EfficientNetB0)
# BATCH    → Number of images processed per batch during training
# ----------------------------------------------------------


IMG_SIZE = (224, 224) # Resize all images to 224x224 for EfficientNetB0
BATCH = 32             # Batch size used for training/validation/testing


4. Build EfficientNetB0 Model

In [14]:
# Image Data Generators
# ----------------------------------------------------------
# These objects automatically:
#  - Load images from folders
#  - Resize them to the required IMG_SIZE
#  - Apply normalization (rescale)
#  - Perform real-time data augmentation (for training only)
#
# ImageDataGenerator helps prevent overfitting and improves
# model generalization by creating slightly modified images
# on each epoch.
# ----------------------------------------------------------

# Training data generator:
# - Normalizes pixel values (1/255)
# - Random zoom (zoom_range=0.1)
# - Random horizontal flip

train_gen = ImageDataGenerator(rescale=1/255, zoom_range=0.1, horizontal_flip=True)

# ----------------------------------------------------------

# Validation & Test data generators:
# - Only rescaling (no augmentation to keep evaluation stable)
val_gen   = ImageDataGenerator(rescale=1/255)


# Validation & Test data generators:
# - Only rescaling (no augmentation to keep evaluation stable)
train = train_gen.flow_from_directory(f"{subset_dir}/Train",
                                      target_size=IMG_SIZE, batch_size=BATCH, class_mode='binary')

val   = val_gen.flow_from_directory(f"{subset_dir}/Val",
                                    target_size=IMG_SIZE, batch_size=BATCH, class_mode='binary')

test  = val_gen.flow_from_directory(f"{subset_dir}/Test",
                                    target_size=IMG_SIZE, batch_size=BATCH, class_mode='binary')


FileNotFoundError: [Errno 2] No such file or directory: '/content/dataset_45/Train'

In [None]:

# ----------------------------------------------------------
# Build EfficientNetB0-Based Binary Classification Model
# ----------------------------------------------------------
# We use EfficientNetB0 as the feature extractor (pretrained on ImageNet).
# include_top=False  → removes the final ImageNet classification layer
# trainable=False    → freezes pretrained weights to speed up training
#
# After the base model, we add our own classification head:
#   - GlobalAveragePooling2D → converts feature maps into a vector
#   - Dense(128, relu)       → learnable fully-connected layer
#   - Dense(1, sigmoid)      → final output (Fake=0, Real=1)
#
# The model is compiled for binary classification using:
#   - loss='binary_crossentropy'
#   - optimizer='adam'
#   - metric='accuracy'
# ----------------------------------------------------------

# Load EfficientNetB0 with pretrained ImageNet weights
base = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224,224,3))
# Freeze base model weights (transfer learning)
base.trainable = False
# Build classification head
x = layers.GlobalAveragePooling2D()(base.output)
x = layers.Dense(128, activation='relu')(x)
out = layers.Dense(1, activation='sigmoid')(x)
# Create final model
model = models.Model(base.input, out)
# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


5. Train Model (First 10 Epochs)

In [None]:
# ----------------------------------------------------------
# Train the Model & Save Best Weights
# ----------------------------------------------------------
# We train the model for 10 epochs using:
#   - Training generator (train)
#   - Validation generator (val)
#
# ModelCheckpoint callback:
#   - Saves the model ONLY when validation accuracy improves
#   - Prevents overwriting with worse-performing checkpoints
#
# The final model will be saved as: model_10epochs.h5
# ----------------------------------------------------------

ckpt = "/content/model_10epochs.h5"

history = model.fit(
    train,
    validation_data=val,
    epochs=10,
    callbacks=[tf.keras.callbacks.ModelCheckpoint(ckpt, save_best_only=True)]
)

print("Model saved at:", ckpt)


  self._warn_if_super_not_called()


Epoch 1/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5143 - loss: 0.7048



[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 1s/step - accuracy: 0.5143 - loss: 0.7048 - val_accuracy: 0.4980 - val_loss: 0.6972
Epoch 2/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.5072 - loss: 0.6974



[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 1s/step - accuracy: 0.5072 - loss: 0.6974 - val_accuracy: 0.5020 - val_loss: 0.6952
Epoch 3/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.4942 - loss: 0.6958



[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 1s/step - accuracy: 0.4942 - loss: 0.6958 - val_accuracy: 0.4980 - val_loss: 0.6932
Epoch 4/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.4957 - loss: 0.6939



[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 1s/step - accuracy: 0.4956 - loss: 0.6939 - val_accuracy: 0.4980 - val_loss: 0.6931
Epoch 5/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 1s/step - accuracy: 0.5002 - loss: 0.6936 - val_accuracy: 0.4980 - val_loss: 0.6932
Epoch 6/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 1s/step - accuracy: 0.4949 - loss: 0.6932 - val_accuracy: 0.4980 - val_loss: 0.6932
Epoch 7/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 1s/step - accuracy: 0.5010 - loss: 0.6932 - val_accuracy: 0.4980 - val_loss: 0.6932
Epoch 8/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m254s[0m 1s/step - accuracy: 0.4992 - loss: 0.6932 - val_accuracy: 0.5020 - val_loss: 0.6931
Epoch 9/10
[1m197/197[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 1s/step - accuracy: 0.4986 - loss: 0.6932 -

6. Continue Training (20 More Epochs) — History 2

In [6]:
# Continue training without losing previous epochs
history2 = model.fit(
    train,
    validation_data=val,
    epochs=30   # train 30 more epochs
)

NameError: name 'model' is not defined

Save Final Model (History 2)

In [7]:
final_model_path = "/content/drive/MyDrive/my_model_after_history2.h5"
model.save(final_model_path)

print("✔ Saved model to:", final_model_path)


NameError: name 'model' is not defined

8. Plot

In [12]:
# ----------------------------------------------------------
# Plot Training & Validation Accuracy/Loss Curves
# ----------------------------------------------------------
# NOTE:
# history1 and history2 are assumed to be two training runs.
# We combine them to create a continuous graph.
#
# acc / val_acc   → Accuracy over epochs
# loss / val_loss → Loss over epochs
#
# These graphs help visualize:
#   ✓ Model learning progress
#   ✓ Overfitting (if val_acc drops while acc rises)
# ----------------------------------------------------------

import matplotlib.pyplot as plt

# Combine histories from two training phases (if used)
acc = history1.history['accuracy'] + history2.history['accuracy']
val_acc = history1.history['val_accuracy'] + history2.history['val_accuracy']

loss = history1.history['loss'] + history2.history['loss']
val_loss = history1.history['val_loss'] + history2.history['val_loss']

# Total number of epochs after combining runs
epochs = range(1, len(acc) + 1)

# ------------------ Accuracy Graph ------------------
plt.figure(figsize=(10, 5))
plt.plot(epochs, acc, label="Training Accuracy")
plt.plot(epochs, val_acc, label="Validation Accuracy")

plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training vs Validation Accuracy")
plt.legend()
plt.grid(True)
plt.show()

# ------------------ Loss Graph ----------------------
plt.figure(figsize=(10, 5))
plt.plot(epochs, loss, label="Training Loss")
plt.plot(epochs, val_loss, label="Validation Loss")

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.legend()
plt.grid(True)
plt.show()


NameError: name 'history1' is not defined

9. Evaluate on Test Set

In [8]:
# ----------------------------------------------------------
# Evaluate Model on Test Set Using Classification Report
# ----------------------------------------------------------
# - model.predict(test) gives predicted probabilities
# - We convert probabilities to class labels:
#       > 0.5 → Real (1)
#       <=0.5 → Fake (0)
#
# - classification_report shows:
#       precision, recall, f1-score, support
#
# This is one of the most important evaluation metrics
# for binary classification projects.
# ----------------------------------------------------------

from sklearn.metrics import classification_report

# Predict probabilities for all test images
pred_probs = model.predict(test)

# Convert probabilities → binary labels (0 or 1)
pred = (pred_probs > 0.5).astype(int).ravel()

# Print detailed evaluation report
print(classification_report(test.classes, pred))


NameError: name 'model' is not defined

10. Load Final Model & Run Gradio

In [9]:
# ----------------------------------------------------------
# Gradio Interface for Fake Image Detection
# ----------------------------------------------------------
# This interface allows users to upload an image and receive:
#   1. Prediction (Real / Fake + confidence %)
#   2. EXIF metadata of the image
#   3. Original uploaded image
#   4. ELA image (Error Level Analysis visualization)
#
# ELA Highlights tampered areas by showing recompression errors.
# ----------------------------------------------------------

import gradio as gr
from PIL import Image, ImageChops, ImageEnhance, ExifTags
import io, json
import numpy as np

# ----------------------------------------------------------
# Load Trained Model
# ----------------------------------------------------------
model = tf.keras.models.load_model(final_model_path)
print("✔ Loaded model for Gradio:", final_model_path)

# ----------------------------------------------------------
# Function: Error Level Analysis (ELA)
# ----------------------------------------------------------
# ELA helps detect image manipulation by:
#   - Recompressing the image at 90% quality
#   - Comparing original vs recompressed
#   - Highlighting differences (tampered pixels)
# ----------------------------------------------------------
def do_ela(img, q=90):
    buf = io.BytesIO()
    img.save(buf, "JPEG", quality=q)           # Recompress
    comp = Image.open(io.BytesIO(buf.getvalue()))
    diff = ImageChops.difference(img, comp)    # Pixel-wise difference
    return ImageEnhance.Brightness(diff).enhance(5)  # Increase visibility

# ----------------------------------------------------------
# Function: Extract EXIF Metadata
# ----------------------------------------------------------
def get_exif(img):
    try:
        raw = img._getexif()
        return {ExifTags.TAGS.get(k, k): v for k, v in raw.items()} if raw else {}
    except:
        return {}

# ----------------------------------------------------------
# Prediction Function for Gradio
# ----------------------------------------------------------
# Steps:
#   1. Resize image to 224×224
#   2. Normalize (divide by 255)
#   3. Model prediction → probability
#   4. Convert probability → Real/Fake label
#   5. Generate ELA + EXIF data
# ----------------------------------------------------------
def predict_img(img):

    # Convert PIL → numpy array → normalized tensor
    arr = np.expand_dims(np.array(img.resize(IMG_SIZE)) / 255.0, 0)

    # Probability output from model
    p = float(model.predict(arr)[0][0])

    # Threshold decision
    label = "Fake" if p >= 0.5 else "Real"

    # Generate ELA version of image
    ela = do_ela(img)

    # Extract metadata
    exif = json.dumps(get_exif(img), indent=2)

    # Return all items for interface
    return f"{label} ({p*100:.2f}%)", exif, img, ela

# ----------------------------------------------------------
# Gradio Interface Setup
# ----------------------------------------------------------
# Interface returns:
#   Text   → Prediction
#   Text   → EXIF metadata
#   Image  → Original image
#   Image  → ELA visualization
# ----------------------------------------------------------

gr.Interface(
    fn=predict_img,
    inputs=gr.Image(type="pil"),
    outputs=["text", "text", "image", "image"],
    title="Fake Image Detector"
).launch(share=True)




✔ Loaded model for Gradio: /content/drive/MyDrive/my_model_after_history2.h5
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://871f1b64052b7731bb.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


