In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("divg07/casia-20-image-tampering-detection-dataset")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/divg07/casia-20-image-tampering-detection-dataset?dataset_version_number=1...


100%|██████████| 2.56G/2.56G [00:48<00:00, 56.4MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/divg07/casia-20-image-tampering-detection-dataset/versions/1


In [2]:
!cp -r /root/.cache/kagglehub/datasets/divg07/casia-20-image-tampering-detection-dataset/versions/1 /content/

In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import (Dense, Dropout, Flatten, Conv2D, MaxPool2D,
                                   BatchNormalization, GlobalAveragePooling2D, Input)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
from PIL import Image, ImageChops, ImageEnhance
import gc

In [2]:
# Set memory growth for GPU if available
try:
    physical_devices = tf.config.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)
except:
    pass

In [3]:
def convert_to_ela_image(path, quality):
    """Enhanced ELA implementation with error handling and cleanup"""
    temp_filename = 'temp_file_name.jpg'
    try:
        image = Image.open(path).convert('RGB')
        image.save(temp_filename, 'JPEG', quality=quality)
        temp_image = Image.open(temp_filename)
        ela_image = ImageChops.difference(image, temp_image)

        # Calculate scaling factor
        extrema = ela_image.getextrema()
        max_diff = max([ex[1] for ex in extrema])
        max_diff = max(1, max_diff)  # Avoid division by zero
        scale = 255.0 / max_diff

        # Enhance and normalize
        ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
        return ela_image

    finally:
        # Cleanup temp file
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

In [4]:
def prepare_image(image_path, image_size=(128, 128)):  # Reduced image size
    """Memory-efficient image preparation"""
    try:
        ela_img = convert_to_ela_image(image_path, 90)
        ela_img = ela_img.resize(image_size)
        ela_array = np.array(ela_img)

        # Clear PIL Image objects
        ela_img.close()
        del ela_img

        return ela_array / 255.0

    except Exception as e:
        print(f"Error processing image {image_path}: {str(e)}")
        return None

class DataGenerator(tf.keras.utils.Sequence):
    """Custom data generator class"""
    def __init__(self, authentic_path, tampered_path, batch_size=32, is_training=True):
        self.batch_size = batch_size
        self.is_training = is_training
        self.authentic_files = []
        self.tampered_files = []

        # Collect file paths
        for root, _, files in os.walk(authentic_path):
            self.authentic_files.extend([os.path.join(root, f) for f in files
                                      if f.lower().endswith(('jpg', 'jpeg', 'png'))])

        for root, _, files in os.walk(tampered_path):
            self.tampered_files.extend([os.path.join(root, f) for f in files
                                      if f.lower().endswith(('jpg', 'jpeg', 'png'))])

        # Shuffle files
        np.random.shuffle(self.authentic_files)
        np.random.shuffle(self.tampered_files)

        # Limit dataset size for memory efficiency
        max_samples = 2000 if self.is_training else 500
        self.authentic_files = self.authentic_files[:max_samples]
        self.tampered_files = self.tampered_files[:max_samples]

        self.indexes = np.arange(len(self.authentic_files) + len(self.tampered_files))
        np.random.shuffle(self.indexes)

    def __len__(self):
        return int(np.floor(len(self.indexes) / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        X_batch = []
        y_batch = []

        for idx in indexes:
            if idx < len(self.authentic_files):
                img = prepare_image(self.authentic_files[idx])
                if img is not None:
                    X_batch.append(img)
                    y_batch.append(1)
            else:
                tampered_idx = idx - len(self.authentic_files)
                img = prepare_image(self.tampered_files[tampered_idx])
                if img is not None:
                    X_batch.append(img)
                    y_batch.append(0)

        return np.array(X_batch), np.array(y_batch)

    def on_epoch_end(self):
        np.random.shuffle(self.indexes)

In [10]:
def build_efficient_model(input_shape):
    """Memory-efficient model using MobileNetV2"""
    base_model = MobileNetV2(weights='imagenet',
                            include_top=False,
                            input_shape=input_shape)

    base_model.trainable = False

    model = Sequential([
        base_model,
        GlobalAveragePooling2D(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(2, activation='softmax')
    ])

    return model

In [13]:
def train_model(train_gen, val_gen, model):
    """Training function with Colab-specific settings"""
    callbacks = [
        EarlyStopping(
            monitor='val_accuracy',
            patience=5,
            mode='max',
            restore_best_weights=True
        ),
        ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=3,
            min_lr=1e-6
        ),
        ModelCheckpoint(
            'best_model.keras',
            monitor='val_accuracy',
            save_best_only=True,
            mode='max'
        )
    ]

    model.compile(
        optimizer=Adam(learning_rate=1e-4),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=20,
        callbacks=callbacks
    )

    return history

In [None]:
def main():
    # Define paths for Colab
    AUTHENTIC_PATH = '/content/1/CASIA2/Au'
    TAMPERED_PATH = '/content/1/CASIA2/Tp'

    # Parameters
    BATCH_SIZE = 16  # Small batch size for memory efficiency
    IMAGE_SIZE = (128, 128, 3)

    # Verify paths exist
    if not os.path.exists(AUTHENTIC_PATH) or not os.path.exists(TAMPERED_PATH):
        raise ValueError("Dataset paths not found. Please check the paths.")

    # Create data generators
    train_generator = DataGenerator(
        AUTHENTIC_PATH,
        TAMPERED_PATH,
        BATCH_SIZE,
        is_training=True
    )

    val_generator = DataGenerator(
        AUTHENTIC_PATH,
        TAMPERED_PATH,
        BATCH_SIZE,
        is_training=False
    )

    # Build and train model
    model = build_efficient_model(IMAGE_SIZE)

    history = train_model(
        train_generator,
        val_generator,
        model
    )

    # Save final model
    model.save('final_forgery_detection_model.h5')

    # Clear memory
    gc.collect()

if __name__ == "__main__":
    main()

Epoch 1/20


  self._warn_if_super_not_called()


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 232ms/step - accuracy: 0.6778 - loss: 0.7860 - val_accuracy: 0.8569 - val_loss: 0.3554 - learning_rate: 1.0000e-04
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 217ms/step - accuracy: 0.7958 - loss: 0.5591 - val_accuracy: 0.8760 - val_loss: 0.3289 - learning_rate: 1.0000e-04
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 214ms/step - accuracy: 0.8331 - loss: 0.4250 - val_accuracy: 0.8780 - val_loss: 0.3356 - learning_rate: 1.0000e-04
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 215ms/step - accuracy: 0.8348 - loss: 0.4121 - val_accuracy: 0.8891 - val_loss: 0.3212 - learning_rate: 1.0000e-04
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 213ms/step - accuracy: 0.8480 - loss: 0.3832 - val_accuracy: 0.8821 - val_loss: 0.2918 - learning_rate: 1.0000e-04
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━

In [None]:
import tensorflow as tf
import numpy as np
from PIL import Image, ImageChops, ImageEnhance
import os

def convert_to_ela_image(path, quality):
    """Convert image to ELA"""
    temp_filename = 'temp_file_name.jpg'
    try:
        with Image.open(path) as image:
            image = image.convert('RGB')
            image.save(temp_filename, 'JPEG', quality=quality)

            with Image.open(temp_filename) as temp_image:
                ela_image = ImageChops.difference(image, temp_image)

                extrema = ela_image.getextrema()
                max_diff = max([ex[1] for ex in extrema])
                max_diff = max(1, max_diff)
                scale = 255.0 / max_diff

                ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
                return ela_image
    finally:
        if os.path.exists(temp_filename):
            os.remove(temp_filename)

def predict_image(model_path, image_path):
    """Predict if image is authentic or tampered"""
    # Load the model
    model = tf.keras.models.load_model(model_path)

    # Process image
    ela_img = convert_to_ela_image(image_path, 90)
    ela_img = ela_img.resize((128, 128))
    ela_array = np.array(ela_img) / 255.0
    ela_array = np.expand_dims(ela_array, axis=0)

    # Make prediction
    prediction = model.predict(ela_array, verbose=0)
    authentic_prob = prediction[0][1] * 100
    tampered_prob = prediction[0][0] * 100

    # Print result
    print(f"\nImage: {image_path}")
    print(f"Result: {'AUTHENTIC' if authentic_prob > tampered_prob else 'TAMPERED'}")
    print(f"Confidence: {max(authentic_prob, tampered_prob):.2f}%")


In [19]:
# Replace with your model and image paths
predict_image('final_forgery_detection_model.h5', '/content/1/CASIA2/Tp/Tp_D_CRN_S_N_sec00041_sec00034_11251.jpg')




Image: /content/1/CASIA2/Tp/Tp_D_CRN_S_N_sec00041_sec00034_11251.jpg
Result: TAMPERED
Confidence: 96.35%
