<a href="https://colab.research.google.com/github/Sandunika2000/BUS-Reservation-System/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from PIL import Image, ImageEnhance, ImageChops
import random
from shutil import copyfile
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle as pkl
import warnings
warnings.filterwarnings("ignore")

In [None]:
import kagglehub

# Fetch (or reuse the cached copy of) the newest dataset version and remember
# where it was placed on disk.
DATASET_HANDLE = "divg07/casia-20-image-tampering-detection-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'casia-20-image-tampering-detection-dataset' dataset.
Path to dataset files: /kaggle/input/casia-20-image-tampering-detection-dataset


In [None]:
import os
# Show the top-level entries of the downloaded dataset directory.
dataset_entries = os.listdir(path)
print(dataset_entries)


['CASIA2']


In [None]:
from google.colab import drive
# Attach Google Drive to the Colab filesystem at the mount point below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [None]:
# =============================================================
# Copy-Move / Doctored Image Detection using ELA + VGG16
# Colab-ready, KaggleHub auto-path detection
# =============================================================

# -----------------------------
# 1️⃣ Imports
# -----------------------------
import kagglehub
import os, random, shutil
from PIL import Image, ImageEnhance, ImageChops
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras import models, layers, optimizers, callbacks
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score, precision_score, recall_score, f1_score

# Reproducibility: push one shared seed into every RNG this notebook relies on
# (Python's `random`, NumPy's legacy global RNG, and TensorFlow).
SEED = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

# -----------------------------
# 2️⃣ Download dataset via KaggleHub
# -----------------------------
path = kagglehub.dataset_download("divg07/casia-20-image-tampering-detection-dataset")
print("Dataset path:", path)

# Both class folders sit under the same "CASIA2" directory of the download.
casia_root = os.path.join(path, "CASIA2")
AU_SRC, TP_SRC = (os.path.join(casia_root, sub) for sub in ("Au", "Tp"))
print("Authentic folder:", AU_SRC)
print("Tampered folder:", TP_SRC)

# Sanity check: stop immediately if the expected layout is not there.
for required_dir, failure_msg in (
    (AU_SRC, "Authentic folder does not exist!"),
    (TP_SRC, "Tampered folder does not exist!"),
):
    assert os.path.exists(required_dir), failure_msg

# -----------------------------
# 3️⃣ Temporary working folders for ELA
# -----------------------------
ELA_ROOT = '/content/ela_images'
ALL_REALS, ALL_DOCS = '/content/all_reals', '/content/all_docs'

# Wipe whatever a previous run left behind so stale files cannot leak into the
# new split, then recreate the two ELA output folders.
for stale_dir in filter(os.path.exists, (ALL_REALS, ALL_DOCS, ELA_ROOT)):
    shutil.rmtree(stale_dir)

for work_dir in (ALL_REALS, ALL_DOCS):
    os.makedirs(work_dir, exist_ok=True)

# -----------------------------
# 4️⃣ ELA conversion
# -----------------------------
def image_to_ela(path, quality, resave_path):
    """Write an Error Level Analysis (ELA) version of one image.

    The image is re-compressed as JPEG at ``quality``, the per-pixel absolute
    difference between the original and the re-compressed copy is taken, and
    that difference is brightened so its largest channel value maps to 255.
    The result is saved as ``<resave_path>/<source stem>.jpg``.

    Args:
        path: Path of the source image.
        quality: JPEG quality used for the intermediate re-compression.
        resave_path: Output directory; created if it does not exist.

    Returns:
        True when the ELA image was written, False when any step failed (the
        error is printed rather than raised so a batch run can continue).
    """
    import io  # local import: only needed for the in-memory JPEG round-trip

    try:
        # Context managers release the underlying file handles promptly;
        # convert() returns an independent, fully loaded copy.
        with Image.open(path) as source:
            image = source.convert('RGB')

        # Re-compress in memory instead of through a fixed scratch file: no
        # leftover temp file, and no clash if two runs execute at once.
        buffer = io.BytesIO()
        image.save(buffer, 'JPEG', quality=quality)
        buffer.seek(0)
        with Image.open(buffer) as recompressed:
            resaved = recompressed.convert('RGB')

        ela_image = ImageChops.difference(image, resaved)

        # getextrema() yields one (min, max) pair per band for an RGB image;
        # scale by the largest max, guarding against an all-zero difference.
        max_diff = max(band_max for _, band_max in ela_image.getextrema())
        scale = 255.0 / max(1, max_diff)
        ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)

        os.makedirs(resave_path, exist_ok=True)
        stem = os.path.splitext(os.path.basename(path))[0]
        out_path = os.path.join(resave_path, stem + '.jpg')
        ela_image.save(out_path, 'JPEG')
        return True
    except Exception as e:
        print(f"[ELA ERROR] {path} -> {e}")
        return False

def preprocess_data(src_folder, dest_folder, quality=90, extensions=('jpg', 'jpeg', 'png')):
    """Convert every matching image in ``src_folder`` to an ELA image.

    Args:
        src_folder: Directory containing the source images (not recursed).
        dest_folder: Directory that receives the ELA JPEGs.
        quality: JPEG quality forwarded to ``image_to_ela``.
        extensions: File extensions to process, with or without a leading
            dot, matched case-insensitively. The default keeps the original
            jpg/jpeg/png selection; pass extra ones (e.g. 'tif', 'bmp') to
            include files that would otherwise be skipped.

    Returns:
        The number of images that were converted successfully.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    wanted = {'.' + ext.lower().lstrip('.') for ext in extensions}

    # Compare the real extension (not a bare suffix such as "jpg", which would
    # also match a name like "notjpg"), ignore sub-directories, and sort so the
    # processing order does not depend on the filesystem's listing order.
    files = sorted(
        f for f in os.listdir(src_folder)
        if os.path.splitext(f)[1].lower() in wanted
        and os.path.isfile(os.path.join(src_folder, f))
    )

    count = 0
    for f in files:
        if image_to_ela(os.path.join(src_folder, f), quality, dest_folder):
            count += 1
    print(f"Processed {count} images from {src_folder} -> {dest_folder}")
    return count

# Build the ELA version of each class: authentic images first, then tampered.
for source_dir, ela_dir in ((AU_SRC, ALL_REALS), (TP_SRC, ALL_DOCS)):
    preprocess_data(source_dir, ela_dir, quality=90)
print("ELA images ready.")

# -----------------------------
# 5️⃣ Train / Validation Split
# -----------------------------
TRAIN_RATIO = 0.8
BATCH_SIZE = 32
IMG_SIZE = (224, 224)
EPOCHS = 25

def make_split(src_dir, dest_root, label_name, ratio=TRAIN_RATIO):
    """Randomly split the ``.jpg`` files of ``src_dir`` into train and validation sets.

    Files are copied (not moved) into ``<dest_root>/train/<label_name>`` and
    ``<dest_root>/Val/<label_name>``.

    Args:
        src_dir: Directory holding the images of one class.
        dest_root: Root directory of the split dataset.
        label_name: Class sub-folder name used under both split folders.
        ratio: Fraction of files assigned to training; the remainder goes to
            validation. The train count is ``int(len(files) * ratio)``.

    Returns:
        A ``(n_train, n_val)`` tuple with the number of files copied to each split.
    """
    # os.listdir() returns entries in arbitrary order, so sort before shuffling;
    # otherwise the same random seed can still produce a different split.
    files = sorted(f for f in os.listdir(src_dir) if f.endswith('.jpg'))
    random.shuffle(files)
    n_train = int(len(files) * ratio)
    train_files, val_files = files[:n_train], files[n_train:]

    # NOTE: the validation folder is spelled 'Val' (capital V); the generator
    # that reads the split must use the same spelling.
    train_path = os.path.join(dest_root, 'train', label_name)
    val_path = os.path.join(dest_root, 'Val', label_name)

    for target_dir, members in ((train_path, train_files), (val_path, val_files)):
        os.makedirs(target_dir, exist_ok=True)
        for f in members:
            shutil.copy(os.path.join(src_dir, f), os.path.join(target_dir, f))

    print(f"{label_name}: {len(train_files)} train | {len(val_files)} val")
    return len(train_files), len(val_files)

# Split each class in turn; the label doubles as the class sub-folder name.
for ela_dir, class_label in ((ALL_REALS, 'Real'), (ALL_DOCS, 'Doctored')):
    make_split(ela_dir, ELA_ROOT, class_label)

# -----------------------------
# 6️⃣ Image Generators with Augmentation
# -----------------------------
# Augmentation is applied to the training stream only; validation images get
# nothing but the VGG16 input preprocessing.
augmentation = dict(
    rotation_range=15,
    zoom_range=0.1,
    horizontal_flip=True,
    width_shift_range=0.05,
    height_shift_range=0.05,
)
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation)
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by both directory iterators.
flow_common = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='binary')

train_data = train_gen.flow_from_directory(
    os.path.join(ELA_ROOT, 'train'),
    shuffle=True,
    seed=SEED,
    **flow_common
)
# Validation order is kept fixed (shuffle=False) so predictions can be lined
# up with `val_data.classes` afterwards.
val_data = val_gen.flow_from_directory(
    os.path.join(ELA_ROOT, 'Val'),
    shuffle=False,
    **flow_common
)
print("Classes:", train_data.class_indices)

# -----------------------------
# 7️⃣ Compute Class Weights
# -----------------------------
# Balanced class weights to offset the class imbalance; left as None when the
# training set contains a single class.
class_weights = None
present_classes = np.unique(train_data.classes)
if len(present_classes) > 1:
    w = compute_class_weight('balanced', classes=present_classes, y=train_data.classes)
    class_weights = {class_idx: weight for class_idx, weight in enumerate(w)}
    print("Class weights:", class_weights)

# -----------------------------
# 8️⃣ VGG16 Model
# -----------------------------
base = VGG16(weights='imagenet', include_top=False, input_shape=(*IMG_SIZE, 3))

# Fine-tune only the last four layers of the backbone; freeze everything before them.
first_trainable = len(base.layers) - 4
for position, layer in enumerate(base.layers):
    layer.trainable = position >= first_trainable

# Classification head: pooled features -> 512-unit ReLU -> dropout -> sigmoid.
x = base.output
for head_layer in (
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
):
    x = head_layer(x)
out = layers.Dense(1, activation='sigmoid')(x)

model = models.Model(inputs=base.input, outputs=out)
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# -----------------------------
# 9️⃣ Callbacks
# -----------------------------
ckpt = '/content/best_vgg16_ela.h5'

# Keep the model with the best validation accuracy on disk.
save_best = callbacks.ModelCheckpoint(
    filepath=ckpt, monitor='val_accuracy', save_best_only=True, verbose=1
)
# Halve the learning rate after 3 epochs without a val_loss improvement.
lr_decay = callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=3, verbose=1
)
# Stop after 7 epochs without a val_loss improvement and roll back to the best weights.
early_stop = callbacks.EarlyStopping(
    monitor='val_loss', patience=7, restore_best_weights=True, verbose=1
)

cb = [save_best, lr_decay, early_stop]

# -----------------------------
# 🔟 Train
# -----------------------------
# These counts are still defined because later code refers to `steps_val`, but
# they are deliberately NOT passed to fit() any more.
# Floor division gives one step fewer than the number of batches the generator
# yields whenever the sample count is not a multiple of BATCH_SIZE. The
# leftover partial batch then spills into the next epoch, which ends after a
# single step (the captured training log shows every second epoch stopping at
# step 1/237 with single-batch metrics), and validation silently drops its
# last partial batch.
steps_train = train_data.samples // BATCH_SIZE
steps_val = val_data.samples // BATCH_SIZE

# With no explicit step counts Keras runs each epoch (and each validation
# pass) over the full length of the generator.
history = model.fit(
    train_data,
    epochs=EPOCHS,
    validation_data=val_data,
    class_weight=class_weights,
    callbacks=cb
)

# -----------------------------
# 11️⃣ Evaluate
# -----------------------------
# Score the checkpoint written by ModelCheckpoint rather than whatever weights
# the model holds when training stops.
model.load_weights(ckpt)

val_data.reset()
# Let Keras take the batch count from the generator: a hand-computed
# `samples // BATCH_SIZE + 1` is one step too many whenever the sample count
# is an exact multiple of the batch size.
pred_probs = model.predict(val_data, verbose=1)
pred_scores = pred_probs.ravel()
true_labels = val_data.classes
assert len(pred_scores) == len(true_labels), (
    f"Got {len(pred_scores)} predictions for {len(true_labels)} validation images"
)
pred_labels = (pred_scores > 0.5).astype(int)

# The sigmoid output is the probability of class index 1. Folder names are
# indexed alphabetically, so 'Doctored' is index 0 and scikit-learn's default
# pos_label=1 would report precision/recall for *Real* images. Tampering is
# the class being detected, so select it explicitly.
tampered_idx = val_data.class_indices['Doctored']

acc  = accuracy_score(true_labels, pred_labels)
prec = precision_score(true_labels, pred_labels, pos_label=tampered_idx)
rec  = recall_score(true_labels, pred_labels, pos_label=tampered_idx)
f1   = f1_score(true_labels, pred_labels, pos_label=tampered_idx)

print(f"\nValidation Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f} (positive class: Doctored)")

# Order the display labels by class index so they line up with the matrix
# rows/columns regardless of dict ordering.
class_names = sorted(val_data.class_indices, key=val_data.class_indices.get)
cm = confusion_matrix(true_labels, pred_labels)
ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names).plot(cmap='Blues')
plt.show()


Using Colab cache for faster access to the 'casia-20-image-tampering-detection-dataset' dataset.
Dataset path: /kaggle/input/casia-20-image-tampering-detection-dataset
Authentic folder: /kaggle/input/casia-20-image-tampering-detection-dataset/CASIA2/Au
Tampered folder: /kaggle/input/casia-20-image-tampering-detection-dataset/CASIA2/Tp
Processed 7437 images from /kaggle/input/casia-20-image-tampering-detection-dataset/CASIA2/Au -> /content/all_reals
Processed 2064 images from /kaggle/input/casia-20-image-tampering-detection-dataset/CASIA2/Tp -> /content/all_docs
ELA images ready.
Real: 5949 train | 1488 val
Doctored: 1651 train | 413 val
Found 7600 images belonging to 2 classes.
Found 1901 images belonging to 2 classes.
Classes: {'Doctored': 0, 'Real': 1}
Class weights: {0: np.float64(2.301635372501514), 1: np.float64(0.6387628172802151)}
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m5888925

Epoch 1/25
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22s/step - accuracy: 0.7923 - loss: 0.4952 
Epoch 1: val_accuracy improved from -inf to 0.90254, saving model to /content/best_vgg16_ela.h5




[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6376s[0m 27s/step - accuracy: 0.7925 - loss: 0.4947 - val_accuracy: 0.9025 - val_loss: 0.2191 - learning_rate: 1.0000e-04
Epoch 2/25
[1m  1/237[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:25:40[0m 22s/step - accuracy: 0.9062 - loss: 0.1218
Epoch 2: val_accuracy improved from 0.90254 to 0.90996, saving model to /content/best_vgg16_ela.h5




[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1096s[0m 5s/step - accuracy: 0.9062 - loss: 0.1218 - val_accuracy: 0.9100 - val_loss: 0.2121 - learning_rate: 1.0000e-04
Epoch 3/25
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23s/step - accuracy: 0.8800 - loss: 0.2919 
Epoch 3: val_accuracy did not improve from 0.90996
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6382s[0m 27s/step - accuracy: 0.8800 - loss: 0.2918 - val_accuracy: 0.8835 - val_loss: 0.3016 - learning_rate: 1.0000e-04
Epoch 4/25
[1m  1/237[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:27:53[0m 22s/step - accuracy: 0.9062 - loss: 0.1919
Epoch 4: val_accuracy did not improve from 0.90996
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1036s[0m 4s/step - accuracy: 0.9062 - loss: 0.1919 - val_accuracy: 0.8814 - val_loss: 0.3003 - learning_rate: 1.0000e-04
Epoch 5/25
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22s/step - accuracy: 0.8908 - loss: 0.24



[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6306s[0m 26s/step - accuracy: 0.9077 - loss: 0.2172 - val_accuracy: 0.9274 - val_loss: 0.1883 - learning_rate: 5.0000e-05
Epoch 8/25
[1m  1/237[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:19:46[0m 20s/step - accuracy: 0.9688 - loss: 0.1344
Epoch 8: val_accuracy improved from 0.92744 to 0.92903, saving model to /content/best_vgg16_ela.h5




[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1032s[0m 4s/step - accuracy: 0.9688 - loss: 0.1344 - val_accuracy: 0.9290 - val_loss: 0.1859 - learning_rate: 5.0000e-05
Epoch 9/25
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22s/step - accuracy: 0.9251 - loss: 0.1666 
Epoch 9: val_accuracy did not improve from 0.92903
[1m237/237[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6191s[0m 26s/step - accuracy: 0.9251 - loss: 0.1666 - val_accuracy: 0.9232 - val_loss: 0.1985 - learning_rate: 5.0000e-05
Epoch 10/25
[1m  1/237[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:27:07[0m 22s/step - accuracy: 0.8750 - loss: 0.1893