# 1. Data Pre-Processing

### Import Libraries

In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split

### Folder Paths

In [2]:
# Folder with one sub-folder per signature set; read by split_data.
source_folder = "../dataset/ICDAR-2011"
# Output folders that split_data fills and extract_features later reads.
train_folder = "../template/ICDAR-data/train"
test_folder = "../template/ICDAR-data/test"

### Train and Test Data Split

In [3]:
def _copy_images(image_names, signature_path, destination_folder):
    """Copy the named files from ``signature_path`` into ``destination_folder``."""
    # exist_ok=True replaces the racy "check, then create" pattern.
    os.makedirs(destination_folder, exist_ok=True)
    for image_name in image_names:
        shutil.copy(
            os.path.join(signature_path, image_name),
            os.path.join(destination_folder, image_name),
        )


def split_data(source_folder, train_folder, test_folder):
    """Copy every signature sub-folder of ``source_folder`` into an 80/20 train/test split.

    Each immediate sub-directory of ``source_folder`` is split independently
    with ``train_test_split(test_size=0.2, random_state=3)`` and its files are
    copied to ``<train_folder>/<sub-directory>`` and
    ``<test_folder>/<sub-directory>``.

    Parameters
    ----------
    source_folder : str
        Directory containing one sub-directory per signature set.
    train_folder, test_folder : str
        Output directories; created if missing. Files already present are
        left in place (only overwritten on a name clash), so use empty
        output folders when regenerating a split.

    Notes
    -----
    * File names are sorted before splitting. ``os.listdir`` returns entries
      in arbitrary order, so without sorting the fixed ``random_state`` would
      not give the same split on every machine.
    * Only regular files are split; nested directories are ignored.
    * A sub-directory with a single file is copied to train only, and an empty
      one is skipped, because ``train_test_split`` cannot fill both sides.
    """
    os.makedirs(train_folder, exist_ok=True)
    os.makedirs(test_folder, exist_ok=True)

    signatures = sorted(
        entry for entry in os.listdir(source_folder)
        if os.path.isdir(os.path.join(source_folder, entry))
    )
    for signature in signatures:
        signature_path = os.path.join(source_folder, signature)
        images = sorted(
            entry for entry in os.listdir(signature_path)
            if os.path.isfile(os.path.join(signature_path, entry))
        )

        if not images:
            continue
        if len(images) < 2:
            # Not enough samples to populate both sides of the split.
            _copy_images(images, signature_path, os.path.join(train_folder, signature))
            continue

        train_signature, test_signature = train_test_split(images, test_size=0.2, random_state=3)
        _copy_images(train_signature, signature_path, os.path.join(train_folder, signature))
        _copy_images(test_signature, signature_path, os.path.join(test_folder, signature))

In [4]:
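# Uncomment to (re)create the train/test folders that the feature-extraction cells below read from.
# split_data(source_folder, train_folder, test_folder)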

# 2. Feature Extraction

### Load VGG16 Model

In [5]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image
import numpy as np

In [6]:
# VGG16 with ImageNet weights; include_top=False leaves out the fully
# connected classifier layers so the network can be used as a feature extractor.
model = VGG16(weights='imagenet', include_top=False)

### Extract Features

In [7]:
from tensorflow.keras.models import Model
import cv2 as cv

In [8]:
# Name of the VGG16 layer whose output is used as the feature representation.
layer_name = 'block5_pool'
# Model that maps the VGG16 input to the output of `layer_name`;
# extract_features calls its predict() on batches of images.
intermediate_layer_model = Model(inputs=model.input, outputs=model.get_layer(layer_name).output)

In [15]:
def extract_features(folder_path, batch_size=16):
    """Extract one flattened feature vector per image under ``folder_path``.

    Every immediate sub-directory of ``folder_path`` is treated as one
    signature set. Its images are read with OpenCV, resized to 224x224,
    converted from BGR to RGB, scaled to [0, 1] and passed in batches through
    ``intermediate_layer_model``.

    Parameters
    ----------
    folder_path : str
        Directory containing one sub-directory per signature set.
    batch_size : int, default 16
        Maximum number of images sent to the model per ``predict`` call.

    Returns
    -------
    features : numpy.ndarray
        One row per successfully read image (the flattened model output).
    names : list of str
        First three characters of the sub-directory name, one per image.
    labels : list of int
        0 when the sub-directory name ends with ``"_forg"``, otherwise 1.

    Notes
    -----
    Sub-directories and files are visited in sorted order so that the output
    ordering does not depend on the file system. Files that OpenCV cannot
    decode are reported and skipped instead of aborting the whole run.
    """
    all_features = []
    all_names = []
    all_labels = []
    signatures = sorted(
        entry for entry in os.listdir(folder_path)
        if os.path.isdir(os.path.join(folder_path, entry))
    )

    for signature in signatures:
        signature_path = os.path.join(folder_path, signature)
        print("signature path: ", signature_path)

        name = signature[:3]
        label = 0 if signature.endswith("_forg") else 1

        image_paths = sorted(
            os.path.join(signature_path, file_name)
            for file_name in os.listdir(signature_path)
        )

        # Process images in batches
        for start in range(0, len(image_paths), batch_size):
            batch_images = []
            for img_path in image_paths[start:start + batch_size]:
                img = cv.imread(img_path)
                if img is None:
                    # cv.imread signals an unreadable/non-image file with None.
                    print("skipping unreadable image: ", img_path)
                    continue
                img = cv.resize(img, (224, 224))
                # cvtColor works on a single image, so convert before stacking.
                batch_images.append(cv.cvtColor(img, cv.COLOR_BGR2RGB))

            if not batch_images:
                continue

            # Stacking already yields (batch, 224, 224, 3); no extra axis is needed.
            # NOTE(review): scaling to [0, 1] is kept from the original code;
            # keras' preprocess_input is imported in this notebook but never
            # applied. Confirm which preprocessing downstream code expects.
            batch_array = np.asarray(batch_images, dtype=np.float32) / 255.0

            # Predict features for the batch
            batch_features = intermediate_layer_model.predict(batch_array)

            # Flatten and append features, names, and labels
            for features in batch_features:
                all_features.append(features.flatten())
                all_names.append(name)
                all_labels.append(label)

    return np.array(all_features), all_names, all_labels

In [16]:
# Extract feature vectors, name prefixes and labels (0 for "_forg" folders,
# 1 otherwise) for both splits.
train_features, train_names, train_labels = extract_features(train_folder)
test_features, test_names, test_labels = extract_features(test_folder)

signature path:  ../template/ICDAR-data/train\001
signature path:  ../template/ICDAR-data/train\001_forg
signature path:  ../template/ICDAR-data/train\002
signature path:  ../template/ICDAR-data/train\002_forg
signature path:  ../template/ICDAR-data/train\003
signature path:  ../template/ICDAR-data/train\003_forg
signature path:  ../template/ICDAR-data/train\004
signature path:  ../template/ICDAR-data/train\004_forg
signature path:  ../template/ICDAR-data/train\006
signature path:  ../template/ICDAR-data/train\006_forg
signature path:  ../template/ICDAR-data/train\009
signature path:  ../template/ICDAR-data/train\009_forg
signature path:  ../template/ICDAR-data/train\012
signature path:  ../template/ICDAR-data/train\012_forg
signature path:  ../template/ICDAR-data/train\013
signature path:  ../template/ICDAR-data/train\013_forg
signature path:  ../template/ICDAR-data/train\014
signature path:  ../template/ICDAR-data/train\014_forg
signature path:  ../template/ICDAR-data/train\015
signa