# 1. Data Pre-Processing

### Import Library

In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split

### Folder Paths

In [2]:
source_folder = "../dataset/ICDAR-2011"
train_folder = "../template/ICDAR-data/train"
test_folder = "../template/ICDAR-data/test"

### Train and Test Data Split

In [3]:
def split_data(source_folder, train_folder, test_folder):
    if not os.path.exists(train_folder):
        os.makedirs(train_folder)
    if not os.path.exists(test_folder):
        os.makedirs(test_folder)

    signatures = [folder for folder in os.listdir(source_folder) if os.path.isdir(os.path.join(source_folder, folder))]
    for signature in signatures:
        signature_path = os.path.join(source_folder, signature)
        images = os.listdir(signature_path)
        train_signature, test_signature = train_test_split(images, test_size=0.2, random_state=3)

        for sig in train_signature:
            source_path = os.path.join(signature_path, sig)
            destination_path = os.path.join(train_folder, signature)
            if not os.path.exists(destination_path):
                os.makedirs(destination_path)
            destination_path = os.path.join(destination_path, sig)
            shutil.copy(source_path, destination_path)

        for sig in test_signature:
            source_path = os.path.join(signature_path, sig)
            destination_path = os.path.join(test_folder, signature)
            if not os.path.exists(destination_path):
                os.makedirs(destination_path)
            destination_path = os.path.join(destination_path, sig)
            shutil.copy(source_path, destination_path)

In [4]:
# split_data(source_folder, train_folder, test_folder)

# 2. Feature Extraction

### Load vgg16 model

In [5]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing import image
import numpy as np

In [6]:
model = VGG16(weights='imagenet', include_top=False)

### Extract Features

In [7]:
from tensorflow.keras.models import Model
import cv2 as cv

In [8]:
size = 224

In [9]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(size, size, 3))
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block5_pool').output)

In [10]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [17]:
def extract_features(folder_path):
    all_features = []
    all_names = []
    all_labels = []
    signatures = [folder for folder in os.listdir(folder_path) if os.path.isdir(os.path.join(folder_path, folder))]

    for signature in signatures:
        signature_path = os.path.join(folder_path, signature)
        print("signature path: ", signature_path)
        images = os.listdir(signature_path)

        name = signature[:3]

        if signature.endswith("_forg"):
            label = 0
        else:
            label = 1

        all_images = [cv.imread(os.path.join(signature_path, img)) for img in images]
        all_images = [cv.cvtColor(img, cv.COLOR_BGR2RGB) for img in all_images]
        all_images = [cv.resize(img, (size, size)) for img in all_images]
        all_images = np.array([preprocess_input(img) for img in all_images])

        all_features = model.predict(all_images)
        all_features = all_features.reshape((len(images), -1))
        all_names.append([name] * len(images))
        all_labels.append([label] * len(images))

    flattened_names = [item for sublist in all_names for item in sublist]
    flattened_labels = [item for sublist in all_labels for item in sublist]

    return all_features, all_names, all_labels

        

In [18]:
train_features, train_names, train_labels = extract_features(train_folder)
test_features, test_names, test_labels = extract_features(test_folder)

signature path:  ../template/ICDAR-data/train\001
signature path:  ../template/ICDAR-data/train\001_forg
signature path:  ../template/ICDAR-data/train\002
signature path:  ../template/ICDAR-data/train\002_forg
signature path:  ../template/ICDAR-data/train\003
signature path:  ../template/ICDAR-data/train\003_forg
signature path:  ../template/ICDAR-data/train\004
signature path:  ../template/ICDAR-data/train\004_forg
signature path:  ../template/ICDAR-data/train\006
signature path:  ../template/ICDAR-data/train\006_forg
signature path:  ../template/ICDAR-data/train\009
signature path:  ../template/ICDAR-data/train\009_forg
signature path:  ../template/ICDAR-data/train\012
signature path:  ../template/ICDAR-data/train\012_forg
signature path:  ../template/ICDAR-data/train\013
signature path:  ../template/ICDAR-data/train\013_forg
signature path:  ../template/ICDAR-data/train\014
signature path:  ../template/ICDAR-data/train\014_forg
signature path:  ../template/ICDAR-data/train\015
signa

In [37]:
train_names = [item for sublist in train_names for item in sublist]
train_labels = [item for sublist in train_labels for item in sublist]
test_names = [item for sublist in test_names for item in sublist]
test_labels = [item for sublist in test_labels for item in sublist]

In [35]:
def create_feature_gallery(train_features, train_names, train_labels):
    gallery_feature = []
    gallery_name = []

    for feature, name, label in zip(train_features, train_names, train_labels):
        

    # return gallery_feature, gallery_name

In [38]:
# gallery_feature, gallery_name = create_feature_gallery(train_features, train_names, train_labels)
create_feature_gallery(train_features, train_names, train_labels)

[0. 0. 0. ... 0. 0. 0.] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
[ 0.        29.506802   0.        ...  0.         0.         1.6868408] 001 1
[0. 0. 0. ... 0. 0. 0.] 001 1
