### IMPORTS

In [None]:
import os
import re

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from tensorflow.keras import layers, models
from tqdm import tqdm

### SETTING THE DATASET PATH

In [None]:
## TYPES OF PRE-PROCESSED DS AVAILABLE:
D1_LINEAR_GAUSIAN_250px = "D://250px_LI_GB_D1/"
D1_LINEAR_GAUSIAN_300px = "D://300px_LI_GB_D1/"
D2_LINEAR_GAUSIAN_250px = "D://250px_LI_GB_D2/"
D2_LINEAR_GAUSIAN_300px = "D://300px_LI_GB_D2/"
D2_CUBIC_GAUSIAN_250px = "D://250px_CI_GB_D2/"
D1_D2_MIXED_250px = "D://D1_D2_MIXED_250px/"
## ^ naming scheme of the constants: DATASET-NO_INTERPOLATION_BLUR-FILTER_IMG-SIZE ^

# SELECTING DATASET
ds_type = D2_LINEAR_GAUSIAN_250px

# The image side length is encoded in the folder name as "<N>px". Search for
# that token instead of slicing fixed character positions: a [4:7] slice only
# works when the size directly follows "D://" and breaks for
# D1_D2_MIXED_250px, where it would evaluate int("D1_").
_size_match = re.search(r"(\d+)px", ds_type)
if _size_match is None:
  raise ValueError(f"Cannot infer the image size from dataset path: {ds_type!r}")
input_size = int(_size_match.group(1))
input_dim = 3  # channel count used as the last axis of the model's input_shape

# SETTING THE PATH
# Both names are bound to the same list of class sub-folder names (despite its
# name, dataset_path is not a path). Only directories are kept so that a stray
# file next to the class folders does not break the later os.listdir calls, and
# the list is sorted because os.listdir returns entries in arbitrary order.
dataset_path = signature_types = sorted(
  entry for entry in os.listdir(ds_type)
  if os.path.isdir(os.path.join(ds_type, entry))
)
# print(signature_types)

In [None]:
# signature_types and dataset_path are bound to the same list of class folder
# names; the count is read through the more descriptive of the two names.
print("Types of signatures: ", len(signature_types))

In [None]:
# Build one (sign type, image path) record per image file in every class folder.
signs = []
for item in signature_types:
  class_dir = os.path.join(ds_type, item)
  # Sorted so the record order does not depend on the file system's listing order.
  for sign in sorted(os.listdir(class_dir)):
    # Skip ".db" files (presumably Windows thumbnail caches such as Thumbs.db).
    if sign.endswith(".db"):
      continue
    signs.append((item, os.path.join(class_dir, sign)))
# Show a short preview only; the full list has one tuple per image.
print(signs[:5])

In [None]:
# Tabulate the collected (sign type, image path) records.
signs_df = pd.DataFrame(signs, columns=["sign type", "image"])
print(signs_df.head())

In [None]:
# Row count of the frame == number of image records collected.
print("Total number of signs in the dataset: ", signs_df.shape[0])

In [None]:
# Number of image records per sign type.
sign_count = signs_df["sign type"].value_counts()
print("Signatures in each category:", sign_count, sep="\n")

### CREATING LISTS OF IMAGES AND THEIR LABELS

In [None]:
# Load every image of every class folder; images[k] and labels[k] always
# describe the same file because both lists are appended to together.
images = []
labels = []
for sign_type in signature_types:
  data_path = os.path.join(ds_type, str(sign_type))
  # Sorted so the load order does not depend on the file system's listing order.
  filenames = sorted(os.listdir(data_path))
  for f in tqdm(filenames, desc=f"Processing {sign_type}"):
    # Skip ".db" files (presumably Windows thumbnail caches such as Thumbs.db).
    if f.endswith(".db"):
      continue
    img_path = os.path.join(data_path, f)
    img = cv2.imread(img_path)
    # cv2.imread signals an unreadable file by returning None instead of
    # raising. Stop here with the offending path rather than letting a None
    # entry break the later conversion of `images` to a NumPy array.
    if img is None:
      raise IOError(f"cv2.imread could not read image: {img_path}")
    images.append(img)
    labels.append(sign_type)

In [None]:
# Summarise the labels instead of printing the whole list (one entry per image).
print("Labels loaded:", len(labels))
print(pd.Series(labels, dtype="object").value_counts())

In [None]:
images = np.array(images)
# The model is declared with input_shape=(input_size, input_size, input_dim);
# check the loaded data against it now so a mismatch (wrong dataset folder,
# differently sized images, no images at all) is reported here instead of
# surfacing much later inside cnn.fit.
assert images.ndim == 4 and images.shape[1:] == (input_size, input_size, input_dim), (
  f"unexpected image array shape {images.shape}; "
  f"expected (N, {input_size}, {input_size}, {input_dim})"
)
images.shape

In [None]:
# Convert to float32, then scale the pixel values by 1/255.
images = images.astype('float32')
images /= 255.0

In [None]:
# One-hot encode the class label of every loaded image.
# The labels are taken from `labels`, which is appended to in the same loop
# iteration as `images`, so row k of Y is guaranteed to describe images[k].
y = np.array(labels).reshape(-1, 1)
columnTransformer = ColumnTransformer([('encoder', OneHotEncoder(), [0])], remainder='passthrough')
encoded = columnTransformer.fit_transform(y)
# ColumnTransformer returns a SciPy sparse matrix when the encoded output is
# sparse enough (its sparse_threshold), which happens once there are more than
# a few classes; np.array cannot convert that to float32, so densify first.
if hasattr(encoded, "toarray"):
  encoded = encoded.toarray()
Y = np.array(encoded, dtype='float32')
print(Y.shape)

### SPLITTING THE DATASET (80 : 20)

In [None]:
# Shuffle images and one-hot labels together, then hold out 20 % as the test set.
# Both steps use a fixed random_state of 42.
images, Y = shuffle(images, Y, random_state=42)
train_x, test_x, train_y, test_y = train_test_split(
  images,
  Y,
  test_size=0.2,
  random_state=42,
)
print(train_x.shape, test_x.shape, train_y.shape, test_y.shape, sep="\n")

In [None]:
# `images` is divided by 255 right after being converted to float32, so the
# splits normally arrive here already in [0, 1]. Dividing unconditionally would
# scale them a second time (down to [0, 1/255]) and once more on every re-run
# of this cell. Only rescale when the data is still in the raw 0-255 range.
if max(train_x.max(), test_x.max()) > 1.0:
  train_x = train_x / 255.0
  test_x = test_x / 255.0

### PROPOSED CNN MODEL (based on ijerph)

In [None]:
# Sequential CNN assembled layer by layer: eight 3x3 ReLU convolutions
# (32-32-64-64-128-128-256-256 filters) interleaved with 2x2 max pooling and
# batch normalisation, followed by global average pooling and a dense head.
cnn = models.Sequential()

# 32-filter stage; the first layer fixes the input shape
cnn.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                      input_shape=(input_size, input_size, input_dim)))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.BatchNormalization())

# 64-filter stage
cnn.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))

# 128-filter stage
cnn.add(layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
cnn.add(layers.MaxPooling2D((2, 2)))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
cnn.add(layers.MaxPooling2D((2, 2)))

# 256-filter stage
cnn.add(layers.Conv2D(filters=256, kernel_size=(3, 3), activation='relu'))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Conv2D(filters=256, kernel_size=(3, 3), activation='relu'))

# Classification head: pooled features -> 128 sigmoid units -> 2-way softmax
cnn.add(layers.GlobalAveragePooling2D())
cnn.add(layers.BatchNormalization())
cnn.add(layers.Dense(128, activation='sigmoid'))
cnn.add(layers.BatchNormalization())
cnn.add(layers.Flatten())
cnn.add(layers.Dropout(0.1))
cnn.add(layers.Dense(2, activation='softmax'))

### PROPOSED CNN MODEL (based on GWO)

In [None]:
# cnn = models.Sequential([
#   layers.Conv2D(filters=55, kernel_size=(3, 3), activation= 'relu', input_shape=(250, 250, 3)),
#   layers.MaxPooling2D((2, 2)),
#   layers.Dropout(0.1),
#   layers.Conv2D(filters=130, kernel_size=(3, 3), activation= 'relu'),
#   layers.MaxPooling2D((2, 2)),
#   layers.Dropout(0.1),
#   layers.Conv2D(filters=155, kernel_size=(3, 3), activation= 'relu'),
#   layers.MaxPooling2D((2, 2)),
#   layers.Dropout(0.1),
#   layers.Flatten(),
#   layers.Dense(2, activation = 'softmax')
#   ])

In [None]:
# Alternative configurations, currently disabled:
#   opti = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.8)
#   cnn.compile(optimizer=opti,loss='BinaryCrossentropy',metrics=['accuracy'])
#   cnn.compile(optimizer='adam',loss='BinaryCrossentropy',metrics=['accuracy'])

# Active configuration: Adam optimiser, categorical cross-entropy on the
# one-hot targets, accuracy as the tracked metric.
cnn.compile(
  optimizer='adam',
  loss='CategoricalCrossentropy',
  metrics=['accuracy'],
)

# Train for 100 epochs; the test split is passed as the validation data, so the
# per-epoch val_* values in `history` are measured on it.
history = cnn.fit(
  train_x,
  train_y,
  validation_data=(test_x, test_y),
  epochs=100,
)

### PLOTTING training_acc & test_acc at each epoch

In [None]:
# Per-epoch accuracy curves recorded by cnn.fit: training vs. validation data.
training_acc = history.history['accuracy']
test_acc = history.history['val_accuracy']
epoch_count = range(1, len(training_acc) + 1)  # epochs numbered from 1

# Legend entries come from the per-line labels.
plt.plot(epoch_count, training_acc, 'r--', label='Training')
plt.plot(epoch_count, test_acc, 'b-', label='Test')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Keep the one-hot targets under their own name. `test_y` is replaced by class
# indices at the end of this cell (the confusion-matrix cell reads it in that
# form), so without this copy a second run of the cell would hand class indices
# to evaluate() and call argmax(axis=1) on a 1-D array. The guard refreshes the
# copy whenever `test_y` is one-hot again, e.g. after re-running the split.
if np.ndim(test_y) > 1:
  test_y_onehot = test_y

# Loss / accuracy on the test split, measured against the one-hot targets.
testing = cnn.evaluate(test_x, test_y_onehot)

# Turn the softmax outputs and the one-hot targets into class indices for the
# scikit-learn report.
predictions = cnn.predict(test_x)
pred_y = np.argmax(predictions, axis = 1)
test_y = np.argmax(test_y_onehot, axis=1)
print(classification_report(test_y,pred_y))

In [None]:
# Confusion matrix of true vs. predicted class indices on the test split.
cm = confusion_matrix(y_true=test_y, y_pred=pred_y)
print(cm)

### OBSERVATIONS
| DATASET USED            | Genuine / Forged | random_state | Split Ratio | EPOCHS | CNN-1 (ijerph) ACC%, LOSS                        | CNN-2 (jksu) ACC%, LOSS|
| ----------------------- |      :----:      | :----------: | :---------: | :----: | :-----------------------------------------:      | :-------------------: |
| D1_LINEAR_GAUSIAN_250px | G: 1321, F: 1321 |     42       | 80:20       |   50   | TrA: 100, TeA: 100, TrL: 0.0004, TeL: 0.00005    | TrA: 50, TeA: 49, TrL: 0.69, TeL: 6.146 |
| D2_LINEAR_GAUSIAN_250px | G: 887,  F: 734  |     42       | 80:20       |   50   | TrA: 99, TeA: 96, TrL: 0.02, TeL: 0.07           | --------------------------------------- |
| D2_LINEAR_GAUSIAN_250px | G: 887,  F: 734  |     42       | 80:20       |   100   | TrA: 99, TeA: 92, TrL: 0.006, TeL: 0.27         | --------------------------------------- |