<a href="https://colab.research.google.com/github/Shri6apr/1_Google-Advanced-Data-Analytics-Professional-Certificate/blob/main/signature_verification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install opencv-python-headless numpy tensorflow pillow sklearn matplotlib

import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from PIL import Image
import matplotlib.pyplot as plt
from datetime import datetime
from google.colab import drive


In [None]:
import tensorflow as tf
print(tf.__version__)

2.18.0


In [None]:
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
class Config:
    def __init__(self):
        self.DRIVE_PATH = '/content/drive/My Drive/data'
        self.AUTHOR = '021'
        self.TRAINING_FOLDER = os.path.join(self.DRIVE_PATH, 'training', self.AUTHOR)
        self.TEST_FOLDER = os.path.join(self.DRIVE_PATH, 'test', self.AUTHOR)
        self.INPUT_SHAPE = (128, 128, 1)
        self.BATCH_SIZE = 32
        self.EPOCHS = 50
        self.MAX_PAIRS_PER_CLASS = 1000

config = Config()

# Verify paths
print(f"Training folder: {config.TRAINING_FOLDER}")
print(f"Test folder: {config.TEST_FOLDER}")
print(f"Training folder exists: {os.path.exists(config.TRAINING_FOLDER)}")
print(f"Test folder exists: {os.path.exists(config.TEST_FOLDER)}")


Training folder: /content/drive/My Drive/data/training/021
Test folder: /content/drive/My Drive/data/test/021
Training folder exists: True
Test folder exists: True


In [None]:
def load_images_from_folder(folder):
    """Load images and labels from a folder"""
    images = []
    labels = []
    filenames = []

    if not os.path.exists(folder):
        print(f"Directory {folder} does not exist.")
        return images, labels, filenames

    for filename in os.listdir(folder):
        try:
            file_path = os.path.join(folder, filename)
            img = cv2.imread(file_path)
            if img is not None:
                images.append(img)
                # Add label based on filename
                label = 1 if 'genuine' in filename.lower() else 0
                labels.append(label)
                filenames.append(filename)
        except Exception as e:
            print(f"Error loading file {file_path}: {e}")

    print(f"Total images loaded from {folder}: {len(images)}")
    return images, labels, filenames

class SignatureVerificationSystem:
    def __init__(self, config):
        self.config = config
        self.input_shape = config.INPUT_SHAPE
        self.model = self.build_efficient_siamese_model()

    def preprocess_image(self, img):
        """Preprocess a single image"""
        try:
            # Convert to grayscale if needed
            if len(img.shape) == 3:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Resize
            img = cv2.resize(img, (self.input_shape[0], self.input_shape[1]))

            # Apply CLAHE
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            img = clahe.apply(img.astype(np.uint8))

            # Normalize and add channel dimension
            img = (img / 255.0).astype(np.float32)
            img = np.expand_dims(img, axis=-1)

            return img

        except Exception as e:
            print(f"Error preprocessing image: {str(e)}")
            return None

    def preprocess_dataset(self, images):
        """Preprocess a batch of images"""
        processed_images = []
        for img in images:
            processed_img = self.preprocess_image(img)
            if processed_img is not None:
                processed_images.append(processed_img)

        return np.array(processed_images)

    def build_efficient_siamese_model(self):
        """Build the Siamese network with ResNet50V2 backbone"""
        # Initialize ResNet50V2
        base_model = ResNet50V2(
            include_top=False,
            weights='imagenet',
            input_shape=(self.input_shape[0], self.input_shape[1], 3),
            pooling='avg'
        )

        # Freeze early layers
        for layer in base_model.layers[:-30]:
            layer.trainable = False

        def create_embedding_network():
            input_layer = layers.Input(shape=self.input_shape)

            # Convert grayscale to 3 channels
            x = layers.Conv2D(3, (1, 1))(input_layer)

            # Pass through ResNet
            x = base_model(x)

            # Add modern layers
            x = layers.Dense(512, activation='selu')(x)
            x = layers.BatchNormalization()(x)
            x = layers.Dropout(0.3)(x)
            x = layers.Dense(256, activation='selu')(x)

            return Model(input_layer, x)

        embedding_network = create_embedding_network()

        # Create Siamese network
        input_a = layers.Input(shape=self.input_shape)
        input_b = layers.Input(shape=self.input_shape)

        embedding_a = embedding_network(input_a)
        embedding_b = embedding_network(input_b)

        # Cosine similarity
        cosine_similarity = layers.Dot(axes=1, normalize=True)([embedding_a, embedding_b])

        # Attention mechanism
        attention = layers.Concatenate()([embedding_a, embedding_b])
        attention = layers.Dense(64, activation='relu')(attention)
        attention = layers.Dense(1, activation='sigmoid')(attention)

        # Combine similarity and attention
        output = layers.Average()([cosine_similarity, attention])

        model = Model(inputs=[input_a, input_b], outputs=output)
        return model

    def create_pairs(self, images, labels):
        """Create balanced pairs for training"""
        genuine_indices = np.where(np.array(labels) == 1)[0]
        forged_indices = np.where(np.array(labels) == 0)[0]

        pairs = []
        pair_labels = []

        # Create positive pairs (genuine-genuine)
        for i in range(len(genuine_indices)):
            for j in range(i + 1, len(genuine_indices)):
                pairs.append([images[genuine_indices[i]], images[genuine_indices[j]]])
                pair_labels.append(1)
                if len(pairs) >= self.config.MAX_PAIRS_PER_CLASS:
                    break
            if len(pairs) >= self.config.MAX_PAIRS_PER_CLASS:
                break

        # Create negative pairs (genuine-forged)
        num_negative = min(len(pairs), len(forged_indices) * len(genuine_indices))
        for _ in range(num_negative):
            genuine_idx = np.random.choice(genuine_indices)
            forged_idx = np.random.choice(forged_indices)
            pairs.append([images[genuine_idx], images[forged_idx]])
            pair_labels.append(0)

        return np.array(pairs), np.array(pair_labels)

    def train(self, train_pairs, train_labels, validation_data=None):
        """Train the model with advanced techniques"""
        callbacks = [
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=3,
                min_lr=1e-6,
                verbose=1
            ),
            EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True
            )
        ]

        self.model.compile(
            optimizer=Adam(learning_rate=1e-3),
            loss='binary_crossentropy',
            metrics=['accuracy', tf.keras.metrics.AUC()]
        )

        history = self.model.fit(
            [train_pairs[:, 0], train_pairs[:, 1]],
            train_labels,
            validation_data=validation_data,
            epochs=self.config.EPOCHS,
            batch_size=self.config.BATCH_SIZE,
            callbacks=callbacks
        )

        return history


In [None]:
def main():
    # Initialize system
    system = SignatureVerificationSystem(config)

    # Load training and test images
    print("Loading training images...")
    training_images, training_labels, training_filenames = load_images_from_folder(config.TRAINING_FOLDER)
    print("Loading test images...")
    test_images, test_labels, test_filenames = load_images_from_folder(config.TEST_FOLDER)

    # Preprocess images
    print("Preprocessing training images...")
    training_data = system.preprocess_dataset(training_images)
    print("Preprocessing test images...")
    test_data = system.preprocess_dataset(test_images)

    # Create pairs
    print("Creating pairs...")
    train_pairs, train_pair_labels = system.create_pairs(training_data, training_labels)
    test_pairs, test_pair_labels = system.create_pairs(test_data, test_labels)

    print(f"Training pairs shape: {train_pairs.shape}")
    print(f"Test pairs shape: {test_pairs.shape}")

    # Train model
    print("Training model...")
    history = system.train(
        train_pairs,
        train_pair_labels,
        validation_data=([test_pairs[:, 0], test_pairs[:, 1]], test_pair_labels)
    )

    # Save model
    model_save_path = os.path.join(config.DRIVE_PATH, f'signature_model_{datetime.now().strftime("%Y%m%d_%H%M%S")}.h5')
    system.model.save(model_save_path)
    print(f"Model saved to: {model_save_path}")

    # Print final metrics
    test_loss, test_accuracy, test_auc = system.model.evaluate(
        [test_pairs[:, 0], test_pairs[:, 1]],
        test_pair_labels
    )
    print(f"\nFinal Test Metrics:")
    print(f"Loss: {test_loss:.4f}")
    print(f"Accuracy: {test_accuracy:.4f}")
    print(f"AUC: {test_auc:.4f}")


In [None]:
if __name__ == "__main__":
    main()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94668760/94668760[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Loading training images...
Total images loaded from /content/drive/My Drive/data/training/021: 24
Loading test images...
Total images loaded from /content/drive/My Drive/data/test/021: 24
Preprocessing training images...
Preprocessing test images...
Creating pairs...
Training pairs shape: (132, 2, 128, 128, 1)
Test pairs shape: (132, 2, 128, 128, 1)
Training model...
Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 13s/step - accuracy: 0.6303 - auc: 0.7229 - loss: 0.6060 - val_accuracy: 0.7045 - val_auc: 0.9521 - val_loss: 0.6414 - learning_rate: 0.0010
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 13s/step - accuracy: 0.7374 - auc: 0.9837 - loss: 0.4068 - val_accuracy: 0.8561 - val_auc: 0.9812 - val_loss: 0



Model saved to: /content/drive/My Drive/data/signature_model_20250125_150650.h5
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 4s/step - accuracy: 0.9906 - auc: 0.6598 - loss: 0.0555

Final Test Metrics:
Loss: 0.0851
Accuracy: 0.9848
AUC: 0.9884


NameError: name 'model' is not defined