## Preprocessing pipeline
The pipeline consists of the following steps:
1. Resizing so that the longest side is at most 600 pixels
2. Applying median blur (listed here, but not applied by `preprocessing_block` below)
3. Grayscale conversion
4. Fast Non-Local Means denoising
5. Image segmentation
6. Image padding
7. Final resizing
8. Negative

In [1]:
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np

In [2]:
# Show the directory the notebook starts in, switch to the dataset folder
# (relative to that starting directory), then print again to confirm the move.
# Later cells build their paths from os.getcwd(), so they rely on this chdir.
print(os.getcwd()) 
os.chdir('../dataset/')
print(os.getcwd()) 


d:\Projects\signature_verification\Notebooks
d:\Projects\signature_verification\dataset


### 1. Resizing so that the longest side is at most 600 pixels

In [3]:
def resize_max_length(image, max_length=600):
    """Shrink an image so that its longest side is at most ``max_length`` pixels.

    The aspect ratio is preserved. An image whose longest side already fits is
    returned unchanged (the very same object, not a copy); images are never
    enlarged.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) is read
            before the array is handed to ``cv2.resize``.
        max_length: Upper bound, in pixels, for the longest side.

    Returns:
        ``image`` itself when it already fits, otherwise a resized image
        produced with ``cv2.INTER_AREA`` interpolation.
    """
    height, width = image.shape[:2]
    longest_side = max(height, width)

    # Nothing to do when the image already fits.
    if longest_side <= max_length:
        return image

    scale = max_length / longest_side
    # int() truncates, so the short side of a very elongated image could
    # become 0, which cv2.resize rejects; keep every side at least 1 pixel.
    new_width = max(1, int(width * scale))
    new_height = max(1, int(height * scale))

    # cv2.resize expects the target size as (width, height).
    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

### 2. Grayscale conversion

In [4]:
def grayscale_conversion(image):
    """Convert a BGR image to a single-channel grayscale image.

    Args:
        image: Image array. A 2-D array is treated as already being grayscale;
            anything else is converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The grayscale image. A 2-D input is returned unchanged (same object).
    """
    # A 2-D array has no channel axis, so there is nothing to convert; calling
    # cvtColor with COLOR_BGR2GRAY on it would raise instead.
    if len(image.shape) == 2:
        return image

    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

### 3. Fast Non-Local Means denoising

In [16]:
def FNLM_denoising(image, h=10, template_window_size=10, search_window_size=21):
    """Denoise an image with OpenCV's Fast Non-Local Means filter.

    The defaults reproduce the values this notebook has always used, so
    existing single-argument calls behave exactly as before.

    Args:
        image: Input image, passed to ``cv2.fastNlMeansDenoising`` as ``src``
            (the pipeline passes a grayscale image).
        h: Filter strength. Larger values remove more noise but also more
            image detail.
        template_window_size: Size in pixels of the template patch used to
            compute the weights.
        search_window_size: Size in pixels of the window searched for similar
            patches.

    Returns:
        The denoised image.
    """
    # NOTE(review): the OpenCV documentation says templateWindowSize "should be
    # odd" (recommended value 7); the default of 10 is kept only so the output
    # stays identical to earlier runs - confirm whether an odd value is wanted.
    return cv2.fastNlMeansDenoising(
        src=image,
        h=h,
        templateWindowSize=template_window_size,
        searchWindowSize=search_window_size,
    )

### 4. Image segmentation

In [6]:
def image_segmentation(gray_image):
    """Binarize a grayscale image, using its own mean intensity as threshold.

    Args:
        gray_image: Grayscale image to segment.

    Returns:
        The thresholded image from ``cv2.threshold`` with ``THRESH_BINARY``
        and a maximum value of 255.
    """
    # The threshold is image-dependent: the mean intensity, truncated to int.
    mean_level = int(np.mean(gray_image))
    # cv2.threshold returns (threshold used, result image); keep the image only.
    return cv2.threshold(gray_image, mean_level, 255, cv2.THRESH_BINARY)[1]

### 5. Image padding

In [7]:
def pad_to_square(image):
    """Pad the shorter dimension of an image with white so it becomes square.

    The padding is split evenly between the two opposite sides; when the size
    difference is odd, the right/bottom side receives the extra pixel.

    Args:
        image: Image array; a 3-D array is padded with ``[255, 255, 255]``,
            anything else with the scalar ``255``.

    Returns:
        The padded image produced by ``cv2.copyMakeBorder``.
    """
    rows, cols = image.shape[:2]

    # Split the size difference into two parts; the second part absorbs the
    # leftover pixel of an odd difference.
    gap = abs(rows - cols)
    leading = gap // 2
    trailing = gap - leading

    if cols < rows:
        # Taller than wide: widen by padding left and right.
        borders = dict(top=0, bottom=0, left=leading, right=trailing)
    else:
        # Wider than tall (or already square): pad top and bottom.
        borders = dict(top=leading, bottom=trailing, left=0, right=0)

    # White background, matching the number of dimensions of the input.
    fill = [255, 255, 255] if len(image.shape) == 3 else 255

    return cv2.copyMakeBorder(
        image,
        borderType=cv2.BORDER_CONSTANT,
        value=fill,
        **borders,
    )

### 6. Final resizing

In [8]:
def final_resize(image, target_size=(224, 224)):
    """Resize an image to a fixed output size.

    Args:
        image: Image to resize.
        target_size: Output size handed to ``cv2.resize`` as ``dsize``
            (OpenCV reads it as ``(width, height)``).

    Returns:
        The resized image, interpolated with ``cv2.INTER_AREA``.
    """
    return cv2.resize(src=image, dsize=target_size, interpolation=cv2.INTER_AREA)

### 7. Computing the negative image

In [9]:
def negative_image(image):
    """Return the photographic negative of an image.

    Args:
        image: Image array; each value ``v`` is mapped to ``255 - v``.

    Returns:
        A new array holding the inverted values; the input is not modified.
    """
    return 255 - image

## Final preprocessing module

In [10]:
def preprocessing_block(image):
    """Run the complete preprocessing chain on a single image.

    Steps, in order: limit the longest side (``resize_max_length``), convert
    to grayscale, Fast Non-Local Means denoising, mean-threshold segmentation,
    white padding to a square, resize to the ``final_resize`` default size,
    and finally inversion.

    NOTE(review): the overview at the top of this notebook also lists a median
    blur step; no such step is applied here.

    Args:
        image: Image as loaded by ``cv2.imread`` in the batch cell.

    Returns:
        The preprocessed, inverted image.
    """
    resized = resize_max_length(image)
    gray = grayscale_conversion(resized)
    denoised = FNLM_denoising(gray)
    segmented = image_segmentation(denoised)
    padded_image = pad_to_square(segmented)
    final_resized_img = final_resize(padded_image)
    # Use the dedicated helper rather than repeating ``255 - img`` inline, so
    # the inversion step is defined in exactly one place.
    return negative_image(final_resized_img)

In [20]:
# Preprocess every genuine signature of writers 1-10 and store the result as
# ../Preprocessed/Genuine/<writer>/<original name>_pp.tif.
print(os.getcwd())
source_path = os.getcwd() + '/Genuine/'
destination_path = '../Preprocessed/Genuine/'
print(os.listdir(source_path))

print(source_path)
for i in range(1, 11):
    writer_src = os.path.join(source_path, str(i))
    writer_dst = os.path.join(destination_path, str(i))
    # cv2.imwrite does not create missing folders (it just reports failure),
    # so make sure the target folder exists first.
    os.makedirs(writer_dst, exist_ok=True)

    # Sorted so the files are processed in a stable order.
    for file in sorted(os.listdir(writer_src)):
        file_path = os.path.join(writer_src, file)
        read_img = cv2.imread(file_path)
        if read_img is None:
            # cv2.imread returns None for anything it cannot decode
            # (non-image files, corrupt images); skip instead of crashing.
            print(f"Skipping unreadable file: {file_path}")
            continue

        preprocessed_image = preprocessing_block(read_img)

        # splitext strips only the final extension, so names that contain
        # extra dots are not truncated (and cannot collide with each other).
        out_path = os.path.join(writer_dst, os.path.splitext(file)[0] + '_pp.tif')
        if not cv2.imwrite(out_path, preprocessed_image):
            raise OSError(f"Could not write preprocessed image to {out_path}")
         
 
    

d:\Projects\signature_verification\dataset
['1', '10', '2', '3', '4', '5', '6', '7', '8', '9']
d:\Projects\signature_verification\dataset/Genuine/


## Dataset splitting

In [25]:
# Create the output tree ../Final_dataset/<writer>/<split>/<class>/ for
# writers 1-10.
print(os.getcwd())
for i in range(1, 11):
    for split_name in ['test', 'train', 'valid']:
        for class_name in ['Forgery', 'Genuine']:
            # makedirs creates missing parent folders as well, and
            # exist_ok=True makes the cell safe to run more than once
            # (os.mkdir fails in both situations).
            os.makedirs(
                os.path.join('..', 'Final_dataset', str(i), split_name, class_name),
                exist_ok=True,
            )
        


d:\Projects\signature_verification\dataset


In [27]:
# Build the path of the 'Genuine' folder inside the current working directory
# and print it, followed by the working directory itself, one per line.
src_path = os.path.join(os.getcwd(), 'Genuine')
print(src_path, os.getcwd(), sep='\n')

d:\Projects\signature_verification\dataset\Genuine
d:\Projects\signature_verification\dataset


In [37]:
import os
import shutil
import random

Converting the preprocessed dataset into the following format:
```
Split_dataset/
├── train/
│   ├── Genuine/
│   └── Forgery/
├── valid/
│   ├── Genuine/
│   └── Forgery/
└── test/
    ├── Genuine/
    └── Forgery/
```

In [42]:
# Split every writer's images into train / valid / test folders:
#   ../Final_dataset/<writer>/<split>/<category>/<file>
split = (0.7, 0.2, 0.1)  # train, valid, test split ratios
categories = ["Genuine", "Forgery"]

# Seeded generator: together with the sorted listing below this makes the
# split reproducible. An unseeded shuffle would assign files differently on
# every run, and re-running could then copy one file into several splits.
rng = random.Random(42)

# NOTE(review): images are read from <cwd>/<category>/<writer>/, whereas the
# preprocessing cell writes its output to ../Preprocessed/Genuine/ - confirm
# the working directory points at the preprocessed images before running.
for category in categories:
    # Writers 1-10, matching the preprocessing and folder-creation cells
    # (this loop previously started at 2 and skipped writer 1).
    for i in range(1, 11):
        src_path = os.path.join(os.getcwd(), category, str(i))
        dest_dir = os.path.join('..', 'Final_dataset', str(i))

        # Regular files only (shutil.copy2 cannot copy a directory), sorted
        # so the seeded shuffle always starts from the same order.
        files = sorted(
            name for name in os.listdir(src_path)
            if os.path.isfile(os.path.join(src_path, name))
        )
        rng.shuffle(files)

        total = len(files)
        train_end = int(split[0] * total)
        valid_end = train_end + int(split[1] * total)

        # 'test' receives whatever is left after train and valid, so rounding
        # never drops a file.
        splits = {
            "train": files[:train_end],
            "valid": files[train_end:valid_end],
            "test": files[valid_end:],
        }

        for split_name, file_list in splits.items():
            dst_folder = os.path.join(dest_dir, split_name, category)
            os.makedirs(dst_folder, exist_ok=True)

            for file in file_list:
                # copy2 copies the file content together with its metadata.
                shutil.copy2(
                    os.path.join(src_path, file),
                    os.path.join(dst_folder, file),
                )
