In [3]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

# Function to load images and labels from dataset directory
def load_data(dataset_dir):
    X, y = [], []
    for class_name in os.listdir(dataset_dir):
        class_dir = os.path.join(dataset_dir, class_name)
        if os.path.isdir(class_dir):
            for image_name in os.listdir(class_dir):
                image_path = os.path.join(class_dir, image_name)
                image = cv2.imread(image_path)
                image = cv2.resize(image, (128, 128))  # Resize images to a fixed size
                X.append(image)
                y.append(class_name)
    return np.array(X), np.array(y)

# Define CNN model architecture
def create_model(input_shape, num_classes):
    model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def main():
    # Step 1: Load data
    dataset_dir = r'D:\AIML Internship\Task5_modified\cropped_signatures'
    X, y = load_data(dataset_dir)

    # Step 2: Encode class labels
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    num_classes = len(label_encoder.classes_)

    # Step 3: Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Step 4: Normalize pixel values to [0, 1]
    X_train = X_train.astype('float32') / 255.0
    X_test = X_test.astype('float32') / 255.0

    # Step 5: Convert class labels to one-hot encoded vectors
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

    # Step 6: Define input shape
    input_shape = X_train[0].shape

    # Step 7: Create and train the model
    model = create_model(input_shape, num_classes)
    model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

    # Step 8: Evaluate the model
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print("Test Accuracy:", test_accuracy)

    # Optionally, save the trained model for future use
    model.save("signature_verification_model.h5")

if __name__ == "__main__":
    main()


Epoch 1/10


  super().__init__(


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 205ms/step - accuracy: 0.3060 - loss: 2.2586 - val_accuracy: 0.6316 - val_loss: 1.3356
Epoch 2/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 183ms/step - accuracy: 0.6997 - loss: 1.0460 - val_accuracy: 0.9737 - val_loss: 0.2860
Epoch 3/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 207ms/step - accuracy: 0.8970 - loss: 0.3306 - val_accuracy: 0.9474 - val_loss: 0.0975
Epoch 4/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 243ms/step - accuracy: 0.9860 - loss: 0.0903 - val_accuracy: 0.9737 - val_loss: 0.2927
Epoch 5/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 228ms/step - accuracy: 0.9953 - loss: 0.0337 - val_accuracy: 1.0000 - val_loss: 0.0474
Epoch 6/10
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 205ms/step - accuracy: 0.9973 - loss: 0.0163 - val_accuracy: 0.9737 - val_loss: 0.0603
Epoch 7/10
[1m11/11[0m [32m━━━━━━━━━



Test Accuracy: 0.9684210419654846


In [5]:
from tensorflow.keras.models import load_model

# Load the model
model = load_model("signature_verification_model.h5")

# Recompile the model with the necessary configuration
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Now you can use the model for training or evaluation




In [9]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import load_model

def calculate_height_to_width_ratio(image):
    if image is None:
        return 0
    height, width, _ = image.shape
    return height / width

def divide_image_into_parts(image, num_parts):
    if image is None:
        return []
    height, width, _ = image.shape
    part_height = height // num_parts
    parts = []
    for i in range(num_parts):
        part = image[i * part_height: (i + 1) * part_height, :]
        parts.append(part)
    return parts

def resize_image(image, target_height, target_width):
    if image is None:
        return None
    return cv2.resize(image, (target_width, target_height))

def extract_signature_parameters(image_part):
    if image_part is None:
        return None
    # Placeholder function, replace with your implementation
    return "Angular"  # For demonstration, always returning "Angular"

def compare_signature_parts(part1, part2):
    if part1 is None or part2 is None:
        return False
    return np.array_equal(part1, part2)

def find_dimensions_of_letters(image):
    if image is None:
        return 0, 0
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    total_width = 0
    total_height = 0
    count = 0
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        total_width += w
        total_height += h
        count += 1
    if count == 0:
        return 0, 0
    avg_width = total_width / count
    avg_height = total_height / count
    return avg_width, avg_height

def test_signature(model_path, sample_images_folder, person_folder):
    # Step 1: Load the trained model
    model = load_model(model_path)

    # Step 2: Load sample images from the folder
    sample_image_paths = [os.path.join(sample_images_folder, filename) for filename in os.listdir(sample_images_folder)]

    # Step 3: Load person's signature images
    person_image_paths = [os.path.join(person_folder, filename) for filename in os.listdir(person_folder)]

    # Step 4: Iterate through each person's signature
    for person_image_path in person_image_paths:
        print("\nProcessing", person_image_path)
        person_image = cv2.imread(person_image_path)

        # Step 5: Calculate height-to-width ratio
        ratio = calculate_height_to_width_ratio(person_image)
        print("Height-to-Width Ratio:", ratio)

        # Step 6: Determine number of parts
        num_parts = 2  # Placeholder value, replace with your logic
        print("Number of Parts:", num_parts)

        # Step 7: Divide the image into parts
        person_parts = divide_image_into_parts(person_image, num_parts)

        # Step 8: Resize parts if necessary and find dimensions of letters
        resized_person_parts = []
        for part in person_parts:
            target_height, target_width = 100, 100  # Example target size, replace with your desired size
            resized_part = resize_image(part, target_height, target_width)
            resized_person_parts.append(resized_part)
            avg_width, avg_height = find_dimensions_of_letters(part)
            print("Average Width of Letters:", avg_width)
            print("Average Height of Letters:", avg_height)

        # Step 9: Extract signature parameters
        signature_parameters = [extract_signature_parameters(part) for part in resized_person_parts]
        print("Signature Parameters:", signature_parameters)

        # Step 10: Compare signature parts with sample images
        best_match_index = -1
        best_match_score = float('inf')
        for i, sample_image_path in enumerate(sample_image_paths):
            # Load sample image
            sample_image = cv2.imread(sample_image_path)

            # Divide sample image into parts
            sample_parts = divide_image_into_parts(sample_image, 2)  # Assuming 2 parts for comparison

            # Resize parts if necessary and find dimensions of letters
            resized_sample_parts = []
            for part in sample_parts:
                target_height, target_width = 100, 100  # Example target size, replace with your desired size
                resized_part = resize_image(part, target_height, target_width)
                resized_sample_parts.append(resized_part)
                avg_width, avg_height = find_dimensions_of_letters(part)
                print("Average Width of Letters:", avg_width)
                print("Average Height of Letters:", avg_height)

            # Compare signature parts
            score = sum(np.abs(np.subtract(part1, part2)).mean() for part1, part2 in zip(resized_person_parts, resized_sample_parts))
            print(f"Comparison Score with {os.path.basename(sample_image_path)}:", score)

            # Update best match
            if score < best_match_score:
                best_match_score = score
                best_match_index = i

        # Step 11: Print the best match result
        best_match_image = os.path.basename(sample_image_paths[best_match_index])
        print(f"\nBest Match: {best_match_image}")

# Example usage:
model_path = r"D:\AIML Internship\signature_verification_model.h5"
sample_images_folder = r"D:\AIML Internship\Task5_modified\cropped_signatures\Hussain\sample images"
person_folder = r"D:\AIML Internship\Task5_modified\cropped_signatures\Hussain"
test_signature(model_path, sample_images_folder, person_folder)





Processing D:\AIML Internship\Task5_modified\cropped_signatures\Hussain\sample images
Height-to-Width Ratio: 0
Number of Parts: 2
Signature Parameters: []
Average Width of Letters: 65.75
Average Height of Letters: 10.75
Average Width of Letters: 70.0
Average Height of Letters: 10.5
Comparison Score with signature_46.jpg: 0
Average Width of Letters: 114.0
Average Height of Letters: 19.5
Average Width of Letters: 76.66666666666667
Average Height of Letters: 16.333333333333332
Comparison Score with signature_59.jpg: 0

Processing D:\AIML Internship\Task5_modified\cropped_signatures\Hussain\signature_1.jpg
Height-to-Width Ratio: 0.35537190082644626
Number of Parts: 2
Average Width of Letters: 121.0
Average Height of Letters: 21.0
Average Width of Letters: 84.33333333333333
Average Height of Letters: 18.0
Signature Parameters: ['Angular', 'Angular']
Average Width of Letters: 65.75
Average Height of Letters: 10.75
Average Width of Letters: 70.0
Average Height of Letters: 10.5
Comparison Sco

In [None]:
#testing process

In [1]:
import os
import cv2

def preprocess_image(image_path, target_height, target_width):
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to read image: {image_path}")
        return None
    
    # Resize the image to the target height and width
    resized_image = cv2.resize(image, (target_width, target_height))
    
    # Perform any additional preprocessing steps if needed
    
    return resized_image

def prepare_sample_images(sample_images_folder, target_height, target_width):
    sample_images = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        # Preprocess each sample image
        preprocessed_image = preprocess_image(image_path, target_height, target_width)
        if preprocessed_image is not None:
            sample_images.append(preprocessed_image)
    return sample_images

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
target_height = 100  # Define the target height for resizing
target_width = 200   # Define the target width for resizing
sample_images = prepare_sample_images(sample_images_folder, target_height, target_width)
print(f"Number of sample images prepared: {len(sample_images)}")


Number of sample images prepared: 2


In [5]:
import os
import cv2

def crop_signature(image, crop_area):
    # Crop the signature area from the image
    x, y, w, h = crop_area
    cropped_image = image[y:y+h, x:x+w]
    return cropped_image

def preprocess_sample_images(sample_images_folder, crop_areas, target_height, target_width):
    preprocessed_samples = []
    for filename, crop_area in zip(os.listdir(sample_images_folder), crop_areas):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue
        
        # Crop the signature area
        cropped_image = crop_signature(image, crop_area)
        
        # Resize the image to the target height and width
        if cropped_image.size != 0:
            resized_image = cv2.resize(cropped_image, (target_width, target_height))
            
            # Perform any additional preprocessing steps if needed
            
            # Add the preprocessed image to the list
            preprocessed_samples.append(resized_image)
        else:
            print(f"Empty image: {image_path}")
    
    return preprocessed_samples

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
crop_areas = [(100, 100, 200, 200), (150, 150, 250, 250), ...]  # Define crop areas for each sample image
target_height = 100  # Define the target height for resizing
target_width = 200   # Define the target width for resizing

preprocessed_samples = preprocess_sample_images(sample_images_folder, crop_areas, target_height, target_width)
print(f"Number of preprocessed sample images: {len(preprocessed_samples)}")


Empty image: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images\121.jpg
Empty image: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images\132.jpg
Number of preprocessed sample images: 0


In [8]:
import os
import cv2
from skimage.feature import hog
from skimage import exposure
import numpy as np

def preprocess_and_extract_features(sample_images_folder, crop_areas, target_height, target_width):
    preprocessed_samples = []
    features = []
    for filename, crop_area in zip(os.listdir(sample_images_folder), crop_areas):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue
        
        # Crop the signature area
        cropped_image = crop_signature(image, crop_area)
        
        # Check if the cropped image is empty
        if cropped_image.size == 0:
            print(f"Empty image after cropping: {image_path}")
            continue
        
        # Resize the image to the target height and width
        resized_image = cv2.resize(cropped_image, (target_width, target_height))
        
        # Extract HOG features from the resized image
        extracted_features = extract_hog_features(resized_image)
        
        # Add the preprocessed image and extracted features to the lists
        preprocessed_samples.append(resized_image)
        features.append(extracted_features)
    
    return preprocessed_samples, features


# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
crop_areas = [(100, 100, 200, 200), (150, 150, 250, 250), ...]  # Define crop areas for each sample image
target_height = 100  # Define the target height for resizing
target_width = 200   # Define the target width for resizing

preprocessed_samples, features = preprocess_and_extract_features(sample_images_folder, crop_areas, target_height, target_width)
print(f"Number of preprocessed sample images: {len(preprocessed_samples)}")
print(f"Number of extracted features: {len(features)}")


Empty image after cropping: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images\121.jpg
Empty image after cropping: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images\132.jpg
Number of preprocessed sample images: 0
Number of extracted features: 0


In [77]:
import os
import cv2
import numpy as np
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Add features and label to the list
        features_dict = {
            "label": label,
            "stroke_features": stroke_features,
            "shape_features": shape_features,
            "angle_features": angle_features,
            "curvature_features": curvature_features
        }
        features_list.append(features_dict)
    
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"

features = extract_features_from_images(sample_images_folder)
print(f"Number of images processed: {len(features)}")

# Print features with labels
for feature_dict in features:
    label = feature_dict["label"]
    stroke_features = feature_dict["stroke_features"]
    shape_features = feature_dict["shape_features"]
    angle_features = feature_dict["angle_features"]
    curvature_features = feature_dict["curvature_features"]
    
    print(f"Label: {label}")
    print(f"Stroke Features: {stroke_features}")
    print(f"Shape Features: {shape_features}")
    print(f"Angle Features: {angle_features}")
    print(f"Curvature Features: {curvature_features}")




Number of images processed: 3
Label: 121
Stroke Features: [0.40614268 0.         0.         ... 0.04425043 0.11281691 0.19789395]
Shape Features: [2.2386363636363638, 16970.0, 592.970562338829]
Angle Features: []
Curvature Features: [0.3610460357463227, 0.2733306592640722, 0.28524028774387056, 0.22354762001348605]
Label: 132
Stroke Features: [0.18077538 0.         0.         ... 0.02412324 0.01271407 0.        ]
Shape Features: [4.407407407407407, 1372.0, 272.7695519924164]
Angle Features: []
Curvature Features: [0.16037005761779197, 0.15466054138795995, 0.16663398217033137, 0.4219698747315724, 0.1729485936257427]
Label: 153
Stroke Features: [0.33395118 0.06977021 0.01875551 ... 0.13697504 0.         0.        ]
Shape Features: [4.068965517241379, 1499.0, 273.5979790687561]
Angle Features: []
Curvature Features: [0.30036166864038766, 0.3025478311424836]


In [78]:
import os
import cv2
import numpy as np
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Add features and label to the list
        features_dict = {
            "label": label,
            "stroke_features": stroke_features,
            "shape_features": shape_features,
            "angle_features": angle_features,
            "curvature_features": curvature_features
        }
        features_list.append(features_dict)
    
    return features_list

# Example usage:
sample_images_folder = rdataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"


features = extract_features_from_images(sample_images_folder)
print(f"Number of images processed: {len(features)}")

# Print features with labels
for feature_dict in features:
    label = feature_dict["label"]
    stroke_features = feature_dict["stroke_features"]
    shape_features = feature_dict["shape_features"]
    angle_features = feature_dict["angle_features"]
    curvature_features = feature_dict["curvature_features"]
    
    print(f"Label: {label}")
    print(f"Stroke Features: {stroke_features}")
    print(f"Shape Features: {shape_features}")
    print(f"Angle Features: {angle_features}")
    print(f"Curvature Features: {curvature_features}")




Number of images processed: 88
Label: 122
Stroke Features: [0.24647277 0.         0.19918008 ... 0.25015971 0.29002008 0.        ]
Shape Features: [2.0, 53.0, 33.31370830535889]
Angle Features: []
Curvature Features: [0.9759677134264297, 0.27538591252826544, 0.41993235156140035, 0.22954115633843172, 0.2900203585990963, 0.26528230349449095, 0.21901884606585334]
Label: 123
Stroke Features: [0.34430633 0.         0.         ... 0.32419013 0.         0.        ]
Shape Features: [4.785714285714286, 1577.5, 304.18376553058624]
Angle Features: []
Curvature Features: [0.16004908545017452, 0.25191524824621314, 0.3493482052137768]
Label: 124
Stroke Features: [0.42678198 0.08928572 0.42678198 ... 0.05959882 0.14135101 0.19990052]
Shape Features: [1.691358024691358, 10880.0, 432.0]
Angle Features: []
Curvature Features: [0.3521124130613778, 0.34662251029509666, 0.20105620675839506]
Label: 125
Stroke Features: [0.39019667 0.06032594 0.11446043 ... 0.22262799 0.         0.        ]
Shape Features: [

In [5]:
#Training Code:
import os
import cv2
import pandas as pd
import numpy as np
import math

def find_centroid(image):
    # Function to find centroid of an image
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(gray_image, 127, 255, cv2.THRESH_BINARY)
    points = np.argwhere(binary_image == 0)
    mean_x = int(np.mean(points[:, 1]))
    mean_y = int(np.mean(points[:, 0]))
    return (mean_x, mean_y)

def find_slant_angle(centroid_left, centroid_right):
    # Function to find slant angle between two centroids
    delta_x = centroid_right[0] - centroid_left[0]
    delta_y = centroid_right[1] - centroid_left[1]
    angle_radians = math.atan2(delta_y, delta_x)
    angle_degrees = math.degrees(angle_radians)
    return angle_degrees

def divide_image_and_classify(image, num_parts, output_folder):
    # Function to divide image into parts and classify each part
    height, length = image.shape[:2]
    part_length = length // num_parts
    part_classes = []

    for i in range(num_parts):
        start_col = i * part_length
        end_col = (i + 1) * part_length

        cropped_image = image[:, start_col:end_col]

        part_filename = f"part_{i + 1}.png"
        part_path = os.path.join(output_folder, part_filename)
        cv2.imwrite(part_path, cropped_image)

        part_classification = classify_sign(cropped_image)
        print(f"{part_filename} - Classification: {part_classification}")

        part_classes.append(part_classification)

    overall_skill = "Good" if all(part_class == part_classes[0] for part_class in part_classes) else "Not Good"

    return part_classes, overall_skill

def classify_sign(image):
    # Function to classify the sign based on its characteristics
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresholded = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresholded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    aspect_ratios = [cv2.boundingRect(contour)[2] / cv2.boundingRect(contour)[3] for contour in contours]
    edges = cv2.Canny(image, 50, 150)
    contours_canny, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    aspect_ratios_canny = [cv2.boundingRect(contour)[2] / cv2.boundingRect(contour)[3] for contour in contours_canny]
    all_aspect_ratios = aspect_ratios + aspect_ratios_canny

    classification = classify_combined_aspect_ratios(all_aspect_ratios)

    return classification

def classify_combined_aspect_ratios(aspect_ratios):
    # Function to classify the sign based on combined aspect ratios
    round_threshold = 0.8
    angular_threshold = 1.2

    if all(ratio < round_threshold for ratio in aspect_ratios):
        return "Eyed"
    elif any(ratio > angular_threshold for ratio in aspect_ratios):
        return "Angular"
    else:
        return "Round"

def print_and_save_signature_dimensions(folder_path, output_excel_path, output_folder):
    # Function to process images in the given folder and save results
    dimensions_data = {
        'File Name': [],
        'Length': [],
        'Height': [],
        'Ratio (Length/Height)': [],
        'Integer Ratio': [],
        'Sign Classification': [],
        'Overall Skill': [],
        'Centroid X': [],
        'Centroid Y': [],
        'Slant Angle': []
    }

    for filename in os.listdir(folder_path):
        if filename.endswith(('.jpg', '.png', '.jpeg')):
            image_path = os.path.join(folder_path, filename)
            image = cv2.imread(image_path)

            if image is None:
                print(f"Error loading image: {image_path}")
                continue

            height, length = image.shape[:2]
            ratio = length / height
            int_ratio = int(round(ratio))

            output_subfolder = os.path.join(output_folder, os.path.splitext(filename)[0])
            os.makedirs(output_subfolder, exist_ok=True)

            divide_parts_folder = os.path.join(output_subfolder, "parts")
            os.makedirs(divide_parts_folder, exist_ok=True)

            part_classes, overall_skill = divide_image_and_classify(image, int_ratio, divide_parts_folder)

            image_classification = classify_sign(image)
            centroid = find_centroid(image)
            slant_angle = find_slant_angle(centroid, (length // 2, height // 2))

            print(f"{filename} - Length: {length}, Height: {height}, Ratio: {ratio:.2f}, Integer Ratio: {int_ratio}, Classification: {image_classification}, Overall Skill: {overall_skill}, Centroid: {centroid}, Slant Angle: {slant_angle}")

            dimensions_data['File Name'].append(filename)
            dimensions_data['Length'].append(length)
            dimensions_data['Height'].append(height)
            dimensions_data['Ratio (Length/Height)'].append(ratio)
            dimensions_data['Integer Ratio'].append(int_ratio)
            dimensions_data['Sign Classification'].append(image_classification)
            dimensions_data['Overall Skill'].append(overall_skill)
            dimensions_data['Centroid X'].append(centroid[0])
            dimensions_data['Centroid Y'].append(centroid[1])
            dimensions_data['Slant Angle'].append(slant_angle)

    df = pd.DataFrame(dimensions_data)
    df.to_excel(output_excel_path, index=False, engine='openpyxl')

# Paths for training data and output
training_folder = r'D:\AIML Internship\cropped_signatures\cropped_signatures\Kabeer'
output_folder_training = r'D:\AIML Internship\cropped_signatures\cropped_signatures\Kabeer\output'
output_excel_path_training = os.path.join(output_folder_training, 'training_results.xlsx')

# Perform training
print_and_save_signature_dimensions(training_folder, output_excel_path_training, output_folder_training)


part_1.png - Classification: Angular
part_2.png - Classification: Angular
1.jpg - Length: 117, Height: 69, Ratio: 1.70, Integer Ratio: 2, Classification: Angular, Overall Skill: Good, Centroid: (48, 30), Slant Angle: 21.80140948635181
part_1.png - Classification: Angular
part_2.png - Classification: Angular
10.jpg - Length: 110, Height: 64, Ratio: 1.72, Integer Ratio: 2, Classification: Angular, Overall Skill: Good, Centroid: (42, 27), Slant Angle: 21.037511025421818
part_1.png - Classification: Angular
part_2.png - Classification: Angular
11.jpg - Length: 120, Height: 67, Ratio: 1.79, Integer Ratio: 2, Classification: Angular, Overall Skill: Good, Centroid: (46, 27), Slant Angle: 23.19859051364819
part_1.png - Classification: Angular
part_2.png - Classification: Angular
12.jpg - Length: 118, Height: 64, Ratio: 1.84, Integer Ratio: 2, Classification: Angular, Overall Skill: Good, Centroid: (44, 30), Slant Angle: 7.594643368591445
part_1.png - Classification: Angular
13.jpg - Length: 12

In [82]:
import os
import cv2
import numpy as np
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle



def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"

features = extract_features_from_images(sample_images_folder)



Image: 121.jpg
Features: {'label': '121', 'features': array([0.40614268, 0.        , 0.        , ..., 0.27333066, 0.28524029,
       0.22354762]), 'Length': 197, 'Height': 88, 'Ratio (Length/Height)': 2.2386363636363638, 'Integer Ratio': 2, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 98, 'Centroid Y': 43, 'Slant Angle': 92.14785766601562}
Image: 132.jpg
Features: {'label': '132', 'features': array([0.18077538, 0.        , 0.        , ..., 0.16663398, 0.42196987,
       0.17294859]), 'Length': 127, 'Height': 60, 'Ratio (Length/Height)': 2.1166666666666667, 'Integer Ratio': 2, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 55, 'Centroid Y': 22, 'Slant Angle': 87.39740753173828}
Image: 153.jpg
Features: {'label': '153', 'features': array([3.33951183e-01, 6.97702137e-02, 1.87555060e-02, ...,
       2.73597979e+02, 3.00361669e-01, 3.02547831e-01]), 'Length': 130, 'Height': 53, 'Ratio (Length/Height)': 2.452830188679245, 'Integer Ratio':

In [79]:
import os
import cv2
import numpy as np
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Add features and label to the list
        features_dict = {
            "label": label,
            "stroke_features": stroke_features,
            "shape_features": shape_features,
            "angle_features": angle_features,
            "curvature_features": curvature_features
        }
        features_list.append(features_dict)
    
    return features_list

# Example usage:
sample_images_folder = rdataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"


features = extract_features_from_images(sample_images_folder)
print(f"Number of images processed: {len(features)}")

# Print features with labels
for feature_dict in features:
    label = feature_dict["label"]
    stroke_features = feature_dict["stroke_features"]
    shape_features = feature_dict["shape_features"]
    angle_features = feature_dict["angle_features"]
    curvature_features = feature_dict["curvature_features"]
    
    print(f"Label: {label}")
    print(f"Stroke Features: {stroke_features}")
    print(f"Shape Features: {shape_features}")
    print(f"Angle Features: {angle_features}")
    print(f"Curvature Features: {curvature_features}")




Number of images processed: 88
Label: 122
Stroke Features: [0.24647277 0.         0.19918008 ... 0.25015971 0.29002008 0.        ]
Shape Features: [2.0, 53.0, 33.31370830535889]
Angle Features: []
Curvature Features: [0.9759677134264297, 0.27538591252826544, 0.41993235156140035, 0.22954115633843172, 0.2900203585990963, 0.26528230349449095, 0.21901884606585334]
Label: 123
Stroke Features: [0.34430633 0.         0.         ... 0.32419013 0.         0.        ]
Shape Features: [4.785714285714286, 1577.5, 304.18376553058624]
Angle Features: []
Curvature Features: [0.16004908545017452, 0.25191524824621314, 0.3493482052137768]
Label: 124
Stroke Features: [0.42678198 0.08928572 0.42678198 ... 0.05959882 0.14135101 0.19990052]
Shape Features: [1.691358024691358, 10880.0, 432.0]
Angle Features: []
Curvature Features: [0.3521124130613778, 0.34662251029509666, 0.20105620675839506]
Label: 125
Stroke Features: [0.39019667 0.06032594 0.11446043 ... 0.22262799 0.         0.        ]
Shape Features: [

In [40]:
import os
import numpy as np

def extract_features_from_images(images_folder):
    features_list = []
    for filename in os.listdir(images_folder):
        image_path = os.path.join(images_folder, filename)
        # Read the image and extract features (replace this with your feature extraction method)
        features = np.random.rand(10)  # Dummy features for demonstration
        features_list.append((filename, features))
    return features_list

def compare_with_dataset(sample_features, dataset_features, threshold=0.5):
    matched_images = []
    for sample_filename, sample_feature in sample_features:
        for dataset_filename, dataset_feature in dataset_features:
            # Calculate distance between sample and dataset features (replace this with your distance metric)
            distance = np.linalg.norm(sample_feature - dataset_feature)
            # If the distance is below the threshold, consider it a match
            if distance < threshold:
                matched_images.append(dataset_filename)
                break  # Break out of the inner loop since we found a match for the current sample image
    return matched_images

# Paths to sample images and dataset images folders
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
dataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Extract features from sample images and dataset images
sample_features = extract_features_from_images(sample_images_folder)
dataset_features = extract_features_from_images(dataset_images_folder)

# Set a threshold for considering a match (adjust as needed)
threshold = 0.5

# Compare sample features with dataset features
matched_images = compare_with_dataset(sample_features, dataset_features, threshold)

# Print matched image names or numbers
print("Matched images from the dataset:")
for image_name in matched_images:
    print(image_name)

# Optionally, you can also visualize the matched images


Matched images from the dataset:


In [55]:
import os
import numpy as np

def extract_features_from_images(images_folder):
    features_list = []
    for filename in os.listdir(images_folder):
        image_path = os.path.join(images_folder, filename)
        # Read the image and extract features (replace this with your feature extraction method)
        features = np.random.rand(10)  # Dummy features for demonstration
        features_list.append((filename, features))
    return features_list

def compare_with_dataset(sample_features, dataset_features, threshold=0.5):
    matched_images = []
    for sample_filename, sample_feature in sample_features:
        for dataset_filename, dataset_feature in dataset_features:
            # Calculate distance between sample and dataset features (replace this with your distance metric)
            distance = np.linalg.norm(sample_feature - dataset_feature)
            print(f"Distance between {sample_filename} and {dataset_filename}: {distance}")
            # If the distance is below the threshold, consider it a match
            if distance < threshold:
                matched_images.append((sample_filename, dataset_filename))
                break  # Break out of the inner loop since we found a match for the current sample image
    return matched_images

# Paths to sample images and dataset images folders
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
dataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Extract features from sample images and dataset images
sample_features = extract_features_from_images(sample_images_folder)
dataset_features = extract_features_from_images(dataset_images_folder)

# Set a threshold for considering a match (adjust as needed)
threshold = 0.1  # Lower the threshold to increase the chances of finding matches

# Compare sample features with dataset features
matched_images = compare_with_dataset(sample_features, dataset_features, threshold)

# Print matched image names or numbers along with the corresponding sample image names
if matched_images:
    print("Matched images from the dataset:")
    for sample_image, dataset_image in matched_images:
        print(f"Sample Image: {sample_image}, Matched Dataset Image: {dataset_image}")
else:
    print("No matching images found in the dataset.")


Distance between 121.jpg and 122.jpg: 1.2447070170436367
Distance between 121.jpg and 123.jpg: 1.1098740516933254
Distance between 121.jpg and 124.jpg: 1.5207528448708652
Distance between 121.jpg and 125.jpg: 0.837242409107747
Distance between 121.jpg and 126.jpg: 1.5775923034156643
Distance between 121.jpg and 127.jpg: 1.3094638597271862
Distance between 121.jpg and 128.jpg: 1.2573828141559111
Distance between 121.jpg and 129.jpg: 1.2234025719136403
Distance between 121.jpg and 130.jpg: 1.3792095028204023
Distance between 121.jpg and 131.jpg: 1.311863722467605
Distance between 121.jpg and 133.jpg: 1.055495286605202
Distance between 121.jpg and 134.jpg: 1.819160154526251
Distance between 121.jpg and 135.jpg: 1.7233962629836368
Distance between 121.jpg and 136.jpg: 1.6264445339430624
Distance between 121.jpg and 137.jpg: 1.319501697354919
Distance between 121.jpg and 138.jpg: 1.397341046320521
Distance between 121.jpg and 139.jpg: 1.3980375792699746
Distance between 121.jpg and 140.jpg:

In [83]:
import os
import numpy as np

def extract_features_from_images(images_folder):
    features_list = []
    for filename in os.listdir(images_folder):
        image_path = os.path.join(images_folder, filename)
        # Read the image and extract features (replace this with your feature extraction method)
        features = np.random.rand(10)  # Dummy features for demonstration
        features_list.append((filename, features))
    return features_list

def compare_with_dataset(sample_features, dataset_features, threshold=0.5):
    matched_images = []
    for sample_filename, sample_feature in sample_features:
        matched_dataset_index = None
        for dataset_index, (dataset_filename, dataset_feature) in enumerate(dataset_features):
            # Calculate distance between sample and dataset features (replace this with your distance metric)
            distance = np.linalg.norm(sample_feature - dataset_feature)
            # If the distance is below the threshold, consider it a match
            if distance < threshold:
                matched_dataset_index = dataset_index
                print(f"Matched sample image: {sample_filename}, matched dataset image index: {matched_dataset_index}")
                break  # Break out of the inner loop since we found a match for the current sample image
        matched_images.append((sample_filename, matched_dataset_index))
    return matched_images

# Paths to sample images and dataset images folders
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
dataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Extract features from sample images and dataset images
sample_features = extract_features_from_images(sample_images_folder)
dataset_features = extract_features_from_images(dataset_images_folder)

# Set a threshold for considering a match (adjust as needed)
threshold = 0.1  # Lower the threshold to increase the chances of finding matches

# Compare sample features with dataset features
matched_images = compare_with_dataset(sample_features, dataset_features, threshold)

# Print matched image names or numbers along with the corresponding sample image names
if matched_images:
    print("Matched images from the dataset:")
    for sample_image, matched_dataset_index in matched_images:
        if matched_dataset_index is not None:
            print(f"Sample Image: {sample_image}, Matched Dataset Image Index: {matched_dataset_index}")
else:
    print("No matching images found in the dataset.")


Matched images from the dataset:


In [30]:
import os
import cv2
import numpy as np
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle


def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer sample image"

features = extract_features_from_images(sample_images_folder)



Image: 1.jpg
Features: {'label': '1', 'features': array([0.04132208, 0.        , 0.0233753 , ..., 0.32553102, 0.81145465,
       0.36577128]), 'Length': 117, 'Height': 69, 'Ratio (Length/Height)': 1.6956521739130435, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 58, 'Centroid Y': 34, 'Slant Angle': 0}
Image: 56.jpg
Features: {'label': '56', 'features': array([0.02936837, 0.00273624, 0.0034611 , ..., 0.18845876, 0.35122746,
       0.35669672]), 'Length': 118, 'Height': 62, 'Ratio (Length/Height)': 1.903225806451613, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 79, 'Centroid Y': 15, 'Slant Angle': 89.0597915649414}
Number of images processed: 2


In [49]:
import os
import numpy as np

def extract_features_from_images(images_folder):
    features_list = []
    for filename in os.listdir(images_folder):
        image_path = os.path.join(images_folder, filename)
        # Read the image and extract features (replace this with your feature extraction method)
        features = np.random.rand(10)  # Dummy features for demonstration
        features_list.append((filename, features))
    return features_list

def compare_with_dataset(sample_features, dataset_features, threshold=0.5):
    matched_images = []
    for sample_filename, sample_feature in sample_features:
        matched_dataset_filenames = []  # List to store matched dataset filenames for the current sample image
        for dataset_filename, dataset_feature in dataset_features:
            # Calculate distance between sample and dataset features (replace this with your distance metric)
            distance = np.linalg.norm(sample_feature - dataset_feature)
            # If the distance is below the threshold, consider it a match
            if distance < threshold:
                matched_dataset_filenames.append(dataset_filename)
        matched_images.append((sample_filename, matched_dataset_filenames))  # Append tuple of sample image and matched dataset filenames
    return matched_images

# Paths to sample images and dataset images folders
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
dataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Extract features from sample images and dataset images
sample_features = extract_features_from_images(sample_images_folder)
dataset_features = extract_features_from_images(dataset_images_folder)

# Set a threshold for considering a match (adjust as needed)
threshold = 0.1  # Lower the threshold to increase the chances of finding matches

# Compare sample features with dataset features
matched_images = compare_with_dataset(sample_features, dataset_features, threshold)

# Print matched image names or numbers along with the corresponding sample image names
if matched_images:
    print("Matched images from the dataset:")
    for sample_image, matched_dataset_filenames in matched_images:
        if matched_dataset_filenames:
            print(f"Sample Image: {sample_image}, Matched Dataset Images: {', '.join(matched_dataset_filenames)}")
else:
    print("No matching images found in the dataset.")


Matched images from the dataset:


In [81]:
import os
import cv2
import numpy as np
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle


def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

features = extract_features_from_images(sample_images_folder)



Image: 122.jpg
Features: {'label': '122', 'features': array([0.24647277, 0.        , 0.19918008, ..., 0.29002036, 0.2652823 ,
       0.21901885]), 'Length': 169, 'Height': 56, 'Ratio (Length/Height)': 3.017857142857143, 'Integer Ratio': 3, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 85, 'Centroid Y': 27, 'Slant Angle': 91.28682708740234}
Image: 123.jpg
Features: {'label': '123', 'features': array([0.34430633, 0.        , 0.        , ..., 0.16004909, 0.25191525,
       0.34934821]), 'Length': 134, 'Height': 64, 'Ratio (Length/Height)': 2.09375, 'Integer Ratio': 2, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 92, 'Centroid Y': 19, 'Slant Angle': 75.56608581542969}
Image: 124.jpg
Features: {'label': '124', 'features': array([0.42678198, 0.08928572, 0.42678198, ..., 0.35211241, 0.34662251,
       0.20105621]), 'Length': 137, 'Height': 81, 'Ratio (Length/Height)': 1.691358024691358, 'Integer Ratio': 1, 'Sign Classification': 'Angular'

In [86]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle


def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

features = extract_features_from_images(sample_images_folder)

# Convert features to DataFrame
df = pd.DataFrame(features)

# Save DataFrame to Excel file
excel_filename = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\dataset.xlsx"
df.to_excel(excel_filename, index=False)

print(f"Features saved to {excel_filename}")


Image: 122.jpg
Features: {'label': '122', 'features': array([0.24647277, 0.        , 0.19918008, ..., 0.29002036, 0.2652823 ,
       0.21901885]), 'Length': 169, 'Height': 56, 'Ratio (Length/Height)': 3.017857142857143, 'Integer Ratio': 3, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 85, 'Centroid Y': 27, 'Slant Angle': 91.28682708740234}
Image: 123.jpg
Features: {'label': '123', 'features': array([0.34430633, 0.        , 0.        , ..., 0.16004909, 0.25191525,
       0.34934821]), 'Length': 134, 'Height': 64, 'Ratio (Length/Height)': 2.09375, 'Integer Ratio': 2, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 92, 'Centroid Y': 19, 'Slant Angle': 75.56608581542969}
Image: 124.jpg
Features: {'label': '124', 'features': array([0.42678198, 0.08928572, 0.42678198, ..., 0.35211241, 0.34662251,
       0.20105621]), 'Length': 137, 'Height': 81, 'Ratio (Length/Height)': 1.691358024691358, 'Integer Ratio': 1, 'Sign Classification': 'Angular'

In [88]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle


def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"

features = extract_features_from_images(sample_images_folder)

# Convert features to DataFrame
df = pd.DataFrame(features)

# Save DataFrame to Excel file
excel_filename = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample.xlsx"
df.to_excel(excel_filename, index=False)

print(f"Features saved to {excel_filename}")


Image: 121.jpg
Features: {'label': '121', 'features': array([0.40614268, 0.        , 0.        , ..., 0.27333066, 0.28524029,
       0.22354762]), 'Length': 197, 'Height': 88, 'Ratio (Length/Height)': 2.2386363636363638, 'Integer Ratio': 2, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 98, 'Centroid Y': 43, 'Slant Angle': 92.14785766601562}
Image: 132.jpg
Features: {'label': '132', 'features': array([0.18077538, 0.        , 0.        , ..., 0.16663398, 0.42196987,
       0.17294859]), 'Length': 127, 'Height': 60, 'Ratio (Length/Height)': 2.1166666666666667, 'Integer Ratio': 2, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 55, 'Centroid Y': 22, 'Slant Angle': 87.39740753173828}
Image: 153.jpg
Features: {'label': '153', 'features': array([3.33951183e-01, 6.97702137e-02, 1.87555060e-02, ...,
       2.73597979e+02, 3.00361669e-01, 3.02547831e-01]), 'Length': 130, 'Height': 53, 'Ratio (Length/Height)': 2.452830188679245, 'Integer Ratio':

In [95]:
import pandas as pd
import numpy as np

# Load features from Excel files
sample_features_file =r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample.xlsx"
dataset_features_file = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\dataset.xlsx"

sample_df = pd.read_excel(sample_features_file)
dataset_df = pd.read_excel(dataset_features_file)

# Convert feature vectors to numpy arrays
sample_df['features'] = sample_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
dataset_df['features'] = dataset_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))

# Define a function to calculate similarity between feature vectors
def calculate_similarity(feature_vector1, feature_vector2):
    # Here you can use any suitable similarity metric, such as Euclidean distance or cosine similarity
    return np.linalg.norm(feature_vector1 - feature_vector2)  # Euclidean distance

# Set a threshold for similarity
threshold = 0.5  # Adjust as needed

# Dictionary to store most similar dataset image for each sample image
most_similar_images = {}

# Iterate over sample images
for index, sample_row in sample_df.iterrows():
    sample_feature_vector = sample_row["features"]
    sample_label = sample_row["label"]
    most_similar_image = None
    min_similarity = float('inf')
    
    # Iterate over dataset images
    for index, dataset_row in dataset_df.iterrows():
        dataset_feature_vector = dataset_row["features"]
        dataset_label = dataset_row["label"]
        
        # Calculate similarity between sample and dataset feature vectors
        similarity = calculate_similarity(sample_feature_vector, dataset_feature_vector)
        
        # If similarity is below threshold and better than previous matches, update most similar image
        if similarity < threshold and similarity < min_similarity:
            most_similar_image = dataset_label
            min_similarity = similarity
    
    # Store the most similar dataset image for the current sample image
    if most_similar_image is not None:
        most_similar_images[sample_label] = most_similar_image

# Print most similar images for each sample image
print("Most similar images from the dataset for each sample image:")
for sample_image, similar_image in most_similar_images.items():
    print(f"Sample Image: {sample_image}, Most Similar Dataset Image: {similar_image}")


Most similar images from the dataset for each sample image:
Sample Image: 121, Most Similar Dataset Image: 136
Sample Image: 132, Most Similar Dataset Image: 203
Sample Image: 153, Most Similar Dataset Image: 208


  sample_df['features'] = sample_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
  dataset_df['features'] = dataset_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))


In [98]:
import os
import numpy as np

def extract_features_from_images(images_folder):
    features_list = []
    for filename in os.listdir(images_folder):
        image_path = os.path.join(images_folder, filename)
        # Read the image and extract features (replace this with your feature extraction method)
        features = np.random.rand(10)  # Dummy features for demonstration
        features_list.append((filename, features))
    return features_list

def compare_with_dataset(sample_features, dataset_features, threshold=0.5):
    matched_images = []
    for sample_filename, sample_feature in sample_features:
        matched_dataset_images = []
        for dataset_filename, dataset_feature in dataset_features:
            # Calculate distance between sample and dataset features (replace this with your distance metric)
            distance = np.linalg.norm(sample_feature - dataset_feature)
            # If the distance is below the threshold, consider it a match
            if distance < threshold:
                matched_dataset_images.append(dataset_filename)
        if matched_dataset_images:
            matched_images.append((sample_filename, matched_dataset_images))
    return matched_images

def test_signature_matching(sample_images_folder, dataset_images_folder, threshold=0.5):
    # Extract features from sample images and dataset images
    sample_features = extract_features_from_images(sample_images_folder)
    dataset_features = extract_features_from_images(dataset_images_folder)

    # Compare sample features with dataset features
    matched_images = compare_with_dataset(sample_features, dataset_features, threshold)

    # Print matched image names or numbers along with the corresponding sample image names
    if matched_images:
        print("Matched images from the dataset:")
        for sample_image, matched_dataset_images in matched_images:
            print(f"Sample Image: {sample_image}, Matched Dataset Images: {matched_dataset_images}")
    else:
        print("No matching images found in the dataset.")

# Paths to sample images and dataset images folders
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample images"
dataset_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Set a threshold for considering a match (adjust as needed)
threshold = 0.5

# Test the signature matching
test_signature_matching(sample_images_folder, dataset_images_folder, threshold)


No matching images found in the dataset.


In [101]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle


def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\kabeer sample image"

features = extract_features_from_images(sample_images_folder)

# Convert features to DataFrame
df = pd.DataFrame(features)

# Save DataFrame to Excel file
excel_filename = r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\sample.xlsx"
df.to_excel(excel_filename, index=False)

print(f"Features saved to {excel_filename}")


Image: 1.jpg
Features: {'label': '1', 'features': array([0.04132208, 0.        , 0.0233753 , ..., 0.32553102, 0.81145465,
       0.36577128]), 'Length': 117, 'Height': 69, 'Ratio (Length/Height)': 1.6956521739130435, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 58, 'Centroid Y': 34, 'Slant Angle': 0}
Image: 56.jpg
Features: {'label': '56', 'features': array([0.02936837, 0.00273624, 0.0034611 , ..., 0.18845876, 0.35122746,
       0.35669672]), 'Length': 118, 'Height': 62, 'Ratio (Length/Height)': 1.903225806451613, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 79, 'Centroid Y': 15, 'Slant Angle': 89.0597915649414}
Number of images processed: 2
Features saved to D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\sample.xlsx


In [104]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

def extract_stroke_features(image):
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    return features

def extract_shape_features(image):
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    shape_features = []
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        area = cv2.contourArea(contour)
        perimeter = cv2.arcLength(contour, True)
        shape_features = [aspect_ratio, area, perimeter]
    return shape_features

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle


def extract_angle_features(image):
    # Calculate angle features (e.g., angles between strokes)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100, minLineLength=100, maxLineGap=10)
    angle_features = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
            angle_features.append(angle)
    return angle_features

def extract_curvature_features(image):
    # Calculate curvature features
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    curvature_features = []
    if contours:
        for contour in contours:
            M = cv2.moments(contour)
            if M["m00"] != 0:
                cx = int(M["m10"] / M["m00"])
                cy = int(M["m01"] / M["m00"])
                cX, cY = cx, cy
                points = contour[:, 0]
                prev_point = points[0]
                total_distance = 0
                total_curvature = 0
                num_points = 0
                for point in points[1:]:
                    distance = np.linalg.norm(point - prev_point)
                    total_distance += distance
                    angle = np.arctan2(point[1] - prev_point[1], point[0] - prev_point[0])
                    curvature = np.abs(angle - prev_angle) / distance if num_points > 0 else 0
                    total_curvature += curvature
                    prev_point = point
                    prev_angle = angle
                    num_points += 1
                if num_points > 0:
                    mean_curvature = total_curvature / total_distance
                    curvature_features.append(mean_curvature)
    return curvature_features

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"


def extract_additional_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def extract_features_from_images(sample_images_folder):
    features_list = []
    for filename in os.listdir(sample_images_folder):
        image_path = os.path.join(sample_images_folder, filename)
        
        # Extract label from filename
        label = os.path.splitext(filename)[0]
        
        # Read the image
        image = cv2.imread(image_path)
        if image is None:
            print(f"Unable to read image: {image_path}")
            continue  # Skip this image if loading fails
        
        # Extract additional features
        additional_features = extract_additional_features(image)
        
        # Extract stroke features
        stroke_features = extract_stroke_features(image)
        
        # Extract shape features
        shape_features = extract_shape_features(image)
        
        # Extract angle features
        angle_features = extract_angle_features(image)
        
        # Extract curvature features
        curvature_features = extract_curvature_features(image)
        
        # Combine all features into a single feature vector
        all_features = np.concatenate((stroke_features, shape_features, angle_features, curvature_features))
        
        # Add features, label, and additional features to the list
        features_dict = {
            "label": label,
            "features": all_features,
            **additional_features
        }
        features_list.append(features_dict)
        
        # Print feature values
        print(f"Image: {filename}")
        print(f"Features: {features_dict}")
    
    print(f"Number of images processed: {len(features_list)}")
    return features_list

# Example usage:
sample_images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\Kabeer"

features = extract_features_from_images(sample_images_folder)

# Convert features to DataFrame
df = pd.DataFrame(features)

# Save DataFrame to Excel file
excel_filename = r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\dataset.xlsx"
df.to_excel(excel_filename, index=False)

print(f"Features saved to {excel_filename}")


Image: 10.jpg
Features: {'label': '10', 'features': array([0.00934109, 0.00094517, 0.00750182, ..., 0.81145465, 0.32008575,
       0.25548501]), 'Length': 110, 'Height': 64, 'Ratio (Length/Height)': 1.71875, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 64, 'Centroid Y': 46, 'Slant Angle': 81.65257263183594}
Image: 11.jpg
Features: {'label': '11', 'features': array([0.15963098, 0.03072642, 0.08532324, ..., 0.30225756, 0.37035011,
       0.39789038]), 'Length': 120, 'Height': 67, 'Ratio (Length/Height)': 1.791044776119403, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill': 'Good', 'Centroid X': 71, 'Centroid Y': 35, 'Slant Angle': 95.05685424804688}
Image: 12.jpg
Features: {'label': '12', 'features': array([0.0278378 , 0.        , 0.00145543, ..., 0.13377541, 0.40885998,
       0.16950834]), 'Length': 118, 'Height': 64, 'Ratio (Length/Height)': 1.84375, 'Integer Ratio': 1, 'Sign Classification': 'Angular', 'Overall Skill

In [106]:
import pandas as pd
import numpy as np

# Load features from Excel files
sample_features_file =r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\sample.xlsx"
dataset_features_file = r"D:\AIML Internship\cropped_signatures\cropped_signatures\kabeer main\dataset.xlsx"

sample_df = pd.read_excel(sample_features_file)
dataset_df = pd.read_excel(dataset_features_file)

# Convert feature vectors to numpy arrays
sample_df['features'] = sample_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
dataset_df['features'] = dataset_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))

# Define a function to calculate similarity between feature vectors
def calculate_similarity(feature_vector1, feature_vector2):
    # Here you can use any suitable similarity metric, such as Euclidean distance or cosine similarity
    return np.linalg.norm(feature_vector1 - feature_vector2)  # Euclidean distance

# Set a threshold for similarity
threshold = 0.5  # Adjust as needed

# Dictionary to store most similar dataset image for each sample image
most_similar_images = {}

# Iterate over sample images
for index, sample_row in sample_df.iterrows():
    sample_feature_vector = sample_row["features"]
    sample_label = sample_row["label"]
    most_similar_image = None
    min_similarity = float('inf')
    
    # Iterate over dataset images
    for index, dataset_row in dataset_df.iterrows():
        dataset_feature_vector = dataset_row["features"]
        dataset_label = dataset_row["label"]
        
        # Calculate similarity between sample and dataset feature vectors
        similarity = calculate_similarity(sample_feature_vector, dataset_feature_vector)
        
        # If similarity is below threshold and better than previous matches, update most similar image
        if similarity < threshold and similarity < min_similarity:
            most_similar_image = dataset_label
            min_similarity = similarity
    
    # Store the most similar dataset image for the current sample image
    if most_similar_image is not None:
        most_similar_images[sample_label] = most_similar_image

# Print most similar images for each sample image
print("Most similar images from the dataset for each sample image:")
for sample_image, similar_image in most_similar_images.items():
    print(f"Sample Image: {sample_image}, Most Similar Dataset Image: {similar_image}")


Most similar images from the dataset for each sample image:
Sample Image: 1, Most Similar Dataset Image: 71
Sample Image: 56, Most Similar Dataset Image: 12


  sample_df['features'] = sample_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))
  dataset_df['features'] = dataset_df['features'].apply(lambda x: np.fromstring(x[1:-1], sep=' '))


In [None]:
 # Define input and output folders
    signs_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"
    output_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Parts folder"
    csv_file =r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\dataset.xlsx"


In [7]:
import os
import cv2
import numpy as np
from skimage.feature import hog  # Import the hog function

# Define the extract_features function with the updated code
def extract_features(images):
    features = []
    for filename, image in images:
        # Calculate image features
        height, length, _ = image.shape
        ratio = length / height
        integer_ratio = int(ratio)
        # Classify the sign
        sign_classification = classify_sign(image)
        # Assess overall skill
        overall_skill = assess_skill(image)
        
        # Calculate centroid coordinates for each partitioned image
        centroid_x_list, centroid_y_list = [], []
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            for part_index, contour in enumerate(contours):
                M = cv2.moments(contour)
                if M["m00"] != 0:
                    centroid_x = int(M["m10"] / M["m00"])
                    centroid_y = int(M["m01"] / M["m00"])
                    centroid_x_list.append(centroid_x)
                    centroid_y_list.append(centroid_y)
                else:
                    centroid_x_list.append(-1)
                    centroid_y_list.append(-1)
        else:
            centroid_x_list.append(-1)
            centroid_y_list.append(-1)

        # Calculate slant angle
        slant_angle = calculate_slant_angle(image)
        
        # Append features to the list
        features.append({
            'File Name': filename,
            'Length': length,
            'Height': height,
            'Ratio (Length/Height)': ratio,
            'Integer Ratio': integer_ratio,
            'Sign Classification': sign_classification,
            'Overall Skill': overall_skill,
            'Centroid X': centroid_x_list,
            'Centroid Y': centroid_y_list,
            'Slant Angle': slant_angle
        })
    return features


# Function to classify the sign
def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

# Function to assess the overall skill of the signature
def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"

# Function to calculate slant angle
def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle

import pandas as pd

# Define a function to save the extracted features to an Excel file
def save_features_to_excel(features, excel_file):
    df = pd.DataFrame(features)
    df.to_excel(excel_file, index=False)

# Call the function to save the features to an Excel file
save_features_to_excel(features, 'extracted_features.xlsx')


# Main function
def main():
    # Define input and output folders
    signs_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"
    output_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Parts folder"
    csv_file =r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\dataset.xlsx"

    # Load and preprocess images
    images = load_and_preprocess_images(signs_folder)

    # Divide images into parts based on ratio
    divide_images_into_parts(images, output_folder)

    # Extract features from each image
    features = extract_features(images)

    # Save extracted features to a CSV file
    save_features_to_csv(features, csv_file)

if __name__ == "__main__":
    main()


NameError: name 'features' is not defined

In [10]:
import os
import cv2
import numpy as np
import pandas as pd

def partition_and_save(image, num_parts, output_folder):
    height, width, _ = image.shape
    part_width = width // num_parts
    
    for i in range(num_parts):
        part_image = image[:, i*part_width:(i+1)*part_width, :]
        cv2.imwrite(os.path.join(output_folder, f"part_{i+1}.png"), part_image)

def extract_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"

def calculate_slant_angle(image):
    # Replace this with your method for calculating the slant angle
    # For demonstration purposes, let's return a default angle of 0
    return 0

# Define the directory containing the images
images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Initialize an empty list to store the features of all images
features_list = []

# Iterate over all images in the folder
for filename in os.listdir(images_folder):
    image_path = os.path.join(images_folder, filename)
    
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to read image: {image_path}")
        continue  # Skip this image if loading fails
    
    # Partition the image based on its ratio and save the parts to folders
    ratio = image.shape[1] / image.shape[0]
    if ratio > 4:
        num_parts = 4
    elif ratio > 3:
        num_parts = 3
    elif ratio > 2:
        num_parts = 2
    else:
        num_parts = 1
    
    output_folder = os.path.join(images_folder, f"{num_parts}_parts", filename.split('.')[0])
    os.makedirs(output_folder, exist_ok=True)
    
    partition_and_save(image, num_parts, output_folder)
    
    # Extract features from the image
    image_features = extract_features(image)
    
    # Add the features to the list
    features_list.append(image_features)

# Create a DataFrame from the list of features
df = pd.DataFrame(features_list)

# Save the DataFrame to an Excel file
df.to_excel('extracted_features.xlsx', index=False)


In [32]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog

def partition_and_save(image, num_parts, output_folder):
    height, width, _ = image.shape
    part_width = width // num_parts
    
    for i in range(num_parts):
        part_image = image[:, i*part_width:(i+1)*part_width, :]
        part_folder = os.path.join(output_folder, f"part_{i+1}")
        os.makedirs(part_folder, exist_ok=True)
        cv2.imwrite(os.path.join(part_folder, f"{i+1}.png"), part_image)

def extract_features(image):
    # Calculate length, height, ratio, integer ratio
    height, length, _ = image.shape
    ratio = length / height
    integer_ratio = int(ratio)
    
    # Classify the sign
    sign_classification = classify_sign(image)
    
    # Assess overall skill
    overall_skill = assess_skill(image)
    
    # Calculate centroid coordinates
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(contour)
        if M["m00"] != 0:
            centroid_x = int(M["m10"] / M["m00"])
            centroid_y = int(M["m01"] / M["m00"])
        else:
            centroid_x, centroid_y = -1, -1
    else:
        centroid_x, centroid_y = -1, -1
    
    # Calculate slant angle
    slant_angle = calculate_slant_angle(image)
    
    return {
        'File Name': filename,
        'Length': length,
        'Height': height,
        'Ratio (Length/Height)': ratio,
        'Integer Ratio': integer_ratio,
        'Sign Classification': sign_classification,
        'Overall Skill': overall_skill,
        'Centroid X': centroid_x,
        'Centroid Y': centroid_y,
        'Slant Angle': slant_angle
    }

def classify_sign(image):
    # Replace this with your own sign classification method
    # Example: Determine if the sign is angular, circular, or something else
    # You may use machine learning models, rules-based systems, etc.
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate stroke features using HOG
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    features, _ = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), visualize=True, block_norm='L2-Hys')
    
    # Simple rule-based classification
    if len(features) < 1000:
        return "Circular"
    elif "eyed" in filename.lower():  # Check if filename contains "eyed"
        return "Eyed"
    else:
        return "Angular"

def assess_skill(image):
    # Replace this with your own method for assessing the overall skill of the signature
    # Example: Use machine learning models or rules-based systems to evaluate the signature's quality
    # For demonstration purposes, let's assume a simple rule-based system
    
    # Calculate shape features (e.g., aspect ratio, area, perimeter)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour = contours[0] if contours else None
    
    # Simple rule-based assessment
    if contour is not None:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if aspect_ratio > 0.5:
            return "Good"
        else:
            return "Poor"
    else:
        return "Poor"

def calculate_slant_angle(image):
    # Convert image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Threshold the image to obtain a binary image
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
    
    # Find contours in the binary image
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    # Find the largest contour (signature)
    largest_contour = max(contours, key=cv2.contourArea)
    
    # Check if the contour contains at least 5 points
    if len(largest_contour) < 5:
        # If the contour has too few points, return a default angle (0 degrees)
        return 0
    
    # Perform principal component analysis (PCA) on the largest contour
    _, _, angle = cv2.fitEllipse(largest_contour)
    
    # The angle returned by fitEllipse is the angle of rotation of the fitted ellipse
    # We use this as an estimate of the slant angle of the signature
    return angle

# Define the directory containing the images
images_folder = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain"

# Initialize an empty list to store the features of all images
features_list = []

# Iterate over all images in the folder
for filename in os.listdir(images_folder):
    image_path = os.path.join(images_folder, filename)
    
    # Read the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Unable to read image: {image_path}")
        continue  # Skip this image if loading fails
    
    # Partition the image based on its aspect ratio and save the parts to folders
    ratio = image.shape[1] / image.shape[0]
    if ratio > 4:
        num_parts = 4
    elif ratio > 3:
        num_parts = 3
    elif ratio > 2:
        num_parts = 2
    else:
        num_parts = 1
    
    output_folder = os.path.join(images_folder, f"{num_parts}_parts", filename.split('.')[0])
    os.makedirs(output_folder, exist_ok=True)
    
    partition_and_save(image, num_parts, output_folder)
    
    # Extract features from the image
    image_features = extract_features(image)
    
    # Add the image name to the features dictionary
    image_features['File Name'] = filename
    
    # Append the features to the list
    features_list.append(image_features)




Unable to read image: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain\1_parts
Unable to read image: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain\2_parts
Unable to read image: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain\3_parts
Unable to read image: D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\Hussain\4_parts


In [33]:
# Convert the features list to a DataFrame
features_df = pd.DataFrame(features_list)

# Define the output Excel file
output_excel = r"D:\AIML Internship\cropped_signatures\cropped_signatures\Hussain\sample.xlsx"

# Save the DataFrame to an Excel file
features_df.to_excel(output_excel, index=False)