In [112]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

In [113]:
IMAGE_PATH = 'Dataset'
IMAGE_FOR_MATCHING_PATH = 'DatasetForMatching'

In [114]:
def preprocess_image(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.GaussianBlur(img, (5, 5), 0)
    img = cv2.equalizeHist(img)
    return img

In [115]:
def compute_match_percentage(img1, img2, feature_detector):
    """Compute the match percentage between two images using keypoints and RANSAC."""
    
    # Detect keypoints and compute descriptors for both images
    keypoints1, descriptors1 = feature_detector.detectAndCompute(img1, None)
    keypoints2, descriptors2 = feature_detector.detectAndCompute(img2, None)

    if isinstance(feature_detector, cv2.ORB):  # ORB uses binary descriptors (Hamming distance)
        bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
        matches = bf.match(descriptors1, descriptors2)
    else:
        # For SIFT or other detectors, use FLANN (descriptors are float32)
        index_params = dict(algorithm=1, trees=5)
        search_params = dict(checks=50)
        flann = cv2.FlannBasedMatcher(index_params, search_params)
        matches = flann.knnMatch(descriptors1, descriptors2, k=2)

    # Lowe's ratio test (only for FLANN-based matching)
    if not isinstance(feature_detector, cv2.ORB):
        good_matches = [m for m, n in matches if m.distance < 0.7 * n.distance]
    else:
        good_matches = matches  # No need for ratio test in BFMatcher with crossCheck=True

    # Estimate homography using RANSAC if there are enough good matches
    valid_matches = 0
    if len(good_matches) >= 4:
        src_pts = np.float32([keypoints1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([keypoints2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
        _, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if mask is not None:
            valid_matches = sum(mask.ravel())

    total_keypoints = len(keypoints1)
    match_percentage = (valid_matches / total_keypoints * 100) if total_keypoints > 0 else 0
    return len(keypoints1), valid_matches, match_percentage


In [116]:
def generate_dataset(dataset_path, matching_images_path):
    data = []
    matching_images = {}
    for f in os.listdir(matching_images_path):
        if f.endswith(('.png', '.jpg', '.jpeg')):
            img_path = os.path.join(matching_images_path, f)
            preprocessed_image = preprocess_image(img_path)
            matching_images[os.path.basename(f)] = preprocessed_image

    for root, _, files in os.walk(dataset_path):
        nominal_label = os.path.basename(root)
        print(f"Processing {nominal_label}")
        
        for file in tqdm(files):
            if file.endswith(('.png', '.jpg', '.jpeg')):

                img_path = os.path.join(root, file)
                img_preprocessed = preprocess_image(img_path)
                img = cv2.imread(img_path)

                # Collect image metadata
                img_name = os.path.basename(file)
                height, width = img.shape[:2]
                total_pixels = height * width
                avg_color_R, avg_color_G, avg_color_B = img.mean(axis=(0, 1)).tolist()

                # Match with each image in DatasetForMatching
                sift_match_results = {}
                for match_name, match_img in matching_images.items():
                    _, _, match_percentage = compute_match_percentage(img_preprocessed, match_img, feature_detector = cv2.SIFT_create())
                    sift_match_results[f"sift_match_percentage_with_{match_name}"] = match_percentage

                orb_match_results = {}
                for match_name, match_img in matching_images.items():
                    _, _, match_percentage = compute_match_percentage(img_preprocessed, match_img, feature_detector = cv2.ORB_create())
                    orb_match_results[f"orb_match_percentage_with_{match_name}"] = match_percentage

                # Append data
                row = {
                    "image_name": img_name,
                    "nominal_label": nominal_label,
                    "total_pixels": total_pixels,
                    "avg_color_R": avg_color_R,
                    "avg_color_G": avg_color_G,
                    "avg_color_B": avg_color_B,
                    **sift_match_results,
                    **orb_match_results
                }
                data.append(row)

    # Save as a DataFrame
    df = pd.DataFrame(data)
    df.to_csv("rupiah_classification.csv", index=False)
    print("Dataset saved as rupiah_classification.csv")

In [117]:
# Paths to the dataset folders
generate_dataset(IMAGE_PATH, IMAGE_FOR_MATCHING_PATH)

Processing Dataset


0it [00:00, ?it/s]


Processing 1000


100%|██████████| 128/128 [1:08:22<00:00, 32.05s/it]


Processing 10000


100%|██████████| 128/128 [13:53<00:00,  6.51s/it]


Processing 100000


100%|██████████| 128/128 [1:22:22<00:00, 38.62s/it]


Processing 2000


100%|██████████| 128/128 [1:07:28<00:00, 31.63s/it]


Processing 20000


100%|██████████| 128/128 [1:09:28<00:00, 32.57s/it]


Processing 5000


100%|██████████| 128/128 [58:50<00:00, 27.58s/it] 


Processing 50000


100%|██████████| 128/128 [08:18<00:00,  3.90s/it]

Dataset saved as rupiah_classification.csv





In [118]:
rp_classification_df = pd.read_csv("rupiah_classification.csv")
rp_classification_df.head(5)

Unnamed: 0,image_name,nominal_label,total_pixels,avg_color_R,avg_color_G,avg_color_B,sift_match_percentage_with_belakang_100k.jpg,sift_match_percentage_with_belakang_10k.jpg,sift_match_percentage_with_belakang_1k.jpg,sift_match_percentage_with_belakang_20k.jpg,...,orb_match_percentage_with_belakang_2k.jpg,orb_match_percentage_with_belakang_50k.jpg,orb_match_percentage_with_belakang_5k.jpg,orb_match_percentage_with_depan_100k.jpg,orb_match_percentage_with_depan_10k.jpg,orb_match_percentage_with_depan_1k.jpg,orb_match_percentage_with_depan_20k.jpg,orb_match_percentage_with_depan_2k.jpg,orb_match_percentage_with_depan_50k.jpg,orb_match_percentage_with_depan_5k.jpg
0,IMG_20241214_181230.jpg,1000,12000000,93.22206,108.722223,119.818832,0.094594,0.174736,0.059121,0.063062,...,2.0,2.0,1.8,2.0,2.0,1.6,2.0,1.8,2.0,1.8
1,IMG_20241214_181244.jpg,1000,12000000,140.698959,153.735422,161.946505,0.052033,0.212724,0.021425,0.035199,...,1.8,1.8,1.8,1.8,2.2,2.0,2.0,1.6,1.8,1.8
2,IMG_20241214_181308.jpg,1000,12000000,83.061434,97.298658,97.750542,0.222297,0.092062,0.042663,0.083081,...,2.6,2.4,2.0,2.0,2.0,2.4,2.4,2.0,2.4,2.2
3,IMG_20241214_181328.jpg,1000,12000000,123.816459,153.400466,155.28063,0.201326,0.267918,0.03407,0.068141,...,2.2,2.2,1.6,2.0,2.2,1.6,2.4,1.8,2.0,2.0
4,IMG_20241214_181348.jpg,1000,12000000,120.363446,133.425556,151.094102,0.116144,0.223179,0.03416,0.063765,...,2.0,2.0,2.4,1.8,2.2,2.0,2.2,1.8,1.8,1.8
