In [2]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog, local_binary_pattern

In [3]:
# Input: root folder that is scanned for one sub-folder of training images per class.
image_folder = "./data/dataset/train"  # replace with your dataset path
# Output: CSV file that receives the extracted training features.
csv_output = "./data/csv_data/image_features.csv"

In [4]:
# HOG settings; the dict is unpacked as keyword arguments into hog()
hog_params = dict(
    orientations=9,
    pixels_per_cell=(8, 8),
    cells_per_block=(2, 2),
    block_norm='L2-Hys',
)

# LBP settings: neighbourhood radius and the number of sampling points,
# which is tied to the radius (8 points per unit of radius)
lbp_radius = 3
lbp_n_points = lbp_radius * 8

In [5]:
# Helper function to extract features
def extract_features(image):
    """Build the feature vector of one image: HOG descriptor followed by an LBP histogram.

    Parameters
    ----------
    image : array
        Image data. An input with three dimensions is first converted with
        cv2.COLOR_BGR2GRAY; any other input is used unchanged.

    Returns
    -------
    numpy.ndarray
        Concatenation of the HOG features and the density-normalised
        histogram of the "uniform" LBP codes.

    Notes
    -----
    Reads the module-level settings ``hog_params``, ``lbp_n_points`` and
    ``lbp_radius``.
    """
    # Work on a single-channel image; colour input is reduced to grayscale
    gray = image if len(image.shape) != 3 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Histogram of oriented gradients
    hog_vector = hog(gray, **hog_params)

    # Local binary pattern codes, summarised as a histogram with one
    # unit-wide bin per integer edge in [0, lbp_n_points + 2]
    lbp_codes = local_binary_pattern(gray, lbp_n_points, lbp_radius, method="uniform")
    bin_edges = np.arange(0, lbp_n_points + 3)
    lbp_hist = np.histogram(
        lbp_codes.ravel(),
        bins=bin_edges,
        range=(0, lbp_n_points + 2),
        density=True,
    )[0]

    # HOG values come first, LBP histogram values last
    return np.concatenate([hog_vector, lbp_hist])


In [6]:
# Build the rows of the training CSV: one row per readable image,
# laid out as [class label, feature_0, feature_1, ...]
rows = []
count = 0
for class_name in os.listdir(image_folder):
    class_folder = os.path.join(image_folder, class_name)

    # Only sub-folders are treated as classes; other entries are skipped
    if not os.path.isdir(class_folder):
        continue

    for image_name in os.listdir(class_folder):
        image_path = os.path.join(class_folder, image_name)
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Skip entries that cv2.imread could not load (it returned None)
        if image is None:
            continue

        # Bring every image to the same size so that all feature vectors
        # have the same length (cv2.resize takes the size as (width, height))
        image = cv2.resize(image, (300, 400))

        # Extract features
        features = extract_features(image)

        # Add class label and features to the row
        row = [class_name] + features.tolist()
        rows.append(row)

        # `count += 1` is a statement, so it cannot be written inside print(...);
        # increment first, then report progress
        count += 1
        print(f"Processed {count} images")

In [7]:
# Turn the collected rows into a table: a 'class' column followed by
# one 'feature_<i>' column (0-based) per extracted feature value
n_features = len(rows[0]) - 1  # the first entry of each row is the class label
columns = ['class']
columns.extend(f'feature_{i}' for i in range(n_features))
df = pd.DataFrame(rows, columns=columns)

In [11]:
# Preview the first five rows of the feature table
df.head(n=5)

Unnamed: 0,class,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,...,feature_63520,feature_63521,feature_63522,feature_63523,feature_63524,feature_63525,feature_63526,feature_63527,feature_63528,feature_63529
0,1,0.051925,0.017997,0.025946,0.043911,0.086917,0.043214,0.04439,0.049777,0.011125,...,0.012442,0.012667,0.010417,0.011175,0.011658,0.015,0.0202,0.029092,0.055175,0.462833
1,1,0.288338,0.117399,0.039849,0.111411,0.174083,0.201733,0.053494,0.240872,0.112938,...,0.012967,0.019492,0.013208,0.013483,0.014258,0.017542,0.022783,0.029708,0.082942,0.457258
2,1,0.284242,0.070533,0.015522,0.051071,0.168526,0.046519,0.189982,0.162796,0.072049,...,0.016258,0.016933,0.0144,0.015508,0.017375,0.020325,0.023067,0.023558,0.064383,0.407867
3,1,0.234659,0.162645,0.043538,0.113221,0.234659,0.129164,0.229182,0.071867,0.176108,...,0.007467,0.010342,0.007967,0.009542,0.011417,0.015067,0.023442,0.036692,0.074617,0.555125
4,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.015133,0.02185,0.013692,0.011792,0.011017,0.013583,0.019492,0.027633,0.095808,0.42495


In [8]:
# Save to CSV
# to_csv does not create missing parent folders, so make sure the target
# folder exists first (skipped when csv_output has no folder component)
output_dir = os.path.dirname(csv_output)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(csv_output, index=False)

print(f"Feature extraction completed. CSV saved as {csv_output}")

Feature extraction completed. CSV saved as ./data/csv_data/image_features.csv


## Test dataset conversion

In [None]:
# Input: folder whose entries are read directly as test images.
test_image_folder = "./data/dataset/test"
# Output: CSV file that receives the extracted test features.
output_csv = "./data/csv_data/test_data_features.csv"

In [None]:
# Collect one row per readable test image: [file name, feature values...]
rows, count = [], 0

# Walk every entry of the test image folder
for image_name in os.listdir(test_image_folder):
    image_path = os.path.join(test_image_folder, image_name)
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Entries that could not be loaded as an image are skipped
    if image is None:
        continue

    # Resize to the fixed size used for feature extraction
    # (keeping it at 300x400 as per the dataset)
    image = cv2.resize(image, (300, 400))

    # Feature vector for this image
    features = extract_features(image)

    # The image name identifies the sample; the feature values follow it
    row = [image_name, *features.tolist()]
    rows.append(row)

    print(f"Processed {count + 1} images")
    count += 1


In [None]:
# Create a DataFrame
# The number of feature columns is taken from the collected rows themselves,
# not from the loop variable `features`: that variable is undefined (or left
# over from an earlier cell) when no test image was processed.
if not rows:
    raise ValueError("No test images were processed; cannot build the feature table")

n_features = len(rows[0]) - 1  # the first entry of each row is the sample name
# Column names are 1-based: Feature_1 ... Feature_<n_features>
columns = ['Sample Name'] + [f'Feature_{i}' for i in range(1, n_features + 1)]
df = pd.DataFrame(rows, columns=columns)

In [None]:
# Save the DataFrame to a CSV file
# to_csv does not create missing parent folders, so make sure the target
# folder exists first (skipped when output_csv has no folder component)
test_output_dir = os.path.dirname(output_csv)
if test_output_dir:
    os.makedirs(test_output_dir, exist_ok=True)
df.to_csv(output_csv, index=False)

print(f"Feature extraction completed and saved to {output_csv}")
