In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm

In [2]:
def load_and_preprocess_image(path, size=(64, 64)):
    """Load an image as grayscale, resize it, equalize its histogram and flatten it.

    Args:
        path: Path of the image file to read.
        size: Target size passed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.

    Returns:
        A 1-D array holding the histogram-equalized 8-bit pixel values
        (range 0-255, not scaled to [0, 1]) of the resized image.

    Raises:
        ValueError: If OpenCV cannot read or decode the file at ``path``.
    """
    # Load image in grayscale mode
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising; stop
    # here with the offending path rather than failing inside cv2.resize.
    if img is None:
        raise ValueError(f"Could not read image file: {path!r}")

    # Resize image
    img = cv2.resize(img, size)

    # Equalize the 8-bit image directly. Scaling to [0, 1] and back to uint8
    # first would only add a float round-trip followed by a truncating cast.
    img = cv2.equalizeHist(img)

    # Flatten the image to one dimension
    return img.flatten()

In [3]:
def load_dataset(directory, size=(64, 64)):
    """Load every image below ``directory``, labelling each by its sub-folder name.

    Only immediate sub-folders of ``directory`` are scanned, and only files
    whose name ends in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are
    loaded. Everything else is skipped.

    Args:
        directory: Root folder containing one sub-folder per label.
        size: Target size forwarded to ``load_and_preprocess_image``.

    Returns:
        A ``(features, labels)`` tuple of two equally long lists:
        ``features[i]`` is the value returned by ``load_and_preprocess_image``
        for one file and ``labels[i]`` is the name of the sub-folder it was
        found in. Entries are ordered by sub-folder name, then by file name.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the returned lists identical from run to run.
    for label in tqdm(sorted(os.listdir(directory)), desc="Entire Progress"):
        subdir = os.path.join(directory, label)
        if not os.path.isdir(subdir):
            continue  # stray files next to the label folders are ignored

        for filename in sorted(os.listdir(subdir)):
            if not filename.lower().endswith(image_extensions):
                continue
            file_path = os.path.join(subdir, filename)
            features.append(load_and_preprocess_image(file_path, size))
            labels.append(label)

    return features, labels

In [5]:
# Read the whole dataset, then turn both returned lists into NumPy arrays.
dataset_dir = 'Augmented_dataset'  # Root folder; each sub-folder name is used as a label
features, labels = (np.array(column) for column in load_dataset(dataset_dir))


Entire Progress:   0%|          | 0/8 [00:00<?, ?it/s]

Entire Progress: 100%|██████████| 8/8 [01:08<00:00,  8.60s/it]


In [6]:
# Sanity check: report the dimensions of both arrays, one per line.
print(
    "\n".join(
        f"{caption} {array.shape}"
        for caption, array in (("Features shape:", features), ("Labels shape:", labels))
    )
)

Features shape: (2847, 4096)
Labels shape: (2847,)


In [7]:
from sklearn.preprocessing import LabelEncoder

# Learn the label -> integer mapping from the folder-name labels, then apply
# it to those same labels. `le` is kept so the mapping can be inspected later.
le = LabelEncoder().fit(labels)
target = le.transform(labels)

In [10]:
# `le` was already fitted on `labels` when `target` was computed, so it is not
# fitted a second time here.
# A fitted LabelEncoder keeps its class names in `classes_`; the position of a
# name in that array is the integer it is encoded as.
unique_labels = le.classes_

print("Unique values and their corresponding encoded labels:")
for index, label in enumerate(unique_labels):
    print(f"Original label: {label}, Encoded value: {index}")

Unique values and their corresponding encoded labels:
Original label: ca, Encoded value: 0
Original label: cha, Encoded value: 1
Original label: ga, Encoded value: 2
Original label: ja, Encoded value: 3
Original label: ka, Encoded value: 4
Original label: kha, Encoded value: 5
Original label: nga, Encoded value: 6
Original label: nya, Encoded value: 7


In [7]:
import numpy as np

# Persist the arrays built above so they can be reloaded without re-reading the images.
# Note: `target` holds the integer-encoded labels; the original label strings
# are not written by this cell.
np.save(file='features.npy', arr=features)
np.save(file='target.npy', arr=target)