In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from PIL import Image

# Path to your data folder
data_dir = "data"
img_size = (32, 32)  # size passed to PIL's resize for every image
image_exts = (".png", ".jpg", ".jpeg")  # accepted extensions (matched case-insensitively)

# Accumulate per-file results in lists; the final arrays are built once below.
image_list, label_list = [], []

# os.listdir() returns entries in arbitrary order. Sorting fixes the sample
# order so the resulting arrays (and any seeded split made from them later)
# are reproducible across runs and machines.
for fname in sorted(os.listdir(data_dir)):
    if not fname.lower().endswith(image_exts):
        continue  # skip anything that is not a png/jpg/jpeg file

    # The label is everything before the FIRST dot in the file name,
    # e.g. "001he". Several files may therefore share one label.
    label = fname.split(".")[0]
    filepath = os.path.join(data_dir, fname)

    # Load & preprocess. The context manager closes the underlying file
    # handle deterministically instead of leaving it to garbage collection.
    with Image.open(filepath) as pil_img:
        gray = pil_img.convert("L").resize(img_size)  # grayscale, fixed size
        img = np.asarray(gray, dtype="float32") / 255.0  # scale 0..255 -> 0..1

    image_list.append(img)
    label_list.append(label)

# Fail early with a clear message instead of an obscure shape/encoder error
# further down when the folder is missing its images or the path is wrong.
if not image_list:
    raise FileNotFoundError(
        f"No image files with extensions {image_exts} found in {data_dir!r}"
    )

# Convert to arrays
images = np.stack(image_list)[..., None]  # add trailing channel dim
labels = np.array(label_list)

# Encode string labels -> integers
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

print("Total images:", len(images))
print("Unique labels:", len(le.classes_))
print("Sample labels:", le.classes_[:10])  # check first 10

Total images: 37652
Unique labels: 238
Sample labels: ['001he' '002hu' '003hi' '004ha' '005hy' '006hh' '007ho' '008le' '009lu'
 '010li']


In [3]:
# Display the full array of distinct label strings held by the fitted LabelEncoder `le`.
le.classes_

array(['001he', '002hu', '003hi', '004ha', '005hy', '006hh', '007ho',
       '008le', '009lu', '010li', '011la', '012ly', '013ll', '014lo',
       '015_h', '016_h', '017_h', '018_h', '019_h', '020_h', '021_h',
       '022me', '023mu', '024mi', '025ma', '026my', '027mm', '028mo',
       '029_s', '030_s', '031_s', '032_s', '033_s', '034_s', '035_s',
       '036re', '037ru', '038ri', '039ra', '040ry', '041rr', '042ro',
       '043se', '044su', '045si', '046sa', '047sy', '048ss', '049so',
       '050_S', '051_S', '052_S', '053_S', '054_S', '055_S', '056_S',
       '057qe', '058qu', '059qi', '060qa', '061qy', '062qq', '063qo',
       '064be', '065bu', '066bi', '067ba', '068by', '069bb', '070bo',
       '071ve', '072vu', '073vi', '074va', '075vy', '076vv', '077vo',
       '078te', '079tu', '080ti', '081ta', '082ty', '083tt', '084to',
       '085Ce', '086Cu', '087Ci', '088Ca', '089Cy', '090CC', '091Co',
       '092He', '093Hu', '094Hi', '095Ha', '096Hy', '097HH', '098Ho',
       '099ne', '100