In [21]:
import os
import pandas as pd
import seaborn as sns
import cv2
import numpy as np
from PIL import Image
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from skimage import io, color
from skimage.feature import local_binary_pattern
from joblib import Parallel, delayed
from skimage.transform import resize


In [22]:
images_dir = '/kaggle/input/nhapmoncv/data/images'
classes = [d for d in os.listdir(images_dir) if os.path.isdir(os.path.join(images_dir, d))]

label_map = {cls: idx for idx, cls in enumerate(classes)}

data = []
for cls in classes:
    cls_folder = os.path.join(images_dir, cls)
    for fname in os.listdir(cls_folder):
        if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
            file_path = os.path.join(cls_folder, fname)
            label = label_map[cls]
            data.append((file_path, label))


classes = [d.split("-")[-1] for d in os.listdir(images_dir) if os.path.isdir(os.path.join(images_dir, d))]
label_map = {cls: idx for idx, cls in enumerate(classes)}


In [23]:
df = pd.DataFrame(data, columns=['filepath', 'label'])
print(df.head())
print("Number of images:", len(df))
print("Number of classes:", len(classes))

label_map = {v: k for k, v in label_map.items()}
df["breed"] = df["label"].map(label_map)

                                            filepath  label
0  /kaggle/input/nhapmoncv/data/images/n02091635-...      0
1  /kaggle/input/nhapmoncv/data/images/n02091635-...      0
2  /kaggle/input/nhapmoncv/data/images/n02091635-...      0
3  /kaggle/input/nhapmoncv/data/images/n02091635-...      0
4  /kaggle/input/nhapmoncv/data/images/n02091635-...      0
Number of images: 20580
Number of classes: 120


In [24]:
print(df.head())

                                            filepath  label       breed
0  /kaggle/input/nhapmoncv/data/images/n02091635-...      0  otterhound
1  /kaggle/input/nhapmoncv/data/images/n02091635-...      0  otterhound
2  /kaggle/input/nhapmoncv/data/images/n02091635-...      0  otterhound
3  /kaggle/input/nhapmoncv/data/images/n02091635-...      0  otterhound
4  /kaggle/input/nhapmoncv/data/images/n02091635-...      0  otterhound


In [25]:
def compute_lbp(img_path, target_size=(128, 128), P=8, R=1):
        img = io.imread(img_path)
        img = resize(img, target_size)
        if img.shape[-1] == 4:
            img = img[:, :, :3]

        img_gray = (color.rgb2gray(img) * 255).astype('uint8')

        lbp = local_binary_pattern(img_gray, P=P, R=R, method='uniform')

        n_bins = int(lbp.max() + 1)
        hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins), density=True)

        return hist

In [26]:
features_list = Parallel(n_jobs=-1, backend='loky')(
    delayed(compute_lbp)(p) for p in df["filepath"]
)

valid_mask = [f is not None for f in features_list]
X = np.vstack([f for f in features_list if f is not None])
y = df.loc[valid_mask, "label"].values

print(f"LBP features extracted for {len(X)} of {len(df)} images")

LBP features extracted for 20580 of 20580 images


In [27]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
rf = RandomForestClassifier()

In [28]:
rf.fit(x_train, y_train)
y_pred = rf.predict(x_test)

print("Accuracy (LBP):", accuracy_score(y_test, y_pred) * 100)

Accuracy (LBP): 3.231292517006803
