In [10]:
# Import Libraries
import os
import cv2
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [2]:
# Parameters
IMG_SIZE = 128
DATA_PATH = "./Data/Scores"

In [3]:
# Load Dataset
X, y, class_names = [], [], []
class_map = {}

print("[INFO] Loading dataset...")

for shape_name in os.listdir(DATA_PATH):  # e.g., Circle, Square
    shape_path = os.path.join(DATA_PATH, shape_name)
    if not os.path.isdir(shape_path):
        continue

    for score in os.listdir(shape_path):  # e.g., 0,1,2...
        score_path = os.path.join(shape_path, score)
        if not os.path.isdir(score_path):
            continue

        label = f"{shape_name}_{score}"
        if label not in class_map:
            class_map[label] = len(class_map)
            class_names.append(label)

        for file in os.listdir(score_path):
            if file.endswith((".png", ".jpg", ".jpeg")):
                path = os.path.join(score_path, file)
                img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
                X.append(img)
                y.append(class_map[label])

X = np.array(X, dtype="float32") / 255.0
y = np.array(y)

print(f"[INFO] Dataset loaded: {X.shape[0]} samples, {len(class_map)} classes")

[INFO] Loading dataset...
[INFO] Dataset loaded: 4297 samples, 34 classes


In [4]:
# Check Class Distribution
class_counts = Counter(y)
print("\n[INFO] Class distribution before filtering:")
for cls, count in class_counts.items():
    print(f"Class {class_names[cls]}: {count} samples")


[INFO] Class distribution before filtering:
Class Triangle_0: 3 samples
Class Triangle_4: 38 samples
Class Triangle_5: 539 samples
Class Star_3: 102 samples
Class Star_2: 8 samples
Class Star_0: 13 samples
Class Star_4: 250 samples
Class Star_5: 100 samples
Class Wave_3: 109 samples
Class Wave_2: 4 samples
Class Wave_0: 2 samples
Class Wave_4: 434 samples
Class Circle_3: 112 samples
Class Circle_2: 1 samples
Class Circle_0: 5 samples
Class Circle_4: 457 samples
Class Square_0: 1 samples
Class Square_4: 45 samples
Class Square_5: 540 samples
Class Overlapped pencils_3: 8 samples
Class Overlapped pencils_2: 2 samples
Class Overlapped pencils_0: 12 samples
Class Overlapped pencils_4: 54 samples
Class Overlapped pencils_6: 226 samples
Class Overlapped pencils_5: 143 samples
Class Overlapped circle_2: 1 samples
Class Overlapped circle_0: 3 samples
Class Overlapped circle_4: 9 samples
Class Overlapped circle_6: 386 samples
Class Overlapped circle_5: 135 samples
Class Diagonal_3: 2 samples
C

In [12]:
# Filter Rare Classes (< 2 samples)
valid_classes = np.unique(y)
class_names_filtered = [class_names[i] for i in valid_classes]

In [13]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print(f"[INFO] Training set: {X_train.shape[0]} samples")
print(f"[INFO] Test set: {X_test.shape[0]} samples")

[INFO] Training set: 3435 samples
[INFO] Test set: 859 samples


In [14]:
# Feature Extraction (Flatten Images)
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

In [15]:
# Random Forest
print("\n[INFO] Training Random Forest...")
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_flat, y_train)


[INFO] Training Random Forest...


In [16]:
# Evaluate
y_pred_rf = rf.predict(X_test_flat)
acc_rf = accuracy_score(y_test, y_pred_rf)
print(f"[RESULT] Random Forest Accuracy: {acc_rf:.4f}")

[RESULT] Random Forest Accuracy: 0.5448


In [17]:
# Use filtered class names to match current labels
print(classification_report(
    y_test,
    y_pred_rf,
    labels=valid_classes,
    target_names=class_names_filtered
))

                      precision    recall  f1-score   support

          Triangle_0       0.00      0.00      0.00         1
          Triangle_4       0.00      0.00      0.00         8
          Triangle_5       0.56      0.83      0.67       108
              Star_3       0.00      0.00      0.00        20
              Star_2       0.00      0.00      0.00         2
              Star_0       0.00      0.00      0.00         3
              Star_4       0.42      0.32      0.36        50
              Star_5       0.00      0.00      0.00        20
              Wave_3       0.00      0.00      0.00        22
              Wave_2       0.00      0.00      0.00         1
              Wave_0       0.00      0.00      0.00         0
              Wave_4       0.60      0.77      0.68        87
            Circle_3       0.00      0.00      0.00        22
            Circle_0       0.00      0.00      0.00         1
            Circle_4       0.56      0.73      0.63        91
       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
# Train Gradient Boosting
print("\n[INFO] Training Gradient Boosting...")
gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb.fit(X_train_flat, y_train)


[INFO] Training Gradient Boosting...


In [20]:
# Evaluate
y_pred_gb = gb.predict(X_test_flat)
acc_gb = accuracy_score(y_test, y_pred_gb)
print(f"[RESULT] Gradient Boosting Accuracy: {acc_gb:.4f}")

[RESULT] Gradient Boosting Accuracy: 0.4796


In [21]:
# Use filtered class names to match actual labels
print(classification_report(
    y_test,
    y_pred_gb,
    labels=valid_classes,
    target_names=class_names_filtered
))

                      precision    recall  f1-score   support

          Triangle_0       0.00      0.00      0.00         1
          Triangle_4       0.00      0.00      0.00         8
          Triangle_5       0.62      0.74      0.67       108
              Star_3       0.10      0.05      0.07        20
              Star_2       0.00      0.00      0.00         2
              Star_0       0.00      0.00      0.00         3
              Star_4       0.48      0.20      0.28        50
              Star_5       0.14      0.10      0.12        20
              Wave_3       0.09      0.05      0.06        22
              Wave_2       0.00      0.00      0.00         1
              Wave_0       0.00      0.00      0.00         0
              Wave_4       0.58      0.66      0.61        87
            Circle_3       0.20      0.05      0.07        22
            Circle_0       0.00      0.00      0.00         1
            Circle_4       0.47      0.65      0.54        91
       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
