In [3]:
!pip install scikit-image



In [9]:
import os, glob
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog
from skimage import exposure
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter
import matplotlib.pyplot as plt

In [10]:
# Parameters
IMG_SIZE = 128
DATA_PATH = "./Data/Scores" 
BATCH_SIZE = 32

In [11]:
# Build dataset
paths, labels = [], []

for shape_name in os.listdir(DATA_PATH):
    shape_path = os.path.join(DATA_PATH, shape_name)
    if not os.path.isdir(shape_path):
        continue
    for score in os.listdir(shape_path):
        score_path = os.path.join(shape_path, score)
        if not os.path.isdir(score_path):
            continue
        label = f"{shape_name}_{score}"
        for ext in ("*.png", "*.jpg", "*.jpeg"):
            for p in glob.glob(os.path.join(score_path, ext)):
                paths.append(p)
                labels.append(label)

df = pd.DataFrame({"path": paths, "label": labels})
print(df.head())

print("Total samples:", len(df))
print("Unique classes:", df['label'].nunique())

                                              path       label
0         ./Data/Scores/Triangle/0/img2302-T-0.png  Triangle_0
1         ./Data/Scores/Triangle/0/img1567-T-0.png  Triangle_0
2         ./Data/Scores/Triangle/0/img2184-T-0.png  Triangle_0
3  ./Data/Scores/Triangle/4/img4155-T-4(10111).png  Triangle_4
4  ./Data/Scores/Triangle/4/img3477-T-4(10111).png  Triangle_4
Total samples: 4297
Unique classes: 34


In [12]:
# Feature extraction function
def extract_features(path):
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    
    # HOG features
    hog_feats = hog(img, orientations=9, pixels_per_cell=(8,8),
                    cells_per_block=(2,2), block_norm='L2-Hys', feature_vector=True)
    
    # Hu Moments
    moments = cv2.moments(img)
    hu_feats = cv2.HuMoments(moments).flatten()
    
    # Area (simple intensity sum)
    area = [np.sum(img)/255.0]
    
    return np.hstack([hog_feats, hu_feats, area])

In [16]:
counts = Counter(y)
mask = np.array([counts[label] > 1 for label in y])

X = X[mask]
y = y[mask]
y_labels = y_labels[mask]

In [17]:
# ---------------- 5. Train/test split ----------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# ---------------- 6. Hyperparameter tuning ----------------
C_values = [0.01, 0.1, 1, 10, 100]
train_acc, val_acc = [], []

for C in C_values:
    clf = LinearSVC(C=C, class_weight='balanced', max_iter=5000)
    clf.fit(X_train, y_train)
    train_acc.append(clf.score(X_train, y_train))
    val_acc.append(clf.score(X_test, y_test))

In [None]:
# ---------------- 7. Plot train/validation accuracy ----------------
plt.figure(figsize=(8,5))
plt.plot(C_values, train_acc, 'b-o', label='Train Accuracy')
plt.plot(C_values, val_acc, 'r-o', label='Validation Accuracy')
plt.xscale('log')
plt.xlabel('C value (log scale)')
plt.ylabel('Accuracy')
plt.title('LinearSVC Accuracy for different C values')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# ---------------- 8. Train final model ----------------
best_C = C_values[np.argmax(val_acc)]
print(f"Best C: {best_C}")

clf_final = LinearSVC(C=best_C, class_weight='balanced', max_iter=5000)
clf_final.fit(X_train, y_train)

In [None]:
# ---------------- 9. Evaluate ----------------
y_pred = clf_final.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {acc:.4f}")

In [None]:
# Only include classes present in test set
unique_test_labels = np.unique(y_test)
print(classification_report(
    y_test,
    y_pred,
    labels=unique_test_labels,
    target_names=le.classes_[unique_test_labels]
))