In [8]:
import numpy as np
from numpy.random import random
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib


## Global Parameters



In [9]:
KERNEL = "rbf"
GLOBAL_C = 10
GAMMA = "scale"
THRESHOLD = 0.6

In [10]:
CLASS_MAPPING = {
    "cardboard": 0,
    "glass": 1,
    "metal": 2,
    "paper": 3,
    "plastic": 4,
    "trash": 5,
    "unknown": 6
}


## Load Features

In [11]:
X_train = np.load("../features/X_train_scaled.npy")
X_test = np.load("../features/X_test_scaled.npy")
y_train = np.load("../features/y_train.npy")
y_test = np.load("../features/y_test.npy")
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(2208, 2048) (2208,)
(372, 2048) (372,)


## Model Training

In [12]:
svm = SVC(
    kernel=KERNEL,
    C=GLOBAL_C,
    gamma=GAMMA,
    probability=True,
    random_state=42
)

svm.fit(X_train, y_train)

0,1,2
,"C  C: float, default=1.0 Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty. For an intuitive visualization of the effects of scaling the regularization parameter C, see :ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.",10
,"kernel  kernel: {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, default='rbf' Specifies the kernel type to be used in the algorithm. If none is given, 'rbf' will be used. If a callable is given it is used to pre-compute the kernel matrix from data matrices; that matrix should be an array of shape ``(n_samples, n_samples)``. For an intuitive visualization of different kernel types see :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.",'rbf'
,"degree  degree: int, default=3 Degree of the polynomial kernel function ('poly'). Must be non-negative. Ignored by all other kernels.",3
,"gamma  gamma: {'scale', 'auto'} or float, default='scale' Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. - if ``gamma='scale'`` (default) is passed then it uses  1 / (n_features * X.var()) as value of gamma, - if 'auto', uses 1 / n_features - if float, must be non-negative. .. versionchanged:: 0.22  The default value of ``gamma`` changed from 'auto' to 'scale'.",'scale'
,"coef0  coef0: float, default=0.0 Independent term in kernel function. It is only significant in 'poly' and 'sigmoid'.",0.0
,"shrinking  shrinking: bool, default=True Whether to use the shrinking heuristic. See the :ref:`User Guide `.",True
,"probability  probability: bool, default=False Whether to enable probability estimates. This must be enabled prior to calling `fit`, will slow down that method as it internally uses 5-fold cross-validation, and `predict_proba` may be inconsistent with `predict`. Read more in the :ref:`User Guide `.",True
,"tol  tol: float, default=1e-3 Tolerance for stopping criterion.",0.001
,"cache_size  cache_size: float, default=200 Specify the size of the kernel cache (in MB).",200
,"class_weight  class_weight: dict or 'balanced', default=None Set the parameter C of class i to class_weight[i]*C for SVC. If not given, all classes are supposed to have weight one. The ""balanced"" mode uses the values of y to automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.",


## Model Evaluation

In [13]:
y_pred = svm.predict(X_test)
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.8924731182795699

Classification Report:
               precision    recall  f1-score   support

   cardboard       0.98      0.92      0.95        49
       glass       0.86      0.91      0.88        78
       metal       0.84      0.95      0.89        62
       paper       0.90      0.93      0.92        90
     plastic       0.91      0.82      0.86        71
       trash       0.94      0.68      0.79        22

    accuracy                           0.89       372
   macro avg       0.90      0.87      0.88       372
weighted avg       0.90      0.89      0.89       372



## Save Model

In [14]:
joblib.dump(svm, "../models/svm_model.pkl")

['../models/svm_model.pkl']

## Model Prediction with Rejection

In [15]:
def svm_predict_with_rejection(model, X, threshold=0.6):
    """
    Predict classes using SVM with rejection.
    Samples with max probability < threshold are labeled as Unknown (ID 6).
    """
    probs = model.predict_proba(X)
    max_probs = np.max(probs, axis=1)
    preds = model.predict(X)

    final_preds = []
    for pred, conf in zip(preds, max_probs):
        if conf < threshold:
            final_preds.append(6)  # Unknown
        else:
            final_preds.append(CLASS_MAPPING[pred])

    return np.array(final_preds)


In [16]:
y_pred = svm_predict_with_rejection(svm, X_test, threshold=THRESHOLD)

print("\nAccuracy (accepted samples):", accuracy_score(y_test, y_pred))
print("\nClassification Report (accepted samples):\n", classification_report(y_test, y_pred))



Accuracy (accepted samples): 0.0


ValueError: Mix of label input types (string and number)