## Classification with saved Descriptor Data

In [22]:
import math
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, balanced_accuracy_score

In [23]:
# Load data
#df = np.load('Descriptors/features_Zernike_grid256ord12.npz')
#df = np.load('Descriptors/features_FPFH_sampS15000.npz')
#df = np.load('Descriptors/features_Auto3DGM_500.npz')
df = np.load('Descriptors/features_SDFD_128.npz')
#print(df)

X = df['X']
y = df['y'].astype(int)
ids = df['ids']
#print(len(ids))

assert X.shape[0] == y.shape[0] == ids.shape[0], "X, y, ids need to have the same number of samples."
#assert X.shape[1] == 49, f"Erwarte 49 Features, habe {X.shape[1]}"
print(len(X))

85


In [24]:
keep_mask = ~np.char.endswith(ids, 'A')

# Delete all dublicate scans with Suffix -A
X = X[keep_mask]
y = y[keep_mask]
ids = ids[keep_mask]

print(f"{len(keep_mask)} Objects kept.")

85 Objects kept.


In [25]:
print(f"{len(ids)} individuals, with a 80-20 split is {len(ids)-math.ceil(len(ids)*0.2)} : {math.ceil(len(ids)*0.2)} (rounded).")

82 individuals, with a 80-20 split is 65 : 17 (rounded).


In [26]:
# Choose same IDs as PointNet
test_ids = {
    "OvisAries-03189", "OvisAries-03190", "OvisAries-03204", "OvisAries-03205", "OvisAries-03208", "OvisAries-03219", "OvisAries-3F", 
    "OvisAries-03223", "OvisAries-03", "OvisAries-27",
    "GazellaSubgutturosa-02026", "GazellaSubgutturosa-02033", "GazellaSubgutturosa-02041", "GazellaSubgutturosa-02051", "GazellaSubgutturosa-02067",
    "GazellaSubgutturosa-02073", "GazellaSubgutturosa-02074"}

#test_ids = {
#    "SAPM-MA-03189", "SAPM-MA-03190", "SAPM-MA-03204", "SAPM-MA-03205", "SAPM-MA-03208", "SAPM-MA-03219", "SPM-MA-3F", 
#    "SPM-MA-03223", "THE-MA-03", "THE-MA-27",
#    "SAPM-MA-02026", "SAPM-MA-02033", "SAPM-MA-02041", "SAPM-MA-02051", "SAPM-MA-02067",
#    "SAPM-MA-02073", "SAPM-MA-02074"}

# ID -> Index
id_to_idx = {id_: i for i, id_ in enumerate(ids)}

missing = sorted([t for t in test_ids if t not in id_to_idx])
if missing:
    raise ValueError(f"These IDs were not found: {missing}")

test_idx = np.array([id_to_idx[t] for t in test_ids], dtype=int)
test_idx = np.unique(test_idx)
train_idx = np.setdiff1d(np.arange(len(ids)), test_idx)

X_train, y_train = X[train_idx], y[train_idx]
X_test,  y_test  = X[test_idx],  y[test_idx]
ids_test = ids[test_idx]

print("Train size:", X_train.shape, "Test size:", X_test.shape)

Train size: (65, 2097152) Test size: (17, 2097152)


In [27]:
# Model: Scaling + SVM (RBF Kernel as good default)
clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC(
        kernel="rbf",
        C=1.0,
        gamma="scale",
        class_weight="balanced",   # because of class inbalance
        probability=False,
        random_state=0
    ))
])

In [28]:
# Train/Eval
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("\nBalanced Accuracy:", balanced_accuracy_score(y_test, y_pred))
print("\nConfusion matrix:\n", confusion_matrix(y_test, y_pred))
print("\nReport:\n", classification_report(y_test, y_pred, digits=3))

# Check classification by ID:
print("\nPredictions by ID:")
for _id, yt, yp in zip(ids_test, y_test, y_pred):
    print(f"{_id}: true={yt} pred={yp}")


Balanced Accuracy: 1.0

Confusion matrix:
 [[10  0]
 [ 0  7]]

Report:
               precision    recall  f1-score   support

           0      1.000     1.000     1.000        10
           1      1.000     1.000     1.000         7

    accuracy                          1.000        17
   macro avg      1.000     1.000     1.000        17
weighted avg      1.000     1.000     1.000        17


Predictions by ID:
OvisAries-03189: true=0 pred=0
OvisAries-03190: true=0 pred=0
OvisAries-03204: true=0 pred=0
OvisAries-03205: true=0 pred=0
OvisAries-03208: true=0 pred=0
OvisAries-03219: true=0 pred=0
OvisAries-03223: true=0 pred=0
OvisAries-3F: true=0 pred=0
OvisAries-03: true=0 pred=0
OvisAries-27: true=0 pred=0
GazellaSubgutturosa-02026: true=1 pred=1
GazellaSubgutturosa-02033: true=1 pred=1
GazellaSubgutturosa-02041: true=1 pred=1
GazellaSubgutturosa-02051: true=1 pred=1
GazellaSubgutturosa-02067: true=1 pred=1
GazellaSubgutturosa-02073: true=1 pred=1
GazellaSubgutturosa-02074: true=1