# In [None]:
import sys
sys.path.insert(0, "..")

from joblib import Parallel, delayed
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

from HAR.transformers import CSIMinMaxScaler, Rocket
from HAR.classifiers import RidgeVotingClassifier
from HAR.io import load_dataset

class RidgeVotingClassifierExt(RidgeVotingClassifier):
    """Voting classifier whose ``predict`` also reports the outvoted subcarriers.

    Construction is inherited unchanged from ``RidgeVotingClassifier``. No
    ``__init__`` override is defined on purpose: a pass-through
    ``__init__(*args, **kwargs)`` adds nothing and, if the base class is a
    scikit-learn estimator, varargs constructors are rejected by parameter
    introspection (``get_params`` / ``clone`` / ``repr``).
    """

    def predict(self, X):
        """Majority-vote one label per sample and record dissenting subcarriers.

        Parameters
        ----------
        X : array indexed as ``X[sample, subcarrier, ...]``
            ``X.shape[0]`` is the number of samples and ``X.shape[1]`` the
            number of subcarriers; ``self._models`` is indexed by subcarrier.

        Returns
        -------
        final_predictions : numpy.ndarray of shape (n_samples,)
            Winning label for each sample, stored as float.
        disagree_sc : list of numpy.ndarray
            One integer array per sample holding the subcarrier indices whose
            vote differed from the winning label.

        Notes
        -----
        Returning a tuple deviates from the usual single-array ``predict``
        contract; callers must unpack both values.
        """
        n_samples, n_sc, *_ = X.shape
        final_predictions = np.zeros((n_samples,))
        disagree_sc = []
        for i_sample in range(n_samples):
            # Plain loop instead of a per-sample joblib pool: the pool was
            # configured with n_jobs=1, i.e. it already ran sequentially.
            votes = [
                self._score(
                    self._models[i_sc],
                    np.expand_dims(X[i_sample, i_sc, :], axis=0),
                )
                for i_sc in range(n_sc)
            ]
            # assumes each _score call yields a single label -- TODO confirm.
            # atleast_1d keeps the vote vector 1-D when there is only one
            # subcarrier (squeeze alone would collapse it to 0-d).
            votes = np.atleast_1d(np.array(votes).squeeze())
            labels, counts = np.unique(votes, return_counts=True)
            # argmax returns the first maximum, so ties go to the smallest label.
            winner = labels[np.argmax(counts)]
            final_predictions[i_sample] = winner
            disagree_sc.append(np.flatnonzero(votes != winner))
        return final_predictions, disagree_sc

# End-to-end model: the three named steps run in this order on every
# fit/predict call. The step names are also used later to look up the fitted
# "rocket" and "classifier" objects.
pipe = Pipeline(
    steps=[
        ("scaler", CSIMinMaxScaler()),
        ("rocket", Rocket(n_kernels=500)),
        # Updated for 5 classes
        ("classifier", RidgeVotingClassifierExt(n_classes=5)),
    ],
)

# Load data
# NOTE: absolute, machine-specific path -- adjust when running elsewhere.
DATASET_PATH = "/home/ravindu/Desktop/final/dataset/mat/HAR_complete.mat"
# Fixed seed so the train/test split, and therefore the reported metrics, is
# repeatable between runs. Other sources of randomness (e.g. inside the
# pipeline steps) are not controlled here.
SPLIT_SEED = 42

X, y, nsamples, classnames, dim = load_dataset(DATASET_PATH)
# Restore the per-sample layout given by `dim`; the first axis stays the
# sample axis.
X = X.reshape(X.shape[0], *dim)

# 80/20 split, stratified so both parts keep the class proportions of `y`.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y, random_state=SPLIT_SEED
)

# Fit only: the pipeline ends in a classifier and the transformed output was
# never used, so fit_transform was the wrong entry point.
pipe.fit(X_train, y_train)
pipe["rocket"].save("../artifacts/nbs-v1/kernels.pkl")
pipe["classifier"].save("../artifacts/nbs-v1/models.pkl")

# The classifier's predict returns a pair: the voted labels and, per test
# sample, the indices of the subcarriers that disagreed with the vote.
y_pred, disagree_sc1 = pipe.predict(X_test)

print(f"\n> Test Accuracy: {accuracy_score(y_test, y_pred)*100:.4f}%")
print("\n> Confusion Matrix: ")
print(confusion_matrix(y_test, y_pred))
print("\n> Classification Report :")
print(classification_report(y_test, y_pred, target_names=classnames))

# Histogram of the subcarrier indices that disagreed with the majority vote,
# pooled over all test samples.
sns.set_theme()
# bins=54 is presumably one bin per subcarrier -- confirm against the data.
hist_kwargs = dict(bins=54, kde=True, stat="density")
plt.figure(figsize=(8, 4), constrained_layout=True)
# disagree_sc1 holds one index array per sample; flatten them into one vector.
pooled_disagreements = np.concatenate(disagree_sc1)
sns.histplot(data=pooled_disagreements, label="HAR_complete", **hist_kwargs)
plt.xlabel("Subcarriers")
plt.ylabel("Disagreement Density")
plt.legend()
plt.show()