## Load the data

In [None]:
import numpy as np
import matplotlib.pylab as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import shap

In [None]:
# Location of the serialized dataset; empty placeholder that has to be set
# before this cell can load anything.
path_to_dataset = ''
# NOTE(review): allow_pickle=True makes np.load unpickle Python objects, which
# can execute arbitrary code -- only load files from a trusted source.
dataset = np.load(path_to_dataset, allow_pickle=True)

In [None]:
# Inspect which fields the first record exposes.
dataset[0].keys()

In [None]:
# Show the 'radar_parameters' entry of the first record.
dataset[0]['radar_parameters']

In [None]:
# Class name of every record, in dataset order.
labels = [sample['class_name'] for sample in dataset]

# Distinct class names, sorted so the listing is identical on every run
# (iteration order of a plain set of strings changes between sessions).
unique_classes = sorted(set(labels))
unique_classes

In [None]:
# Count every class in a single pass: np.unique returns the distinct labels in
# sorted order together with how often each one occurs, so the output order is
# stable and the label list is not rescanned once per class.
class_values, class_counts = np.unique(labels, return_counts=True)
for label, count in zip(class_values, class_counts):
    print(f'Num of {label} in dataset: {count}')

## Prepare radar parameters

In [None]:
# Take a shallow copy of each record's radar parameters so that the keys added
# to these entries later on are not written back into the loaded dataset.
# NOTE(review): assumes every 'radar_parameters' entry is a mapping -- verify.
radar_parameters = [dict(sample['radar_parameters']) for sample in dataset]

In [None]:
# Tag every radar-parameter entry with the class name of the record it came
# from; both sequences are walked in lockstep, so entry k pairs with record k.
for record, sample in zip(radar_parameters, dataset):
    record['class_name'] = sample['class_name']

In [None]:
# Per-record signals that get summarised into scalar features.
params = ['azimuth', 'velocity', 'snr_db']
# Summary statistics computed for each signal; the order here fixes the order
# in which the '<param>_<stat>' keys are inserted.
summary_stats = ('mean', 'min', 'max', 'std')

for record, sample in zip(radar_parameters, dataset):
    for param in params:
        values = sample[param]
        for stat in summary_stats:
            # Calls values.mean(), values.min(), values.max(), values.std() in turn.
            record[f'{param}_{stat}'] = getattr(values, stat)()

In [None]:
# Assemble one table from the per-record radar-parameter entries
# (presumably one row per record with one column per key -- verify).
tracker_data = pd.DataFrame(radar_parameters)

In [None]:
# Display the assembled feature table.
tracker_data

## Divide the dataset into train and test sets

In [None]:
# Features are every column except the target; the target is the class name.
feature_table = tracker_data.drop(columns='class_name')
target_values = tracker_data['class_name']

# Hold out 33% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    feature_table,
    target_values,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Sanity-check the sizes of the training and test feature tables.
X_train.shape, X_test.shape

In [None]:
# Fixed set of target classes the encoder is fitted on.
known_classes = ['person', 'bicycle', 'uav', 'vehicle']

# LabelEncoder assigns integer codes following the sorted order of the fitted
# labels, not the order of the list above; the printout below shows the
# resulting mapping.
encoder_labels = LabelEncoder().fit(known_classes)
code_for = dict(zip(known_classes, encoder_labels.transform(known_classes)))

print("encoded labels")
print(
    f"Class 'person' is {code_for['person']} and "
    f"class 'bicycle' is {code_for['bicycle']} and "
    f"Class 'uav' is {code_for['uav']} and "
    f"class 'vehicle' is {code_for['vehicle']}"
)

# Replace the string targets of both splits with their integer codes.
y_train, y_test = (encoder_labels.transform(split) for split in (y_train, y_test))

## Train a simple model, e.g. XGBoost

In [None]:
# The evaluation metric is configured on the estimator itself: passing
# eval_metric to fit() has been deprecated since XGBoost 1.6 and is rejected
# by newer releases.
# use_label_encoder=False is retained for older releases that would otherwise
# warn about their built-in label encoder; the targets are already integers.
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train)

In [None]:
# Predict the held-out set and keep predictions next to the true codes.
predicted = model.predict(X_test)
results_df = pd.DataFrame({'predicted': predicted, 'true': y_test})

# Derive the display names from the encoder instead of hard-coding them.
# LabelEncoder numbers classes in sorted order, so a hand-written list such as
# ['person', 'bicycle', ...] attaches the wrong name to codes 0 and 1.
class_ids = model.classes_
class_names = encoder_labels.inverse_transform(class_ids)

# Passing labels= keeps names and codes aligned even if a class happens to be
# absent from the test split.
report_pp = classification_report(
    results_df.true,
    results_df.predicted,
    labels=class_ids,
    target_names=class_names,
)
print(report_pp)

# Rows/columns follow class_ids, and display_labels uses the matching names.
cm = confusion_matrix(results_df.true, results_df.predicted, labels=class_ids)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
cm_display.plot()

In [None]:
# Importance score of each input column as reported by the fitted model.
features_importances = model.feature_importances_

# Column positions ordered from the highest score to the lowest.
ranking = np.argsort(features_importances)[::-1]
for column_name, score in zip(X_train.columns[ranking], features_importances[ranking]):
    print(f'{column_name} : {score}')

In [None]:
# Explain the test-set predictions of the fitted tree model.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_test)

# Legend names must follow the model's class order. LabelEncoder numbers the
# classes in sorted order, so they are recovered from the encoder rather than
# hard-coded (a hand-written person/bicycle/... list swaps the first two).
shap_class_names = list(encoder_labels.inverse_transform(model.classes_))
shap.summary_plot(shap_values, X_test, show=True, class_names=shap_class_names)