# AIoT Project

In [None]:
import os

# basic data engineering
import pandas as pd
import numpy as np
import scipy

# plotting
import matplotlib.pyplot as plt
import seaborn as sns

# db
import pymongo

# configs & other
import yaml
from tqdm.notebook import tqdm_notebook
from datetime import datetime
from time import time

from psynlig import pca_explained_variance_bar

# utils processing
from utils import sliding_window_pd
from utils import apply_filter
from utils import filter_instances
from utils import flatten_instances_df
from utils import df_rebase
from utils import rename_df_column_values
from utils import encode_labels

# utils visualization
from utils_visual import plot_instance_time_domain
from utils_visual import plot_instance_3d
from utils_visual import plot_np_instance
from utils_visual import plot_heatmap
from utils_visual import plot_scatter_pca

%load_ext autoreload
%autoreload 2

Start time of execution

In [None]:
# Record the wall-clock time (seconds since the epoch) at the start of the run.
time_start = time()

## Load configuration

In [None]:
# Resolve config.yml relative to the current working directory.
config_path = os.path.join(os.getcwd(), "config.yml")

# safe_load restricts parsing to standard YAML tags (no Python-object
# construction). That is sufficient here: the notebook only reads plain
# values from the config (the "client", "db" and "col" keys).
with open(config_path) as file:
    config = yaml.safe_load(file)

In [None]:
# Create the MongoDB client from the connection value stored under "client" in the config.
client = pymongo.MongoClient(config["client"])

In [None]:
# Select the database and the collection named by the "db" and "col" config entries.
db = client[config["db"]]
coll = db[config["col"]]

In [None]:
# List the distinct values of the "label" field across the collection.
found_keys = coll.distinct("label")
print("Existing DB keys:", found_keys)

## Load data

In [None]:
# Fetch every document of the collection into memory.
documents = list(coll.find())
# Accumulator for the per-document DataFrames built in the next cell.
dfs = []

In [None]:
# Build one DataFrame per document from its "data" payload, tag every row with
# the document's label, and collect the frames in `dfs`.
dfs.extend(
    pd.DataFrame(doc["data"]).assign(label=doc["label"])
    for doc in documents
)

## Explore the nature of the data

In [None]:
# Stack all per-document frames into one DataFrame with a fresh 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)

In [None]:
# Column order: the keys of the first document's "data" mapping, then the label.
first_doc_fields = documents[0]["data"].keys()
order_list = [*first_doc_fields, "label"]
# Separate list object with the same contents, passed as the reference list.
ref_list = list(order_list)

# See utils.df_rebase for how the two lists are applied to the frame.
df = df_rebase(df, order_list, ref_list)

## Apply filter

In [None]:
# The six sensor channels that are treated as signals.
signal_columns = ["acc_x", "acc_y", "acc_z", "gyro_x", "gyro_y", "gyro_z"]

# Pull the channels out as one array, run the low-pass filter over them and
# write the result back into the same columns.
X = df[signal_columns].to_numpy()
X_lowpassed = apply_filter(X, order=4, wn=0.1, filter_type="lowpass")
df[signal_columns] = X_lowpassed

## Transform the list of DataFrames to NumPy array

Transform the list of DataFrames to NumPy array that contains the windows: (instances, x, y)

In [None]:
# Cut the DataFrame into windows via utils.sliding_window_pd.
# NOTE(review): overlap equals the window size (ws=20, overlap=20) — confirm how
# sliding_window_pd interprets `overlap` (samples vs. percentage vs. step).
windows = sliding_window_pd(df, ws=20, overlap=20)

In [None]:
def _majority_label(labels):
    """Return the most frequent value in *labels* (first in sorted order on ties)."""
    values, freqs = np.unique(labels, return_counts=True)
    return values[np.argmax(freqs)]


# One label per window: the label carried by most of the window's rows.
window_labels = [_majority_label(w["label"].values) for w in windows]

In [None]:
# Keep only the sensor channels of each window.
signal_only_windows = [w[signal_columns] for w in windows]
# NOTE(review): the same low-pass settings (order=4, wn=0.1) were already applied
# to the full DataFrame before windowing, so this filters the signals a second
# time, per window — confirm that is intended.
filtered_signal_only = filter_instances(signal_only_windows, order=4, wn=0.1, filter_type="lowpass")

## Flatten the 2D window instances

Flatten the X NumPy array that contains the 2D window instances

In [None]:
# Flatten the filtered windows into a single DataFrame via utils.flatten_instances_df
# (presumably one row per window — confirm against the helper) and show it.
flattened_df = flatten_instances_df(filtered_signal_only)
print(flattened_df)

In [None]:
# Combine the flattened features with the per-window majority labels.
# The code below reads the last column of the result as the label column.
final_df = rename_df_column_values(flattened_df.to_numpy(), window_labels, flattened_df.columns.tolist())
print(final_df.iloc[:, -1].unique())

# Features are every column but the last; the target is the last column.
# (The earlier per-sample `y = df["label"]` was overwritten here before any
# use, so it has been dropped.)
X = final_df.iloc[:, :-1]
y = final_df.iloc[:, -1]

## Train/Test split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the windows for testing; shuffled with a fixed seed so the split is reproducible.
# NOTE(review): if the windows overlap, a shuffled split can put windows sharing
# samples into both sets — confirm against the windowing settings.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, shuffle=True, random_state=42)

## Scaling

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Min-max scaler with default settings; it is fitted on the training split in the next cell.
scaler = MinMaxScaler()

## Transform to 2D again

In [None]:
# Fit the scaler on the training split only, then reuse the fitted parameters
# for the test split so no test information enters the scaling.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Dimensionality Reduction with PCA using the 1D (flattened) data

In [None]:
# add transformers
from sklearn.decomposition import PCA

### PCA with 2 Components

### PCA with 3 Components

In [None]:
# NOTE: despite its name, `pca2d` keeps 3 principal components.
pca2d = PCA(n_components=3)  # for 3D visualization
# Learn the components from the scaled training data only.
pca2d.fit(X_train_scaled)

In [None]:
# Project both splits onto the components learned from the training data.
X_train_pca = pca2d.transform(X_train_scaled)
X_test_pca = pca2d.transform(X_test_scaled)

### PCA with X% of the variance of the dataset, for training the statistical AI Models

In [None]:
# Bar chart of the variance explained by the fitted components.
pca_explained_variance_bar(pca2d, alpha=0.8)

# Put the projected training points in a frame together with their labels
# (index reset so the labels line up with the 0..n-1 rows of the projection).
component_names = ["PC1", "PC2", "PC3"]
X_train_pca_df = pd.DataFrame(X_train_pca, columns=component_names).assign(
    label=y_train.reset_index(drop=True)
)

# Scatter plot of the projection, coloured by the "label" column.
plot_scatter_pca(X_train_pca_df, c_name="label")

## Classifier - Statistical Learning

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Encode the labels of each split with utils.encode_labels.
# NOTE(review): the two splits are encoded by separate calls. If encode_labels
# fits a fresh encoder on each call, train and test can end up with different
# label -> id mappings whenever their label sets differ — confirm.
y_train=encode_labels(y_train)
y_test =encode_labels(y_test)

### Apply SVC Classifier

In [None]:
# Baseline classifier: RBF-kernel SVC with fixed C=1 and gamma='scale'.
svc = SVC(kernel='rbf', C=1, gamma='scale')

# Train on the PCA-projected training split and predict the projected test split.
svc.fit(X_train_pca, y_train)
y_pred_svc = svc.predict(X_test_pca)

### Evaluate simple classifier - SVC Classifier

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text report of the baseline SVC's test predictions.
print("=== SVC Classification Report ===")
print(classification_report(y_test, y_pred_svc))

# Confusion matrix with rows/columns ordered like the classes the SVC was fitted on.
cm_svc = confusion_matrix(y_test, y_pred_svc, labels=svc.classes_)
disp_svc = ConfusionMatrixDisplay(confusion_matrix=cm_svc, display_labels=svc.classes_)
disp_svc.plot()
plt.title("SVC Confusion Matrix")
plt.show()

### Apply Random Forest Classifier

In [None]:
# Random forest with 100 trees and a fixed seed, trained on the same
# PCA-projected features as the SVC.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_pca, y_train)
y_pred_rf = rf.predict(X_test_pca)

### Evaluate RandomForestClassifier

In [None]:
# Text report of the random forest's test predictions.
print("=== Random Forest Classification Report ===")
print(classification_report(y_test, y_pred_rf))

# Confusion matrix with rows/columns ordered like the classes the forest was fitted on.
cm_rf = confusion_matrix(y_test, y_pred_rf, labels=rf.classes_)
disp_rf = ConfusionMatrixDisplay(confusion_matrix=cm_rf, display_labels=rf.classes_)
disp_rf.plot()
plt.title("Random Forest Confusion Matrix")
plt.show()

### Apply optimization with Grid Search and Cross-validation

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [None]:
# Search space for the SVC: 3 values of C x 4 values of gamma x 1 kernel
# = 12 candidate combinations.
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': ['scale', 0.01, 0.1, 1],
    'kernel': ['rbf']
}

In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation on the
# training split; n_jobs=-1 uses all available cores.
grid_search = GridSearchCV(SVC(), param_grid, cv=3, verbose=1, n_jobs=-1)
grid_search.fit(X_train_pca, y_train)

print("=== Best parameters from GridSearchCV ===")
print(grid_search.best_params_)

### Evaluate optimized classifier

In [None]:
# Take the best estimator found by the grid search and score it on the held-out test split.
best_svc = grid_search.best_estimator_
y_pred_best_svc = best_svc.predict(X_test_pca)

print("=== Best SVC Classification Report ===")
print(classification_report(y_test, y_pred_best_svc))

# Confusion matrix with rows/columns ordered like the classes of the tuned SVC.
cm_best = confusion_matrix(y_test, y_pred_best_svc, labels=best_svc.classes_)
disp_best = ConfusionMatrixDisplay(confusion_matrix=cm_best, display_labels=best_svc.classes_)
disp_best.plot()
plt.title("Best SVC (GridSearch) Confusion Matrix")
plt.show()


## Classifier - Neural Network

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Dropout, Flatten

In [None]:
# Shape of a single training instance, intended as the network's input shape.
# NOTE(review): `X_train_2d` is not assigned in any of the cells above, so this
# cell raises NameError as written. It presumably should hold the scaled
# training data reshaped back to per-window 2D instances — define it first.
input_data_shape = X_train_2d[0].shape
print("Type of the input shape object:", type(input_data_shape))
# Bare expression: displays the shape as the cell output.
X_train_2d[0].shape

In [None]:
# Count the distinct labels in `y`; this sizes the network's output layer.
y_np_array = np.array(y)
distinct_labels = np.unique(y_np_array)
n_outputs = distinct_labels.size
print("Number of outputs (classes) the model to predict:", n_outputs)

### Create the Neural Network (NN) Architecture and instantiate the model

In [None]:
# Start from an empty sequential model.
model = Sequential()

# Placeholder string: the hidden layers still have to be added at this point.
"""
BUILD YOUR MODEL ARCHITECTURE HERE
"""

# Output layer: one softmax unit per class.
model.add(Dense(n_outputs, activation="softmax"))

Plot the architecture of the TensorFlow model

Plot the summary of the TensorFlow model

### Build the NN model

In [None]:
# Sparse categorical cross-entropy takes integer class ids as targets
# (presumably what encode_labels produces — confirm).
# "accuracy" replaces the "ADD METRIC" placeholder, which is not a valid metric
# name; the notebook's final section plots Loss and Accuracy learning curves.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
from utils import encode_labels

### Train the NN model

### Evaluate the model on the test data

### Plot and interpret the learning curves: Loss and Accuracy based on the training and validation sets