#### Progetto di corso per APPLICAZIONI DELL'INTELLIGENZA ARTIFICIALE (AA 2024-2025)
#### Stud: Marzio Della Bosca


Questo notebook si occupa di aggregare dei set di feature provenienti da **Motion Sense**, **Heterogeneity Human Activity Recognition**, **Wisdm** e **Moby Act** dove possibile (es. stesse etichette di attività). Le attività proposte nel notebook sono l'aggregazione dei dati, l'uniformazione delle etichette e la scalatura dei dati.

I dati caricati, anche se provenienti da dataset e dispositivi con diversa frequenza di campionamento, sono stati estratti mediante catch22 e tsfel su finestre di 2 secondi, il che rende più semplice l'aggregazione (mantenendo coerenza nell'unione dei dati).

In [1]:
import warnings
warnings.filterwarnings("ignore")
from tqdm import tqdm
import time
import os

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler

In [2]:
# Load the pre-extracted catch22 / TSFEL feature arrays and the label vector
# of each source dataset. Paths are relative to the notebook's working directory.

# MotionSense
motion_catch22, motion_tsfel, motion_labels = (
    np.load(path)
    for path in ('motion_catch22_40.npy', 'motion_tsfel_40.npy', 'motion_labels_40.npy')
)

# MobiAct
# NOTE(review): the label file is spelled 'mobyact' while the feature files are
# spelled 'mobiact'; the names are kept verbatim — confirm they match the files on disk.
moby_catch22, moby_tsfel, moby_labels = (
    np.load(path)
    for path in ('mobiact_catch22_40.npy', 'mobiact_tsfel_40.npy', 'mobyact_labels_40.npy')
)

# HHAR
hhar_catch22, hhar_tsfel = (
    np.load(path)
    for path in ('hhar_catch22.npy', 'hhar_tsfel.npy')
)
# NOTE(review): allow_pickle=True unpickles Python objects and can execute
# arbitrary code; only use it on label files you produced yourself.
hhar_labels = np.load('hhar_labels.npy', allow_pickle=True)

# WISDM
wisdm_catch22, wisdm_tsfel, wisdm_labels = (
    np.load(path)
    for path in ('wisdm_catch22.npy', 'wisdm_tsfel.npy', 'wisdm_labels.npy')
)

In [3]:
# Report the shape of every loaded array, one group of three lines per dataset.
# A caption starting with "\n" opens a new group with a blank line.
for caption, loaded in (
    ("Motion Catch22 shape:", motion_catch22),
    ("Motion TSFEL shape:", motion_tsfel),
    ("Motion labels shape:", motion_labels),
    ("\nMobyAct Catch22 shape:", moby_catch22),
    ("MobyAct TSFEL shape:", moby_tsfel),
    ("MobyAct labels shape:", moby_labels),
    ("\nHHAR Catch22 shape:", hhar_catch22),
    ("HHAR TSFEL shape:", hhar_tsfel),
    ("HHAR labels shape:", hhar_labels),
    ("\nWISDM Catch22 shape:", wisdm_catch22),
    ("WISDM TSFEL shape:", wisdm_tsfel),
    ("WISDM labels shape:", wisdm_labels),
):
    print(caption, loaded.shape)

Motion Catch22 shape: (35321, 22, 6)
Motion TSFEL shape: (35321, 45, 6)
Motion labels shape: (35321,)

MobyAct Catch22 shape: (281167, 22, 6)
MobyAct TSFEL shape: (281167, 45, 6)
MobyAct labels shape: (281167,)

HHAR Catch22 shape: (46787, 6, 22)
HHAR TSFEL shape: (46787, 6, 45)
HHAR labels shape: (46787,)

WISDM Catch22 shape: (22857, 6, 22)
WISDM TSFEL shape: (22857, 6, 45)
WISDM labels shape: (22857,)


In [4]:
# Native label encodings of the four datasets:
#   WISDM  : A-Walking, B-Jogging, C-Stairs, D-Sitting, E-Standing
#   MOBY   : 1-Walking, 2-Jogging, 3-StairsDwn, 4-StairsUp, 5-Sitting, 6-Standing, 7-Running
#   MOTION : 0-Downstair, 1-Upstair, 2-Walking, 3-Jogging, 4-Sitting, 5-Standing
#   HHAR   : Biking, Sitting, Standing, Walking, Stair Up, Stair down

# Inspect the label values actually present in each dataset before unifying them.
for caption, raw_labels in (
    ("\nUnique labels in Motion dataset:", motion_labels),
    ("Unique labels in MobyAct dataset:", moby_labels),
    ("Unique labels in HHAR dataset:", hhar_labels),
    ("Unique labels in WISDM dataset:", wisdm_labels),
):
    print(caption, np.unique(raw_labels))



Unique labels in Motion dataset: [0. 1. 2. 3. 4. 5.]
Unique labels in MobyAct dataset: [1. 2. 3. 4. 5. 6. 7.]
Unique labels in HHAR dataset: ['bike' 'sit' 'stairsdown' 'stairsup' 'stand' 'walk']
Unique labels in WISDM dataset: ['A' 'B' 'C' 'D' 'E']


In [5]:
# Unified label scheme shared by every dataset:
#   0-Walking, 1-Jogging, 2-DownStairs, 3-UpStairs, 4-Sitting, 5-Standing
# Code 9 is assigned to activities that have no counterpart in this scheme
# (HHAR 'bike', WISDM 'C').
#
# NOTE: this cell rebinds the *_labels variables to the unified encoding, so it
# must be executed exactly once after the loading cell. Running it again would
# remap already-unified numeric codes and fail on the HHAR / WISDM lookups.


def _remap_numeric_labels(labels, mapping):
    """Return an int array in which every code listed in `mapping` is replaced.

    Parameters
    ----------
    labels : array-like of numbers
        Original label codes (left untouched).
    mapping : dict
        ``{old_code: new_code}`` pairs.

    Returns
    -------
    numpy.ndarray
        Integer array with the same length as `labels`. Codes that are not
        keys of `mapping` are kept (cast to int).

    Every comparison is made against the original values, so a code is
    translated at most once even when the mapping swaps codes
    (e.g. 0 -> 2 together with 2 -> 0).
    """
    source = np.asarray(labels)
    remapped = source.astype(int)  # astype returns a copy: `source` stays intact
    for old_code, new_code in mapping.items():
        remapped[source == old_code] = new_code
    return remapped


# MotionSense: 0-Downstair, 1-Upstair, 2-Walking, 3-Jogging
# (4-Sitting and 5-Standing already match the unified scheme).
motion_map = {0: 2, 1: 3, 2: 0, 3: 1}

# MobiAct: 1-Walking, 2-Jogging, 3-StairsDown, 4-StairsUp, 5-Sitting, 6-Standing;
# 7-Running is folded into Jogging.
moby_map = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 1}

motion_labels = _remap_numeric_labels(motion_labels, motion_map)
moby_labels = _remap_numeric_labels(moby_labels, moby_map)

# HHAR and WISDM use string labels, so they need an explicit lookup to obtain an
# integer label vector. An unknown label raises KeyError instead of passing through.
hhar_map = {
    'walk': 0, 'stairsdown': 2, 'stairsup': 3,
    'sit': 4, 'stand': 5, 'bike': 9
}
hhar_labels = np.array([hhar_map[name] for name in hhar_labels], dtype=int)

wisdm_map = {'A': 0, 'B': 1, 'C': 9, 'D': 4, 'E': 5}
wisdm_labels = np.array([wisdm_map[name] for name in wisdm_labels], dtype=int)

print("\nUnique labels in Motion dataset:", np.unique(motion_labels))
print("Unique labels in MobyAct dataset:", np.unique(moby_labels))
print("Unique labels in HHAR dataset:", np.unique(hhar_labels))
print("Unique labels in WISDM dataset:", np.unique(wisdm_labels))


Unique labels in Motion dataset: [0 1 2 3 4 5]
Unique labels in MobyAct dataset: [0 1 2 3 4 5]
Unique labels in HHAR dataset: [0 2 3 4 5 9]
Unique labels in WISDM dataset: [0 1 4 5 9]


In [6]:
# Remove from HHAR and WISDM every window labelled 9, i.e. the code that the
# label-mapping cell assigns to HHAR 'bike' and WISDM 'C'.
hhar_labels = np.array(hhar_labels)
wisdm_labels = np.array(wisdm_labels)

# Boolean masks of the windows to discard, plus their positional indices.
hhar_is_bad = hhar_labels == 9
wisdm_is_bad = wisdm_labels == 9
hhar_bad_idxs = np.where(hhar_is_bad)[0]
wisdm_bad_idxs = np.where(wisdm_is_bad)[0]

# HHAR: keep only the rows whose label is not 9 (labels and both feature sets).
hhar_labels_y = hhar_labels[~hhar_is_bad]
hhar_X_catch22 = hhar_catch22[~hhar_is_bad]
hhar_X_tsfel = hhar_tsfel[~hhar_is_bad]

# WISDM: same filtering.
wisdm_labels_y = wisdm_labels[~wisdm_is_bad]
wisdm_X_catch22 = wisdm_catch22[~wisdm_is_bad]
wisdm_X_tsfel = wisdm_tsfel[~wisdm_is_bad]

for caption, kept in (
    ("\nHHAR Catch22 shape:", hhar_X_catch22),
    ("HHAR TSFEL shape:", hhar_X_tsfel),
    ("HHAR labels shape:", hhar_labels_y),
    ("\nWISDM Catch22 shape:", wisdm_X_catch22),
    ("WISDM TSFEL shape:", wisdm_X_tsfel),
    ("WISDM labels shape:", wisdm_labels_y),
):
    print(caption, kept.shape)

for caption, label_vector in (
    ("\nUnique labels in Motion dataset:", motion_labels),
    ("Unique labels in MobyAct dataset:", moby_labels),
    ("Unique labels in HHAR dataset:", hhar_labels_y),
    ("Unique labels in WISDM dataset:", wisdm_labels_y),
):
    print(caption, np.unique(label_vector))


HHAR Catch22 shape: (40586, 6, 22)
HHAR TSFEL shape: (40586, 6, 45)
HHAR labels shape: (40586,)

WISDM Catch22 shape: (18326, 6, 22)
WISDM TSFEL shape: (18326, 6, 45)
WISDM labels shape: (18326,)

Unique labels in Motion dataset: [0 1 2 3 4 5]
Unique labels in MobyAct dataset: [0 1 2 3 4 5]
Unique labels in HHAR dataset: [0 2 3 4 5]
Unique labels in WISDM dataset: [0 1 4 5]


In [7]:
# Stack the filtered HHAR and WISDM arrays along the sample axis (axis 0).
combined_labels = np.concatenate([hhar_labels_y, wisdm_labels_y])
combined_X_catch22 = np.concatenate([hhar_X_catch22, wisdm_X_catch22])
combined_X_tsfel = np.concatenate([hhar_X_tsfel, wisdm_X_tsfel])

# Check that the three arrays agree on the number of samples.
for caption, merged in (
    ("Shape combined_labels:", combined_labels),
    ("Shape combined_X_catch22:", combined_X_catch22),
    ("Shape combined_X_tsfel:", combined_X_tsfel),
):
    print(caption, merged.shape)

# Look for NaN values in the combined arrays ...
for caption, merged in (
    ("NaN in combined_labels:", combined_labels),
    ("NaN in combined_X_catch22:", combined_X_catch22),
    ("NaN in combined_X_tsfel:", combined_X_tsfel),
):
    print(caption, np.isnan(merged).any())

# ... and for infinite values.
for caption, merged in (
    ("Inf in combined_labels:", combined_labels),
    ("Inf in combined_X_catch22:", combined_X_catch22),
    ("Inf in combined_X_tsfel:", combined_X_tsfel),
):
    print(caption, np.isinf(merged).any())

Shape combined_labels: (58912,)
Shape combined_X_catch22: (58912, 6, 22)
Shape combined_X_tsfel: (58912, 6, 45)
NaN in combined_labels: False
NaN in combined_X_catch22: False
NaN in combined_X_tsfel: False
Inf in combined_labels: False
Inf in combined_X_catch22: False
Inf in combined_X_tsfel: False


In [8]:
# Flatten every 3-D feature tensor to (samples, channels * features) and stack
# the four datasets into a single matrix per feature extractor.
#
# The shapes printed after loading show two different layouts:
#   MotionSense / MobiAct : (samples, features, channels), e.g. (35321, 22, 6)
#   HHAR / WISDM          : (samples, channels, features), e.g. (46787, 6, 22)
# Reshaping both layouts directly would give different column orders, so the
# same column of the stacked matrix would hold unrelated features depending on
# the source dataset. The channels-last tensors are therefore transposed to the
# channels-first layout before flattening.


def _flatten_features(features, channels_last=False):
    """Flatten a 3-D feature tensor to 2-D with channel-major column order.

    Parameters
    ----------
    features : array-like, 3-D
        Either (samples, channels, features) or, when `channels_last` is True,
        (samples, features, channels).
    channels_last : bool, default False
        Set to True when the channel axis is the last one.

    Returns
    -------
    numpy.ndarray
        Array of shape (samples, channels * features) whose column index is
        ``channel * n_features + feature``.
    """
    features = np.asarray(features)
    if channels_last:
        features = np.transpose(features, (0, 2, 1))
    return features.reshape(features.shape[0], -1)


# Fail loudly if the assumed layouts do not hold, instead of silently stacking
# misaligned columns.
assert motion_catch22.shape[1:][::-1] == hhar_X_catch22.shape[1:], "catch22 layout mismatch"
assert moby_catch22.shape[1:][::-1] == wisdm_X_catch22.shape[1:], "catch22 layout mismatch"
assert motion_tsfel.shape[1:][::-1] == hhar_X_tsfel.shape[1:], "TSFEL layout mismatch"
assert moby_tsfel.shape[1:][::-1] == wisdm_X_tsfel.shape[1:], "TSFEL layout mismatch"

# catch22 features
motion_catch22_flat = _flatten_features(motion_catch22, channels_last=True)
moby_catch22_flat = _flatten_features(moby_catch22, channels_last=True)
hhar_catch22_flat = _flatten_features(hhar_X_catch22)
wisdm_catch22_flat = _flatten_features(wisdm_X_catch22)

catch_22_uni = np.concatenate(
    [motion_catch22_flat, moby_catch22_flat, hhar_catch22_flat, wisdm_catch22_flat], axis=0
)

# TSFEL features
motion_tsfel_flat = _flatten_features(motion_tsfel, channels_last=True)
moby_tsfel_flat = _flatten_features(moby_tsfel, channels_last=True)
hhar_tsfel_flat = _flatten_features(hhar_X_tsfel)
wisdm_tsfel_flat = _flatten_features(wisdm_X_tsfel)

tsfel_uni = np.concatenate(
    [motion_tsfel_flat, moby_tsfel_flat, hhar_tsfel_flat, wisdm_tsfel_flat], axis=0
)

# Labels, concatenated in the same dataset order as the feature matrices.
labels_uni = np.concatenate([motion_labels, moby_labels, hhar_labels_y, wisdm_labels_y], axis=0)

print("\nCatch22 dataset shape:", catch_22_uni.shape)
print("TSFEL dataset shape:", tsfel_uni.shape)
print("Labels shape:", labels_uni.shape)


Catch22 dataset shape: (375400, 132)
TSFEL dataset shape: (375400, 270)
Labels shape: (375400,)


In [10]:
# Report whether the stacked feature matrices contain NaN or infinite values.
for header, nan_caption, inf_caption, matrix in (
    ("Controllo su catch22_uni:", "NaN in catch22_uni:", "Inf in catch22_uni:", catch_22_uni),
    ("\nControllo su tsfel_uni:", "NaN in tsfel_uni:", "Inf in tsfel_uni:", tsfel_uni),
):
    print(header)
    print(nan_caption, np.isnan(matrix).any())
    print(inf_caption, np.isinf(matrix).any())

Controllo su catch22_uni:
NaN in catch22_uni: True
Inf in catch22_uni: False

Controllo su tsfel_uni:
NaN in tsfel_uni: True
Inf in tsfel_uni: False


In [14]:
# Flag the rows (samples) that contain at least one NaN in either feature matrix.
catch22_nan_rows = np.isnan(catch_22_uni).any(axis=1)
tsfel_nan_rows = np.isnan(tsfel_uni).any(axis=1)
nan_indices_catch22 = np.where(catch22_nan_rows)[0]
nan_indices_tsfel = np.where(tsfel_nan_rows)[0]

# A sample is dropped when it is flagged in catch22 OR in TSFEL.
indices_to_remove = np.union1d(nan_indices_catch22, nan_indices_tsfel)

# Keep the remaining rows in both feature matrices and in the label vector,
# so the three arrays stay aligned.
rows_to_keep = ~(catch22_nan_rows | tsfel_nan_rows)
catch_22_uni = catch_22_uni[rows_to_keep]
tsfel_uni = tsfel_uni[rows_to_keep]
labels_uni = labels_uni[rows_to_keep]

# Print the new shapes as a sanity check.
print(
    f"Shape catch22 dopo rimozione: {catch_22_uni.shape}",
    f"Shape tsfel dopo rimozione: {tsfel_uni.shape}",
    f"Shape labels dopo rimozione: {labels_uni.shape}",
    sep="\n",
)

# Locate any NaN still present (row and column indices); both results are
# expected to be empty after the removal above.
nan_indices_catch22 = np.where(np.isnan(catch_22_uni))
print("Indici dei NaN in catch22:", nan_indices_catch22)

nan_indices_tsfel = np.where(np.isnan(tsfel_uni))
print("Indici dei NaN in tsfel:", nan_indices_tsfel)

# Final NaN / Inf report on the cleaned matrices.
for header, nan_caption, inf_caption, matrix in (
    ("Controllo su catch22_uni:", "NaN in catch22_uni:", "Inf in catch22_uni:", catch_22_uni),
    ("\nControllo su tsfel_uni:", "NaN in tsfel_uni:", "Inf in tsfel_uni:", tsfel_uni),
):
    print(header)
    print(nan_caption, np.isnan(matrix).any())
    print(inf_caption, np.isinf(matrix).any())

Shape catch22 dopo rimozione: (375358, 132)
Shape tsfel dopo rimozione: (375358, 270)
Shape labels dopo rimozione: (375358,)
Indici dei NaN in catch22: (array([], dtype=int64), array([], dtype=int64))
Indici dei NaN in tsfel: (array([], dtype=int64), array([], dtype=int64))
Controllo su catch22_uni:
NaN in catch22_uni: False
Inf in catch22_uni: False

Controllo su tsfel_uni:
NaN in tsfel_uni: False
Inf in tsfel_uni: False


In [15]:
# Standardise both feature matrices (zero mean, unit variance per column).
# Saving to disk happens in a later cell.
#
# Each feature set gets its own StandardScaler: refitting a single instance on
# TSFEL would overwrite the statistics learned on catch22, making it impossible
# to apply the same catch22 transform to new data or to invert it.
#
# NOTE(review): the statistics are computed on the whole dataset. If a
# train/test split is made downstream, fit the scalers on the training part
# only to avoid leaking test-set statistics.
# NOTE: this cell overwrites its inputs; run it once per kernel session.
catch22_scaler = StandardScaler()
tsfel_scaler = StandardScaler()

catch_22_uni = catch22_scaler.fit_transform(catch_22_uni)
tsfel_uni = tsfel_scaler.fit_transform(tsfel_uni)

# Backward-compatible alias: `scaler` used to end up fitted on the TSFEL matrix.
scaler = tsfel_scaler

print("Controllo su catch22_uni:")
print("NaN in catch22_uni:", np.isnan(catch_22_uni).any())
print("Inf in catch22_uni:", np.isinf(catch_22_uni).any())
print("\nControllo su tsfel_uni:")
print("NaN in tsfel_uni:", np.isnan(tsfel_uni).any())
print("Inf in tsfel_uni:", np.isinf(tsfel_uni).any())

Controllo su catch22_uni:
NaN in catch22_uni: False
Inf in catch22_uni: False

Controllo su tsfel_uni:
NaN in tsfel_uni: False
Inf in tsfel_uni: False


In [18]:
# Final shapes of the cleaned, scaled matrices and of the label vector.
print(
    f"Shape catch22 dopo rimozione: {catch_22_uni.shape}",
    f"Shape tsfel dopo rimozione: {tsfel_uni.shape}",
    f"Shape labels dopo rimozione: {labels_uni.shape}",
    sep="\n",
)

Shape catch22 dopo rimozione: (375358, 132)
Shape tsfel dopo rimozione: (375358, 270)
Shape labels dopo rimozione: (375358,)


In [16]:
# Persist the flattened feature matrices and the matching label vector.
for target_path, array_to_save in (
    ('catch22_uni.npy', catch_22_uni),
    ('tsfel_uni.npy', tsfel_uni),
    ('labels_uni.npy', labels_uni),
):
    np.save(target_path, array_to_save)

In [17]:
# Shapes of the 3-D HHAR + WISDM arrays before they are written to disk.
for caption, merged in (
    ("Shape combined_labels:", combined_labels),
    ("Shape combined_X_catch22:", combined_X_catch22),
    ("Shape combined_X_tsfel:", combined_X_tsfel),
):
    print(caption, merged.shape)

Shape combined_labels: (58912,)
Shape combined_X_catch22: (58912, 6, 22)
Shape combined_X_tsfel: (58912, 6, 45)


In [19]:
# Persist the 3-D feature tensors and their labels. These arrays are the
# concatenation of the filtered HHAR and WISDM data only; they are neither
# flattened nor scaled.
for target_path, array_to_save in (
    ('catch22_3d_uni.npy', combined_X_catch22),
    ('tsfel_3d_uni.npy', combined_X_tsfel),
    ('labels_3d_uni.npy', combined_labels),
):
    np.save(target_path, array_to_save)