# Import

In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from collections import Counter
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


# Data exploring

In [2]:
#data exploring
df = pd.read_csv('DATI.csv')

df

Unnamed: 0,timestamp,accX,accY,accZ,gyroX,gyroY,gyroZ,Label
0,1.733400e+09,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']",,,
1,1.733400e+09,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']",,,
2,1.733400e+09,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']",,,
3,1.733400e+09,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']",,,
4,1.733400e+09,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']",,,
...,...,...,...,...,...,...,...,...
41141,1.733401e+09,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']",,,
41142,1.733401e+09,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']",,,
41143,1.733401e+09,9.898576,-6.979712,-6.392718,"['sotto', 'avanti', 'fermo']",,,
41144,1.733401e+09,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']",,,


# Data cleaning

In [3]:
df['gyroX'].head
#abbiamo sbagliato a raccogliere i dati e abbiamo inserito la label nella colonna sbagliata

<bound method NDFrame.head of 0         ['fermo', 'fermo', 'fermo']
1         ['fermo', 'fermo', 'fermo']
2         ['fermo', 'fermo', 'fermo']
3         ['fermo', 'fermo', 'fermo']
4         ['fermo', 'fermo', 'fermo']
                     ...             
41141     ['sotto', 'fermo', 'fermo']
41142     ['sotto', 'fermo', 'fermo']
41143    ['sotto', 'avanti', 'fermo']
41144     ['sotto', 'fermo', 'fermo']
41145     ['sotto', 'fermo', 'fermo']
Name: gyroX, Length: 41146, dtype: object>

In [4]:
df = df.drop('gyroY', axis=1)
df = df.drop('Label', axis=1)
df = df.drop('gyroZ', axis=1)


In [5]:
df = df.rename(columns={"gyroX": "Label"})
df

Unnamed: 0,timestamp,accX,accY,accZ,Label
0,1.733400e+09,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']"
1,1.733400e+09,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']"
2,1.733400e+09,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']"
3,1.733400e+09,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']"
4,1.733400e+09,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']"
...,...,...,...,...,...
41141,1.733401e+09,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']"
41142,1.733401e+09,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']"
41143,1.733401e+09,9.898576,-6.979712,-6.392718,"['sotto', 'avanti', 'fermo']"
41144,1.733401e+09,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']"


In [6]:
df = df.drop('timestamp', axis = 1)
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']"
1,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']"
2,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']"
3,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']"
4,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']"
41142,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']"
41143,9.898576,-6.979712,-6.392718,"['sotto', 'avanti', 'fermo']"
41144,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']"


# aggregamento dati

In [7]:

def aggrega(df, colonna, batch_size=5):
    """
    Assegna la label più frequente ogni blocco di 10
    da specificare il df e la ['']
    """
    
    lista = []  
    
    for i in range(0, len(df), batch_size):
        batch = df.iloc[i:i + batch_size]  
        
        labels = batch[colonna]  #label del batch
        counter = Counter(labels)     #trova la label più frequente 
        most_common_label = counter.most_common(1)[0][0]
        
        lista.extend([most_common_label] * len(batch))
    
    return pd.Series(lista, index=df.index)

In [8]:
df['Label'] = aggrega(df, colonna='Label')

In [9]:
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']"
1,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']"
2,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']"
3,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']"
4,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']"
41142,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']"
41143,9.898576,-6.979712,-6.392718,"['sotto', 'fermo', 'fermo']"
41144,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']"


# Data cleaning PT 2

In [10]:
# la label è salvata come una stringa unica, quindi la puliamo e dividiamo con strip
# che dividerà in elementi di una lista
df['Label'] = df['Label'].apply(lambda x: x.replace('[',''))
df['Label'] = df['Label'].apply(lambda x: x.replace(']',''))
df['Label'] = df['Label'].apply(lambda x: x.replace('\'',''))

df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"fermo, fermo, fermo"
1,1.937141,5.244785,-1.023467,"fermo, fermo, fermo"
2,1.398141,5.400785,-0.677045,"fermo, fermo, fermo"
3,0.800141,5.587785,-0.298913,"fermo, fermo, fermo"
4,0.290141,5.723785,0.281959,"fermo, fermo, fermo"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"sotto, fermo, fermo"
41142,8.776462,-0.726462,-0.613555,"sotto, fermo, fermo"
41143,9.898576,-6.979712,-6.392718,"sotto, fermo, fermo"
41144,8.937576,-0.515378,0.985285,"sotto, fermo, fermo"


In [11]:
df['Label'] = df['Label'].apply(lambda x: x.split(','))
df['Label']
#trasfroma in lista

0        [fermo,  fermo,  fermo]
1        [fermo,  fermo,  fermo]
2        [fermo,  fermo,  fermo]
3        [fermo,  fermo,  fermo]
4        [fermo,  fermo,  fermo]
                  ...           
41141    [sotto,  fermo,  fermo]
41142    [sotto,  fermo,  fermo]
41143    [sotto,  fermo,  fermo]
41144    [sotto,  fermo,  fermo]
41145    [sotto,  fermo,  fermo]
Name: Label, Length: 41146, dtype: object

In [12]:
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"[fermo, fermo, fermo]"
1,1.937141,5.244785,-1.023467,"[fermo, fermo, fermo]"
2,1.398141,5.400785,-0.677045,"[fermo, fermo, fermo]"
3,0.800141,5.587785,-0.298913,"[fermo, fermo, fermo]"
4,0.290141,5.723785,0.281959,"[fermo, fermo, fermo]"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"[sotto, fermo, fermo]"
41142,8.776462,-0.726462,-0.613555,"[sotto, fermo, fermo]"
41143,9.898576,-6.979712,-6.392718,"[sotto, fermo, fermo]"
41144,8.937576,-0.515378,0.985285,"[sotto, fermo, fermo]"


In [13]:
# come scelta progettuale ho eliminato le label non necessarie:

def trasforma(df):
    zero = abs(df['accX'])
    uno = abs(df['accY'])
    due = abs(df['accZ'])
    
    if zero > max(uno, due):
        return df['Label'][0].strip()
    elif uno > max(zero, due):
        return df['Label'][1].strip()
    elif due > max(zero, uno):
        return df['Label'][2].strip()

In [14]:
df['Label'] = df.apply(trasforma, axis = 1)
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,fermo
1,1.937141,5.244785,-1.023467,fermo
2,1.398141,5.400785,-0.677045,fermo
3,0.800141,5.587785,-0.298913,fermo
4,0.290141,5.723785,0.281959,fermo
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,sotto
41142,8.776462,-0.726462,-0.613555,sotto
41143,9.898576,-6.979712,-6.392718,sotto
41144,8.937576,-0.515378,0.985285,sotto


In [15]:
df['Label'].value_counts()
#controllo se ci sono label vuote (no)

Label
fermo       26367
avanti       4122
destra       3474
sotto        3230
indietro     1780
sopra        1257
sinistra      916
Name: count, dtype: int64

In [18]:
binary_labels = pd.get_dummies(df['Label'])
print(binary_labels.head())


   avanti  destra  fermo  indietro  sinistra  sopra  sotto
0   False   False   True     False     False  False  False
1   False   False   True     False     False  False  False
2   False   False   True     False     False  False  False
3   False   False   True     False     False  False  False
4   False   False   True     False     False  False  False


# train test split

In [19]:
# Caricamento e suddivisione dei dati
x = df.drop('Label', axis  = 1)
y = binary_labels

In [20]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

# normalizzazione

In [21]:
scaler = MinMaxScaler(feature_range=(0, 1))
x_train  = scaler.fit_transform(x_train)
x_test  = scaler.transform(x_test)

In [22]:
import joblib

joblib.dump(scaler, 'scaler.joblib')

['scaler.joblib']

In [23]:
print(x_train.shape)
y_train.shape

(32916, 3)


(32916, 7)

# convertire i dati per il modello

In [24]:
sequence_length = 5 #sequenza degli istanti temporali

def create_sequences(train, sequence_length):
    
    seq_temp = []
    for i in range(len(train) - sequence_length + 1):
        seq_temp.append(train[i:i + sequence_length])
    return np.array(seq_temp)

x_train = create_sequences(x_train, sequence_length)
x_test = create_sequences(x_test, sequence_length)

print(x_train.shape)
x_test.shape

(32912, 5, 3)


(8226, 5, 3)

In [25]:
#scartiamo da y_test i dati in eccesso che non riescono a creare una sequenza intera
diff = len(y_test) - len(x_test)
print(diff)

if diff > 0:
    y_test = y_test[:-diff]

4


# addestramento

In [26]:
from keras.callbacks import EarlyStopping

In [27]:
#modello
model = Sequential()
model.add(LSTM(64, input_shape=(sequence_length, 3), return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(32))
model.add(Dropout(0.2))
model.add(Dense(7, activation='softmax'))

early_stopping = EarlyStopping(
    monitor='val_loss',             
    patience=2,                     
    restore_best_weights=True       
)

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(x_train, y_train, epochs=20, batch_size=32, validation_split=0.2, 
          callbacks= early_stopping)



  super().__init__(**kwargs)


Epoch 1/20
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 11ms/step - accuracy: 0.5935 - loss: 0.3426 - val_accuracy: 0.6433 - val_loss: 0.2631
Epoch 2/20
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.6895 - loss: 0.2265 - val_accuracy: 0.7794 - val_loss: 0.1524
Epoch 3/20
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.8052 - loss: 0.1438 - val_accuracy: 0.8189 - val_loss: 0.1338
Epoch 4/20
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.8248 - loss: 0.1309 - val_accuracy: 0.8499 - val_loss: 0.1108
Epoch 5/20
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8418 - loss: 0.1194 - val_accuracy: 0.8770 - val_loss: 0.0987
Epoch 6/20
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8712 - loss: 0.1058 - val_accuracy: 0.8961 - val_loss: 0.0857
Epoch 7/20
[1m823/823[0m

<keras.src.callbacks.history.History at 0x1c3a4c7f520>

In [28]:
from sklearn.metrics import accuracy_score
# Valutazione

predizioni = model.predict(x_test)
soglia_corretto = 0.6
predicted_labels = (predizioni > soglia_corretto)

accuracy = accuracy_score(y_test, predicted_labels)

[1m258/258[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


In [29]:
print(f"Accuracy: {accuracy}")

Accuracy: 0.9089472404570873


In [33]:
from sklearn.metrics import classification_report

print(classification_report(y_test, predicted_labels, target_names=binary_labels.columns))


              precision    recall  f1-score   support

      avanti       0.90      0.83      0.87       824
      destra       0.91      0.82      0.86       695
       fermo       0.95      0.94      0.95      5273
    indietro       0.79      0.89      0.84       356
    sinistra       0.85      0.83      0.84       183
       sopra       0.90      0.78      0.84       250
       sotto       0.92      0.92      0.92       645

   micro avg       0.93      0.91      0.92      8226
   macro avg       0.89      0.86      0.87      8226
weighted avg       0.93      0.91      0.92      8226
 samples avg       0.91      0.91      0.91      8226



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
#model.save('model_movement.keras')