# Import

In [2]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter
import joblib


# Data exploring

In [3]:
#data exploring
df = pd.read_csv('DATI.csv')

df

Unnamed: 0,timestamp,accX,accY,accZ,gyroX,gyroY,gyroZ,Label
0,1.733400e+09,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']",,,
1,1.733400e+09,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']",,,
2,1.733400e+09,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']",,,
3,1.733400e+09,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']",,,
4,1.733400e+09,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']",,,
...,...,...,...,...,...,...,...,...
41141,1.733401e+09,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']",,,
41142,1.733401e+09,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']",,,
41143,1.733401e+09,9.898576,-6.979712,-6.392718,"['sotto', 'avanti', 'fermo']",,,
41144,1.733401e+09,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']",,,


# Data cleaning

In [9]:
df = df.drop('gyroY', axis=1)
df = df.drop('Label', axis=1)
df = df.drop('gyroZ', axis=1)
df = df.rename(columns={"gyroX": "Label"})
df

Unnamed: 0,timestamp,accX,accY,accZ,Label
0,1.733400e+09,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']"
1,1.733400e+09,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']"
2,1.733400e+09,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']"
3,1.733400e+09,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']"
4,1.733400e+09,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']"
...,...,...,...,...,...
41141,1.733401e+09,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']"
41142,1.733401e+09,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']"
41143,1.733401e+09,9.898576,-6.979712,-6.392718,"['sotto', 'avanti', 'fermo']"
41144,1.733401e+09,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']"


In [10]:
df = df.drop('timestamp', axis = 1)
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']"
1,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']"
2,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']"
3,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']"
4,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']"
41142,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']"
41143,9.898576,-6.979712,-6.392718,"['sotto', 'avanti', 'fermo']"
41144,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']"


# aggregamento dati

In [11]:

def aggrega(df, colonna, batch_size=5):
    """
    Assegna la label più frequente ogni blocco di 10
    da specificare il df e la ['']
    """
    
    lista = []  
    
    for i in range(0, len(df), batch_size):
        batch = df.iloc[i:i + batch_size]  
        
        labels = batch[colonna]  #label del batch
        counter = Counter(labels)     #trova la label più frequente 
        most_common_label = counter.most_common(1)[0][0]
        
        lista.extend([most_common_label] * len(batch))
    
    return pd.Series(lista, index=df.index)

In [12]:
df['Label'] = aggrega(df, colonna='Label')

In [13]:
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"['fermo', 'fermo', 'fermo']"
1,1.937141,5.244785,-1.023467,"['fermo', 'fermo', 'fermo']"
2,1.398141,5.400785,-0.677045,"['fermo', 'fermo', 'fermo']"
3,0.800141,5.587785,-0.298913,"['fermo', 'fermo', 'fermo']"
4,0.290141,5.723785,0.281959,"['fermo', 'fermo', 'fermo']"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"['sotto', 'fermo', 'fermo']"
41142,8.776462,-0.726462,-0.613555,"['sotto', 'fermo', 'fermo']"
41143,9.898576,-6.979712,-6.392718,"['sotto', 'fermo', 'fermo']"
41144,8.937576,-0.515378,0.985285,"['sotto', 'fermo', 'fermo']"


# Data cleaning PT 2

In [14]:
# la label è salvata come una stringa unica, quindi la puliamo e dividiamo con strip
# che dividerà in elementi di una lista
df['Label'] = df['Label'].apply(lambda x: x.replace('[',''))
df['Label'] = df['Label'].apply(lambda x: x.replace(']',''))
df['Label'] = df['Label'].apply(lambda x: x.replace('\'',''))

df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,"fermo, fermo, fermo"
1,1.937141,5.244785,-1.023467,"fermo, fermo, fermo"
2,1.398141,5.400785,-0.677045,"fermo, fermo, fermo"
3,0.800141,5.587785,-0.298913,"fermo, fermo, fermo"
4,0.290141,5.723785,0.281959,"fermo, fermo, fermo"
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,"sotto, fermo, fermo"
41142,8.776462,-0.726462,-0.613555,"sotto, fermo, fermo"
41143,9.898576,-6.979712,-6.392718,"sotto, fermo, fermo"
41144,8.937576,-0.515378,0.985285,"sotto, fermo, fermo"


In [15]:
df['Label'] = df['Label'].apply(lambda x: x.split(','))
df['Label']
#trasfroma in lista

0        [fermo,  fermo,  fermo]
1        [fermo,  fermo,  fermo]
2        [fermo,  fermo,  fermo]
3        [fermo,  fermo,  fermo]
4        [fermo,  fermo,  fermo]
                  ...           
41141    [sotto,  fermo,  fermo]
41142    [sotto,  fermo,  fermo]
41143    [sotto,  fermo,  fermo]
41144    [sotto,  fermo,  fermo]
41145    [sotto,  fermo,  fermo]
Name: Label, Length: 41146, dtype: object

In [16]:
def trasforma(df):
    zero = abs(df['accX'])
    uno = abs(df['accY'])
    due = abs(df['accZ'])
    
    if zero > max(uno, due):
        return df['Label'][0].strip()
    elif uno > max(zero, due):
        return df['Label'][1].strip()
    elif due > max(zero, uno):
        return df['Label'][2].strip()

In [17]:
df['Label'] = df.apply(trasforma, axis=1)
df

Unnamed: 0,accX,accY,accZ,Label
0,2.261141,5.214785,-1.392926,fermo
1,1.937141,5.244785,-1.023467,fermo
2,1.398141,5.400785,-0.677045,fermo
3,0.800141,5.587785,-0.298913,fermo
4,0.290141,5.723785,0.281959,fermo
...,...,...,...,...
41141,8.508712,-1.795462,0.818229,sotto
41142,8.776462,-0.726462,-0.613555,sotto
41143,9.898576,-6.979712,-6.392718,sotto
41144,8.937576,-0.515378,0.985285,sotto


In [18]:
df['Label'].value_counts()
#controllo se ci sono label vuote (no)

Label
fermo       26367
avanti       4122
destra       3474
sotto        3230
indietro     1780
sopra        1257
sinistra      916
Name: count, dtype: int64

In [19]:
binary_labels = pd.get_dummies(df['Label'])

# train test split

In [41]:
# Caricamento e suddivisione dei dati
x = df.drop('Label', axis  = 1)
y = binary_labels

In [42]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)

# normalizzazione

In [43]:
scaler = MinMaxScaler(feature_range=(0, 1))
x_train  = scaler.fit_transform(x_train)
x_test  = scaler.transform(x_test)

In [44]:
joblib.dump(scaler, 'scaler.joblib')

['scaler.joblib']

In [45]:
print(x_train.shape)
y_train.shape

(32916, 3)


(32916, 7)

# convertire i dati per il modello

In [46]:
sequence_length = 5 #sequenza degli istanti temporali

def create_sequences(train, sequence_length):
    
    seq_temp = []
    for i in range(len(train) - sequence_length + 1):
        seq_temp.append(train[i:i + sequence_length])
    return np.array(seq_temp)

x_train = create_sequences(x_train, sequence_length)
x_test = create_sequences(x_test, sequence_length)

print(x_train.shape)
x_test.shape

(32912, 5, 3)


(8226, 5, 3)

In [47]:
#scartiamo da y_test i dati in eccesso che non riescono a creare una sequenza intera
diff = len(y_test) - len(x_test)
print(diff)

diff2 = len(y_train) - len(x_train)
print(diff2)
if diff > 0:
    y_test = y_test[:-diff]


if diff2 > 0:
    y_train = y_train[:-diff2]
  

4
4


# addestramento

In [48]:
x_train = torch.tensor(x_train, dtype=torch.float32)
x_test = torch.tensor(x_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)

In [49]:
print(y_test.shape)
print(x_test.shape)
print(y_train.shape)
print(x_train.shape)

torch.Size([8226, 7])
torch.Size([8226, 5, 3])
torch.Size([32912, 7])
torch.Size([32912, 5, 3])


In [50]:
# Definizione del modello
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = self.dropout(lstm_out[:, -1, :])  # Usa l'ultimo timestep
        output = self.fc(lstm_out)
        return output

In [51]:
# Parametri del modello
input_size = x_train.shape[2]
hidden_size = 64
output_size = y_train.shape[1]
model = LSTMModel(input_size, hidden_size, output_size)

# Funzione di perdita e ottimizzatore
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training
num_epochs = 20
batch_size = 32

In [52]:
def train_model(model, criterion, optimizer, x_train, y_train, num_epochs, batch_size):
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for i in range(0, len(x_train), batch_size):
            x_batch = x_train[i:i + batch_size]
            y_batch = y_train[i:i + batch_size]

            optimizer.zero_grad()
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(x_train):.4f}")

train_model(model, criterion, optimizer, x_train, y_train, num_epochs, batch_size)

Epoch 1/20, Loss: 0.0092
Epoch 2/20, Loss: 0.0072
Epoch 3/20, Loss: 0.0061
Epoch 4/20, Loss: 0.0052
Epoch 5/20, Loss: 0.0043
Epoch 6/20, Loss: 0.0035
Epoch 7/20, Loss: 0.0027
Epoch 8/20, Loss: 0.0024
Epoch 9/20, Loss: 0.0023
Epoch 10/20, Loss: 0.0022
Epoch 11/20, Loss: 0.0021
Epoch 12/20, Loss: 0.0021
Epoch 13/20, Loss: 0.0021
Epoch 14/20, Loss: 0.0020
Epoch 15/20, Loss: 0.0020
Epoch 16/20, Loss: 0.0020
Epoch 17/20, Loss: 0.0019
Epoch 18/20, Loss: 0.0019
Epoch 19/20, Loss: 0.0019
Epoch 20/20, Loss: 0.0019


In [53]:

# Valutazione
model.eval()
with torch.no_grad():
    predizioni = torch.sigmoid(model(x_test))
    predicted_labels = (predizioni > 0.6).float()

accuracy = accuracy_score(y_test.numpy(), predicted_labels.numpy())
print(f"Accuracy: {accuracy}")

# Classification report
print(classification_report(y_test.numpy(), predicted_labels.numpy(), target_names=binary_labels.columns))



Accuracy: 0.8949671772428884
              precision    recall  f1-score   support

      avanti       0.92      0.66      0.77       824
      destra       0.90      0.90      0.90       695
       fermo       0.95      0.93      0.94      5273
    indietro       0.86      0.87      0.87       356
    sinistra       0.87      0.78      0.82       183
       sopra       0.95      0.72      0.82       250
       sotto       0.84      0.98      0.90       645

   micro avg       0.92      0.90      0.91      8226
   macro avg       0.90      0.84      0.86      8226
weighted avg       0.93      0.90      0.91      8226
 samples avg       0.90      0.90      0.90      8226



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [54]:
# Salva il modello
torch.save(model.state_dict(), 'model_movement.pth')