In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [23]:
# Load the data into a pandas DataFrame
data = pd.read_csv('../data/all_data_featured.csv', parse_dates=['time'])

# Extract relevant features and the target variable
features = ['Latitude (°)', 'Longitude (°)', 'Horizontal Accuracy (m)', 'Acceleration x (m/s^2)', 'Acceleration y (m/s^2)', 'Acceleration z (m/s^2)', 'Gyroscope x (rad/s)', 'Gyroscope y (rad/s)', 'Gyroscope z (rad/s)']
target = 'transportation_mode'

print(data.shape)

# Fill missing values (if any) or drop them
data = data.dropna(subset=features + [target])

# data.shape

(33539, 41)


## Pre-process the data

In [24]:
# Normalize the features
scalar = StandardScaler()
data[features] = scalar.fit_transform(data[features])

# One-hot encode the target variable
encoder = OneHotEncoder(sparse_output=False)
encoded_target = encoder.fit_transform(data[[target]])

# Create sequences of data for LSTM input
sequence_length = 10
X, y = [], []
for experiment_id in data['expirement_id'].unique():
    experiment_data = data[data['expirement_id'] == experiment_id]
    for i in range(len(experiment_data) - sequence_length):
        X.append(experiment_data[features].iloc[i:i+sequence_length].values)
        y.append(encoded_target[i + sequence_length])

X = np.array(X)
y = np.array(y)

## Build the LSTM Model

In [25]:
# Define the LSTM model
model = Sequential()
model.add(LSTM(50, input_shape=(sequence_length, len(features)), return_sequences=False))
model.add(Dense(4, activation='relu'))
model.add(Dense(4, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print model summary
model.summary()

  super().__init__(**kwargs)


## Train the model

In [26]:
# Train the model
history = model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
[1m834/834[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.0153 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 2/10
[1m834/834[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0151 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 3/10
[1m834/834[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0156 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 4/10
[1m834/834[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0152 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 5/10
[1m834/834[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0169 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 6/10
[1m834/834[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0154 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 7/10
[1m834/834[0m [32m━━━━━━━━

## Make predictions

In [27]:
# Make predictions
predictions = model.predict(X)


[1m1043/1043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 594us/step


## Evaluate the model

In [28]:
from sklearn.metrics import f1_score, classification_report

# Evaluate the model on the training data
loss, accuracy = model.evaluate(X, y, verbose=0)
print(f'Accuracy: {accuracy:.4f}')
print(f'Loss: {loss:.4f}')

# Make predictions
predictions = model.predict(X)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y, axis=1)

# Calculate F1-score
f1 = f1_score(true_classes, predicted_classes, average='weighted')
print(f'F1-score: {f1:.4f}')

# Extract class names from the encoder
class_names = encoder.categories_[0]

# Print classification report with explicit labels
labels = range(len(class_names))
report = classification_report(true_classes, predicted_classes, target_names=class_names, labels=labels)
print(report)

Accuracy: 0.0129
Loss: nan
[1m1043/1043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 549us/step
F1-score: 0.0003
              precision    recall  f1-score   support

        bike       0.01      1.00      0.03       430
         car       0.00      0.00      0.00     32929
       train       0.00      0.00      0.00         0
        walk       0.00      0.00      0.00         0

   micro avg       0.01      0.01      0.01     33359
   macro avg       0.00      0.25      0.01     33359
weighted avg       0.00      0.01      0.00     33359



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
