### Joris

In [1]:
import pandas as pd
import matplotlib.pyplot as plt	
import seaborn as sns
import os
import glob
from pykalman import KalmanFilter
import numpy as np
import matplotlib.ticker as ticker

In [None]:
def plot_sensor_data_plt(sensor_data_dict, time_col="Time (s)"):
    fig, axs = plt.subplots(len(sensor_data_dict), 1, figsize=(15, 20), sharex=True)

    if len(sensor_data_dict) == 1:
        axs = [axs]

    for ax, (label, df) in zip(axs, sensor_data_dict.items()):
        if time_col not in df.columns:
            continue
        df = df.dropna()  # Verwijder lege rijen

        time = df[time_col]
        for col in df.columns:
            if col == time_col:
                continue
            ax.plot(time, df[col], label=col)

        ax.set_title(label)
        ax.legend(loc='upper right')
        ax.grid(False)  # Verwijder gridlines

    axs[-1].set_xlabel('Time (s)')
    plt.tight_layout()
    plt.show()


In [None]:
def plot_sensor_data(sensor_data_dict, time_col="Time (s)", dpi=300,
                              palette=["#FF5733", "#33FF57", "#3357FF"], linewidth=1):
    sns.set(style="whitegrid", context="talk", font_scale=1.1)
    num_plots = len(sensor_data_dict)
    
    fig, axs = plt.subplots(num_plots, 1, figsize=(18, 5 * num_plots), sharex=True, dpi=dpi)
    if num_plots == 1:
        axs = [axs]

    for ax, (label, df) in zip(axs, sensor_data_dict.items()):
        if time_col not in df.columns:
            continue

        df_long = df.melt(id_vars=[time_col], var_name="Sensor Axis", value_name="Value")

        sns.set_palette(palette)
        sns.lineplot(data=df_long, x=time_col, y="Value", hue="Sensor Axis", ax=ax, linewidth=linewidth)

        ax.set_title(label, weight='bold', fontsize=14)
        ax.set_ylabel("Value")
        ax.grid(False)

        # Legend outside
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.85, box.height])  # Shrink plot to make space
        ax.legend(title="Axis", loc='center left', bbox_to_anchor=(1.01, 0.5), fontsize='small', title_fontsize='small')

        ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: f'{x:g}'))

    axs[-1].set_xlabel("Time (s)", weight='bold')
    
    plt.show()

Joris

In [2]:
metro_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Accelerometer.csv")
metro_barometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Barometer.csv")
metro_gyroscope_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Gyroscope.csv")
metro_linear_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Linear Accelerometer.csv")
metro_location_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Location.csv")
metro_magnetometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Magnetometer.csv")
metro_proximity_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_joris\Proximity.csv")

In [3]:
sensor_data_dict_metro_2 = {
    "Accelerometer": metro_accelerometer_df_2,
    "Gyroscope": metro_gyroscope_df_2,
    "Linear Accelerometer": metro_linear_accelerometer_df_2,
    "Magnetometer": metro_magnetometer_df_2,
    "Barometer": metro_barometer_df_2,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}


In [4]:
accelerometer_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Accelerometer.csv")
barometer_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Barometer.csv")
gyroscope_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Gyroscope.csv")
linear_accelerometer_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Linear Accelerometer.csv")
location_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Location.csv")
magnetometer_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Magnetometer.csv")
proximity_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_fietsen_joris\Proximity.csv")

In [5]:
sensor_data_dict_fietsen_2 = {
    "Accelerometer": accelerometer_df_fietsen_2,
    "Gyroscope": gyroscope_df_fietsen_2,
    "Linear Accelerometer": linear_accelerometer_df_fietsen_2,
    "Magnetometer": magnetometer_df_fietsen_2,
    "Barometer": barometer_df_fietsen_2
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}

In [None]:
# auto_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Accelerometer.csv")
# auto_barometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Barometer.csv")
# auto_gyroscope_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Gyroscope.csv")
# auto_linear_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Linear Accelerometer.csv")
# auto_location_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Location.csv")
# auto_magnetometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Magnetometer.csv")
# auto_proximity_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris\Proximity.csv")

In [None]:
# sensor_data_dict_auto_2 = {
#     "Accelerometer": auto_accelerometer_df_2,
#     "Gyroscope": auto_gyroscope_df_2,
#     "Linear Accelerometer": auto_linear_accelerometer_df_2,
#     "Magnetometer": auto_magnetometer_df_2,
#     "Barometer": auto_barometer_df_2,
#     # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
# }



In [7]:
auto_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Accelerometer.csv")
auto_barometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Barometer.csv")
auto_gyroscope_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Gyroscope.csv")
auto_linear_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Linear Accelerometer.csv")
auto_location_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Location.csv")
auto_magnetometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Magnetometer.csv")
auto_proximity_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_joris_2\Proximity.csv")

In [8]:
sensor_data_dict_auto_2 = {
    "Accelerometer": auto_accelerometer_df_2,
    "Gyroscope": auto_gyroscope_df_2,
    "Linear Accelerometer": auto_linear_accelerometer_df_2,
    "Magnetometer": auto_magnetometer_df_2,
    "Barometer": auto_barometer_df_2,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}

In [9]:
rennen_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Accelerometer.csv")
rennen_barometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Barometer.csv")
rennen_gyroscope_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Gyroscope.csv")
rennen_linear_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Linear Accelerometer.csv")
rennen_location_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Location.csv")
rennen_magnetometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Magnetometer.csv")
rennen_proximity_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_joris\Proximity.csv")

In [10]:
sensor_data_dict_rennen_2 = {
    "Accelerometer": rennen_accelerometer_df_2,
    "Gyroscope": rennen_gyroscope_df_2,
    "Linear Accelerometer": rennen_linear_accelerometer_df_2,
    "Magnetometer": rennen_magnetometer_df_2,
    "Barometer": rennen_barometer_df_2,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}

Puck

In [144]:
ren_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Accelerometer.csv")
ren_barometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Barometer.csv")
ren_gyroscope_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Gyroscope.csv")
ren_linear_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Linear Accelerometer.csv")
ren_location_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Location.csv")
ren_magnetometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Magnetometer.csv")
ren_proximity_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_rennen_puck\Proximity.csv")

In [145]:
sensor_data_dict_rennen = {
    "Accelerometer": ren_accelerometer_df,
    "Gyroscope": ren_gyroscope_df,
    "Linear Accelerometer": ren_linear_accelerometer_df,
    "Magnetometer": ren_magnetometer_df,
    "Barometer": ren_barometer_df,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}



In [146]:
fiets_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Accelerometer.csv")
fiets_barometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Barometer.csv")
fiets_gyroscope_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Gyroscope.csv")
fiets_linear_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Linear Accelerometer.csv")
fiets_location_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Location.csv")
fiets_magnetometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Magnetometer.csv")
fiets_proximity_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_2_fietsen_puck\Proximity.csv")


In [147]:
sensor_data_dict_fietsen = {
    "Accelerometer": fiets_accelerometer_df,
    "Gyroscope": fiets_gyroscope_df,
    "Linear Accelerometer": fiets_linear_accelerometer_df,
    "Magnetometer": fiets_magnetometer_df,
    "Barometer": fiets_barometer_df,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}




In [148]:
trein_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Accelerometer.csv")
trein_barometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Barometer.csv")
trein_gyroscope_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Gyroscope.csv")
trein_linear_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Linear Accelerometer.csv")
trein_location_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Location.csv")
trein_magnetometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Magnetometer.csv")
trein_proximity_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_puck\Proximity.csv")

In [149]:
sensor_data_dict_trein = {
    "Accelerometer": trein_accelerometer_df,
    "Gyroscope": trein_gyroscope_df,
    "Linear Accelerometer": trein_linear_accelerometer_df,
    "Magnetometer": trein_magnetometer_df,
    "Barometer": trein_barometer_df,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}

In [150]:
auto_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Accelerometer.csv")
auto_barometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Barometer.csv")
auto_gyroscope_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Gyroscope.csv")   
auto_linear_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Linear Accelerometer.csv")
auto_location_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Location.csv")
auto_magnetometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Magnetometer.csv")
auto_proximity_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_auto_puck\Proximity.csv")

In [151]:
sensor_data_dict_auto = {
    "Accelerometer": auto_accelerometer_df,
    "Gyroscope": auto_gyroscope_df,
    "Linear Accelerometer": auto_linear_accelerometer_df,
    "Magnetometer": auto_magnetometer_df,
    "Barometer": auto_barometer_df,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}

In [152]:
metro_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Accelerometer.csv")
metro_barometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Barometer.csv")
metro_gyroscope_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Gyroscope.csv")
metro_linear_accelerometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Linear Accelerometer.csv")
metro_location_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Location.csv")
metro_magnetometer_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Magnetometer.csv")
metro_proximity_df = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_metro_puck\Proximity.csv")

In [153]:
sensor_data_dict_metro = {
    "Accelerometer": metro_accelerometer_df,
    "Gyroscope": metro_gyroscope_df,
    "Linear Accelerometer": metro_linear_accelerometer_df,
    "Magnetometer": metro_magnetometer_df,
    "Barometer": metro_barometer_df,
    # Voeg hier eventueel meer toe zoals hartslag, licht, labels etc.
}

Sjoerd

In [11]:
trein_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Accelerometer.csv")
trein_barometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Barometer.csv")
trein_gyroscope_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Gyroscope.csv")
trein_linear_accelerometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Linear Accelerometer.csv")
trein_location_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Location.csv")
trein_magnetometer_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Magnetometer.csv")
trein_proximity_df_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\experiment_trein_sjoerd\Proximity.csv")


In [12]:
sensor_data_dict_trein_2 = {
    "Accelerometer": trein_accelerometer_df_2,
    "Gyroscope": trein_gyroscope_df_2,
    "Linear Accelerometer": trein_linear_accelerometer_df_2,
    "Magnetometer": trein_magnetometer_df_2,
    "Barometer": trein_barometer_df_2,
    # Voeg hier eventueel meer sensoren toe indien beschikbaar
}



In [None]:
# def create_summary_table(sensor_data_dict, mode_name):
#     summaries = []
#     for sensor_name, df in sensor_data_dict.items():
#         desc = df.describe().T[['mean', 'std', 'min', 'max', 'count']]
#         desc['sensor'] = sensor_name
#         desc['feature'] = desc.index
#         desc['mode'] = mode_name
#         summaries.append(desc.reset_index(drop=True))
    
#     summary_df = pd.concat(summaries, ignore_index=True)
#     summary_df = summary_df[['mode', 'sensor', 'feature', 'count', 'mean', 'std', 'min', 'max']]
#     return summary_df

# # Voorbeeld met meerdere modi
# summary_fietsen = create_summary_table(sensor_data_dict_fietsen, "Bike")
# summary_auto = create_summary_table(sensor_data_dict_auto, "Car")
# summary_runnen = create_summary_table(sensor_data_dict_rennen, "Running")
# summary_trein = create_summary_table(sensor_data_dict_trein, "Train")
# summary_metro = create_summary_table(sensor_data_dict, "Metro")

# # Alles combineren in één grote tabel
# combined_summary = pd.concat([summary_fietsen, summary_auto, summary_metro, summary_trein, summary_runnen], ignore_index=True)

# print(combined_summary.head())
# combined_summary.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data\summary_sensor_data.csv", index=False)

Kalman

In [None]:
# def apply_kalman_and_impute(df, columns, time_col="Time (s)", threshold_std=3):
#     result_df = df.copy()
#     for col in columns:
#         data = df[col].values
#         # Kalman filter instellen
#         kf = KalmanFilter(initial_state_mean=0, n_dim_obs=1)
#         kf = kf.em(data, n_iter=5)
#         filtered_state_means, _ = kf.filter(data)

#         # Outlier detectie
#         residuals = data - filtered_state_means.flatten()
#         threshold = threshold_std * np.std(residuals)
#         outliers = np.abs(residuals) > threshold

#         # Imputatie
#         result_df[col + "_filtered"] = filtered_state_means
#         result_df[col + "_imputed"] = data.copy()
#         result_df.loc[outliers, col + "_imputed"] = filtered_state_means.flatten()[outliers]

#         # Plot (optioneel)
#         plt.figure(figsize=(12, 4))
#         plt.plot(df[time_col], data, label='Origineel', alpha=0.5)
#         plt.plot(df[time_col], filtered_state_means, label='Kalman Filter', linewidth=2)
#         plt.plot(df[time_col], result_df[col + "_imputed"], label='Imputed', linestyle='--')
#         plt.scatter(df[time_col][outliers], data[outliers], color='red', label='Outliers', zorder=5)
#         plt.title(f"Kalman filtering & imputatie - {col}")
#         plt.legend()
#         plt.xlabel("Tijd (s)")
#         plt.ylabel(col)
#         plt.grid(False)
#         plt.tight_layout()
#         plt.show()

#     return result_df

### Creëeren dataset

In [None]:
# trein_accelerometer_df.columns = [col.strip().strip('"') for col in trein_accelerometer_df.columns]  # Schoon kolomnamen op

# # Maak een nieuwe kolom 'second' als integer tijd
# trein_accelerometer_df['second'] = trein_accelerometer_df['Time (s)'].astype(float).astype(int)

# # Gemiddelde per seconde berekenen
# df_avg = trein_accelerometer_df.groupby('second').mean().reset_index()

# # Optioneel: hernoem 'second' terug naar 'Time (s)' als je dat prettiger vindt
# df_avg = df_avg.rename(columns={'second': 'Time (s)'})

In [13]:
from functools import reduce

In [14]:
def aggregate_sensor_data(sensor_data_dict):
    aggregated_dfs = []

    for name, df in sensor_data_dict.items():
        # Kolomnamen opschonen
        df.columns = [col.strip().strip('"') for col in df.columns]

        # Tijdkolom gebruiken
        time_col = df.columns[0]
        df['second'] = df[time_col].astype(float).astype(int)

        # Gemiddelde per seconde
        df_avg = df.groupby('second').mean().reset_index()

        # Hernoemen van kolommen
        df_avg = df_avg.rename(columns={col: f"{name}_{col}" for col in df_avg.columns if col != 'second'})
        aggregated_dfs.append(df_avg)

    # Merge alles op 'second'
    final_df = reduce(lambda left, right: pd.merge(left, right, on='second', how='outer'), aggregated_dfs)

    # Sorteren en opschonen
    final_df = final_df.sort_values('second').reset_index(drop=True)
    final_df = final_df.rename(columns={'second': 'Time (s)'})
    columns_to_drop = [col for col in final_df.columns if 'Time' in col and col != 'Time (s)']
    final_df = final_df.drop(columns=columns_to_drop)

    return final_df

In [158]:
final_df_metro = aggregate_sensor_data(sensor_data_dict_metro)
final_df_rennen = aggregate_sensor_data(sensor_data_dict_rennen)
final_df_fietsen = aggregate_sensor_data(sensor_data_dict_fietsen)
final_df_trein = aggregate_sensor_data(sensor_data_dict_trein)

final_df_auto_pre = aggregate_sensor_data(sensor_data_dict_auto)
final_df_auto = final_df_auto_pre[final_df_auto_pre['Time (s)'] > 120]

In [15]:
final_df_fietsen_2 = aggregate_sensor_data(sensor_data_dict_fietsen_2)
final_df_auto_2 = aggregate_sensor_data(sensor_data_dict_auto_2)
final_df_rennen_2 = aggregate_sensor_data(sensor_data_dict_rennen_2)
final_df_metro_2 = aggregate_sensor_data(sensor_data_dict_metro_2)
final_df_trein_2 = aggregate_sensor_data(sensor_data_dict_trein_2)

In [160]:
final_df_metro = final_df_metro[(final_df_metro['Time (s)'] > 10) & (final_df_metro['Time (s)'] < final_df_metro['Time (s)'].max() - 10)]
final_df_rennen = final_df_rennen[(final_df_rennen['Time (s)'] > 10) & (final_df_rennen['Time (s)'] < final_df_rennen['Time (s)'].max() - 10)]
final_df_fietsen = final_df_fietsen[(final_df_fietsen['Time (s)'] > 10) & (final_df_fietsen['Time (s)'] < final_df_fietsen['Time (s)'].max() - 10)]
final_df_trein = final_df_trein[(final_df_trein['Time (s)'] > 10) & (final_df_trein['Time (s)'] < final_df_trein['Time (s)'].max() - 10)]
final_df_auto = final_df_auto[(final_df_auto['Time (s)'] > 10) & (final_df_auto['Time (s)'] < final_df_auto['Time (s)'].max() - 10)]

In [16]:
final_df_fietsen_2 = final_df_fietsen_2[(final_df_fietsen_2['Time (s)'] > 10) & (final_df_fietsen_2['Time (s)'] < final_df_fietsen_2['Time (s)'].max() - 10)]
final_df_auto_2 = final_df_auto_2[(final_df_auto_2['Time (s)'] > 10) & (final_df_auto_2['Time (s)'] < final_df_auto_2['Time (s)'].max() - 10)]
final_df_rennen_2 = final_df_rennen_2[(final_df_rennen_2['Time (s)'] > 10) & (final_df_rennen_2['Time (s)'] < final_df_rennen_2['Time (s)'].max() - 10)]
final_df_metro_2 = final_df_metro_2[(final_df_metro_2['Time (s)'] > 10) & (final_df_metro_2['Time (s)'] < final_df_metro_2['Time (s)'].max() - 10)]
final_df_trein_2 = final_df_trein_2[(final_df_trein_2['Time (s)'] > 10) & (final_df_trein_2['Time (s)'] < final_df_trein_2['Time (s)'].max() - 10)]

In [None]:
final_df_rennen

In [162]:
final_df_metro['Barometer_X (hPa)'] = final_df_metro['Barometer_X (hPa)'].interpolate(method='linear')
final_df_rennen['Barometer_X (hPa)'] = final_df_rennen['Barometer_X (hPa)'].interpolate(method='linear')
final_df_fietsen['Barometer_X (hPa)'] = final_df_fietsen['Barometer_X (hPa)'].interpolate(method='linear')
final_df_trein['Barometer_X (hPa)'] = final_df_trein['Barometer_X (hPa)'].interpolate(method='linear')
final_df_auto['Barometer_X (hPa)'] = final_df_auto['Barometer_X (hPa)'].interpolate(method='linear')

In [17]:
final_df_fietsen_2['Barometer_X (hPa)'] = final_df_fietsen_2['Barometer_X (hPa)'].interpolate(method='linear')
final_df_auto_2['Barometer_X (hPa)'] = final_df_auto_2['Barometer_X (hPa)'].interpolate(method='linear')
final_df_rennen_2['Barometer_X (hPa)'] = final_df_rennen_2['Barometer_X (hPa)'].interpolate(method='linear')
final_df_metro_2['Barometer_X (hPa)'] = final_df_metro_2['Barometer_X (hPa)'].interpolate(method='linear')
final_df_trein_2['Barometer_X (hPa)'] = final_df_trein_2['Barometer_X (hPa)'].interpolate(method='linear')

In [None]:
final_df_metro.isna().sum()

In [164]:
final_df_metro.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\metro.csv", index=False)
final_df_rennen.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\rennen.csv", index=False)
final_df_fietsen.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\fietsen.csv", index=False)
final_df_trein.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\trein.csv", index=False)
final_df_auto.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\auto.csv", index=False)

In [18]:
final_df_metro_2.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\metro2.csv", index=False)
final_df_rennen_2.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\rennen2.csv", index=False)
final_df_fietsen_2.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\fietsen2.csv", index=False)
final_df_auto_2.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\auto2.csv", index=False)
final_df_trein_2.to_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\data_voor_puck\trein2.csv", index=False)

In [19]:
final_df_rennen = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\rennen_kalman_filtered.csv")
final_df_auto = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\auto_kalman_filtered.csv")
final_df_fietsen = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\fietsen_kalman_filtered.csv")
final_df_trein = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\trein_kalman_filtered.csv")
final_df_metro = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\metro_kalman_filtered.csv")

In [20]:
final_df_rennen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\rennen2_kalman_filtered.csv")
final_df_auto_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\auto2_kalman_filtered.csv")
final_df_fietsen_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\fietsen2_kalman_filtered.csv")
final_df_trein_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\trein2_kalman_filtered.csv")
final_df_metro_2 = pd.read_csv(r"C:\Users\jdrij\OneDrive\Bureaublad\Master\ML_Github\ML4QS-project\final_datasets\metro2_kalman_filtered.csv")

## Lagged features

In [21]:
def add_lagged_features(df, lags=[1, 3, 10]):
    """
    Voeg lagged statistical features toe:
    - std voor lags > 1
    - mean voor niet-gyroscoop features
    - sum voor gyroscoop features
    Kolomnamen met eenheden worden gestandaardiseerd.
    """
    rename_map = {
        "Accelerometer_X (m/s^2)_kalman": "Accelerometer_X",
        "Accelerometer_Y (m/s^2)_kalman": "Accelerometer_Y",
        "Accelerometer_Z (m/s^2)_kalman": "Accelerometer_Z",
        "Gyroscope_X (rad/s)_kalman": "Gyroscope_X",
        "Gyroscope_Y (rad/s)_kalman": "Gyroscope_Y",
        "Gyroscope_Z (rad/s)_kalman": "Gyroscope_Z",
        "Linear Accelerometer_X (m/s^2)_kalman": "Linear_Accelerometer_X",
        "Linear Accelerometer_Y (m/s^2)_kalman": "Linear_Accelerometer_Y",
        "Linear Accelerometer_Z (m/s^2)_kalman": "Linear_Accelerometer_Z",
        "Magnetometer_X (µT)_kalman": "Magnetometer_X",
        "Magnetometer_Y (µT)_kalman": "Magnetometer_Y",
        "Magnetometer_Z (µT)_kalman": "Magnetometer_Z",
        "Barometer_X (hPa)_kalman": "Barometer_X",
        "Time (s)": "Time"
    }


    # Kolomnamen opschonen en hernoemen
    df.columns = df.columns.str.strip()
    df = df.rename(columns=rename_map)

    gyroscope_cols = ["Gyroscope_X", "Gyroscope_Y", "Gyroscope_Z"]
    all_feature_cols = [col for col in rename_map.values() if not col.startswith('Time')]
    non_gyro_cols = [col for col in all_feature_cols if col not in gyroscope_cols]

    for col in all_feature_cols:
        if col not in df.columns:
            print(f"⚠️ Kolom '{col}' ontbreekt in DataFrame. Sla over.")
            continue

        for lag in lags:
            # Alleen std voor lag > 1
            if lag > 1:
                df[f"{col}_std_{lag}"] = df[col].rolling(window=lag).std()

            if col in non_gyro_cols:
                df[f"{col}_mean_{lag}"] = df[col].rolling(window=lag).mean()

            if col in gyroscope_cols:
                df[f"{col}_sum_{lag}"] = df[col].rolling(window=lag).sum()

    #df.dropna(inplace=True)
    return df

In [22]:
def add_lagged_features_2(df, lags=[1, 3, 10]):
    """
    Voeg lagged statistical features toe:
    - std voor lags > 1
    - mean voor niet-gyroscoop features
    - sum voor gyroscoop features
    Kolomnamen met eenheden worden gestandaardiseerd.
    """
    rename_map = {
        "Accelerometer_X (m/s^2)": "Accelerometer_X",
        "Accelerometer_Y (m/s^2)": "Accelerometer_Y",
        "Accelerometer_Z (m/s^2)": "Accelerometer_Z",
        "Gyroscope_X (rad/s)": "Gyroscope_X",
        "Gyroscope_Y (rad/s)": "Gyroscope_Y",
        "Gyroscope_Z (rad/s)": "Gyroscope_Z",
        "Linear Accelerometer_X (m/s^2)": "Linear Accelerometer_X",
        "Linear Accelerometer_Y (m/s^2)": "Linear Accelerometer_Y",
        "Linear Accelerometer_Z (m/s^2)": "Linear Accelerometer_Z",
        "Magnetometer_X (µT)": "Magnetometer_X",
        "Magnetometer_Y (µT)": "Magnetometer_Y",
        "Magnetometer_Z (µT)": "Magnetometer_Z",
        "Barometer_X (hPa)": "Barometer_X"
    }

    # Kolomnamen opschonen en hernoemen
    df.columns = df.columns.str.strip()
    df = df.rename(columns=rename_map)

    gyroscope_cols = ["Gyroscope_X", "Gyroscope_Y", "Gyroscope_Z"]
    all_feature_cols = [col for col in rename_map.values() if not col.startswith('Time')]
    non_gyro_cols = [col for col in all_feature_cols if col not in gyroscope_cols]

    for col in all_feature_cols:
        if col not in df.columns:
            print(f"⚠️ Kolom '{col}' ontbreekt in DataFrame. Sla over.")
            continue

        for lag in lags:
            # Alleen std voor lag > 1
            if lag > 1:
                df[f"{col}_std_{lag}"] = df[col].rolling(window=lag).std()

            if col in non_gyro_cols:
                df[f"{col}_mean_{lag}"] = df[col].rolling(window=lag).mean()

            if col in gyroscope_cols:
                df[f"{col}_sum_{lag}"] = df[col].rolling(window=lag).sum()

    #df.dropna(inplace=True)
    return df


In [23]:
metro = add_lagged_features(final_df_metro).iloc[9:]
rennen = add_lagged_features(final_df_rennen).iloc[9:]
fietsen = add_lagged_features(final_df_fietsen).iloc[9:]
trein = add_lagged_features(final_df_trein).iloc[9:]
auto = add_lagged_features(final_df_auto).iloc[9:]

In [24]:
fietsen2 = add_lagged_features(final_df_fietsen_2).iloc[9:]
auto2 = add_lagged_features(final_df_auto_2).iloc[9:]
rennen2 = add_lagged_features(final_df_rennen_2).iloc[9:]
metro2 = add_lagged_features(final_df_metro_2).iloc[9:]
trein2 = add_lagged_features(final_df_trein_2).iloc[9:]

In [25]:
metro['acc_magnitude'] = np.sqrt(metro['Accelerometer_X']**2 + metro['Accelerometer_Y']**2 + metro['Accelerometer_Z']**2)
rennen['acc_magnitude'] = np.sqrt(rennen['Accelerometer_X']**2 + rennen['Accelerometer_Y']**2 + rennen['Accelerometer_Z']**2)
fietsen['acc_magnitude'] = np.sqrt(fietsen['Accelerometer_X']**2 + fietsen['Accelerometer_Y']**2 + fietsen['Accelerometer_Z']**2)
trein['acc_magnitude'] = np.sqrt(trein['Accelerometer_X']**2 + trein['Accelerometer_Y']**2 + trein['Accelerometer_Z']**2)
auto['acc_magnitude'] = np.sqrt(auto['Accelerometer_X']**2 + auto['Accelerometer_Y']**2 + auto['Accelerometer_Z']**2)

In [26]:
metro2['acc_magnitude'] = np.sqrt(metro2['Accelerometer_X']**2 + metro2['Accelerometer_Y']**2 + metro2['Accelerometer_Z']**2)
rennen2['acc_magnitude'] = np.sqrt(rennen2['Accelerometer_X']**2 + rennen2['Accelerometer_Y']**2 + rennen2['Accelerometer_Z']**2)
fietsen2['acc_magnitude'] = np.sqrt(fietsen2['Accelerometer_X']**2 + fietsen2['Accelerometer_Y']**2 + fietsen2['Accelerometer_Z']**2)
trein2['acc_magnitude'] = np.sqrt(trein2['Accelerometer_X']**2 + trein2['Accelerometer_Y']**2 + trein2['Accelerometer_Z']**2)
auto2['acc_magnitude'] = np.sqrt(auto2['Accelerometer_X']**2 + auto2['Accelerometer_Y']**2 + auto2['Accelerometer_Z']**2)

In [27]:
metro['Linear_acc_magnitude'] = np.sqrt(metro['Linear_Accelerometer_X']**2 + metro['Linear_Accelerometer_Y']**2 + metro['Linear_Accelerometer_Z']**2)
rennen['Linear_acc_magnitude'] = np.sqrt(rennen['Linear_Accelerometer_X']**2 + rennen['Linear_Accelerometer_Y']**2 + rennen['Linear_Accelerometer_Z']**2)
fietsen['Linear_acc_magnitude'] = np.sqrt(fietsen['Linear_Accelerometer_X']**2 + fietsen['Linear_Accelerometer_Y']**2 + fietsen['Linear_Accelerometer_Z']**2)
trein['Linear_acc_magnitude'] = np.sqrt(trein['Linear_Accelerometer_X']**2 + trein['Linear_Accelerometer_Y']**2 + trein['Linear_Accelerometer_Z']**2)
auto['Linear_acc_magnitude'] = np.sqrt(auto['Linear_Accelerometer_X']**2 + auto['Linear_Accelerometer_Y']**2 + auto['Linear_Accelerometer_Z']**2)

In [28]:
metro2['Linear_acc_magnitude'] = np.sqrt(metro2['Linear_Accelerometer_X']**2 + metro2['Linear_Accelerometer_Y']**2 + metro2['Linear_Accelerometer_Z']**2)
rennen2['Linear_acc_magnitude'] = np.sqrt(rennen2['Linear_Accelerometer_X']**2 + rennen2['Linear_Accelerometer_Y']**2 + rennen2['Linear_Accelerometer_Z']**2)
fietsen2['Linear_acc_magnitude'] = np.sqrt(fietsen2['Linear_Accelerometer_X']**2 + fietsen2['Linear_Accelerometer_Y']**2 + fietsen2['Linear_Accelerometer_Z']**2)       
trein2['Linear_acc_magnitude'] = np.sqrt(trein2['Linear_Accelerometer_X']**2 + trein2['Linear_Accelerometer_Y']**2 + trein2['Linear_Accelerometer_Z']**2)
auto2['Linear_acc_magnitude'] = np.sqrt(auto2['Linear_Accelerometer_X']**2 + auto2['Linear_Accelerometer_Y']**2 + auto2['Linear_Accelerometer_Z']**2)

In [29]:
missing1 = rennen.isnull().sum()
missing1 = missing1[missing1 > 0]  # Alleen kolommen met > 0 NaNs
missing1

Series([], dtype: int64)

### XGBoost

In [30]:
metro['transport_mode'] = 'metro'
rennen['transport_mode'] = 'rennen'
fietsen['transport_mode'] = 'fietsen'
trein['transport_mode'] = 'trein'
auto['transport_mode'] = 'auto'

In [31]:
fietsen2['transport_mode'] = 'fietsen'
auto2['transport_mode'] = 'auto'
rennen2['transport_mode'] = 'rennen'
metro2['transport_mode'] = 'metro'
trein2['transport_mode'] = 'trein'

In [43]:
train_metro = metro.iloc[:776].copy()
train_rennen = rennen.iloc[:776].copy()
train_fietsen = fietsen.iloc[:776].copy()
train_trein = trein.iloc[:776].copy()
train_auto = auto.iloc[:776].copy()

In [44]:
print("Max Time (s) values for each DataFrame:")
print("train_rennen:", len(train_rennen["Time"]))
print("train_fietsen:", len(train_fietsen["Time"]))
print("train_trein:", len(train_trein["Time"]))
print("train_auto:", len(train_auto["Time"]))
print("train_metro:", len(train_metro["Time"]))

Max Time (s) values for each DataFrame:
train_rennen: 776
train_fietsen: 776
train_trein: 776
train_auto: 776
train_metro: 776


In [45]:
test_fietsen2 = fietsen2[:776].copy()
test_auto2 = auto2[:776].copy()
test_rennen2 = rennen2[:776].copy()
test_metro2 = metro2[:776].copy()
test_trein2 = trein2[:776].copy()

In [46]:
print("Max Time (s) values for each DataFrame:")
print("test_fietsen2:", len(test_fietsen2["Time"]))
print("test_auto2:", len(test_auto2["Time"]))
print("test_rennen2:", len(test_rennen2["Time"]))   
print("test_metro2:", len(test_metro2["Time"]))
print("test_trein2:", len(test_trein2["Time"]))

Max Time (s) values for each DataFrame:
test_fietsen2: 776
test_auto2: 776
test_rennen2: 776
test_metro2: 776
test_trein2: 776


In [None]:
# train_metro = train_metro.loc[:, ~train_metro.columns.str.startswith('Accelerometer')]
# train_rennen = train_rennen.loc[:, ~train_rennen.columns.str.startswith('Accelerometer')]
# train_fietsen = train_fietsen.loc[:, ~train_fietsen.columns.str.startswith('Accelerometer')]
# train_trein = train_trein.loc[:, ~train_trein.columns.str.startswith('Accelerometer')]
# train_auto = train_auto.loc[:, ~train_auto.columns.str.startswith('Accelerometer')]

In [None]:
# test_fietsen2 = test_fietsen2.loc[:, ~test_fietsen2.columns.str.startswith('Accelerometer')]

In [47]:
train_df = pd.concat([train_metro, train_rennen, train_fietsen, train_trein, train_auto], ignore_index=True)

In [48]:
test_df = pd.concat([test_metro2, test_rennen2, test_fietsen2, test_trein2, test_auto2], ignore_index=True)

In [49]:
train_columns = set(train_df.columns)
test_columns = set(test_df.columns)

# Verschillen vinden
only_in_train = train_columns - test_columns
only_in_test = test_columns - train_columns

print("Kolommen die alleen in train_df_all staan:", only_in_train)
print("Kolommen die alleen in test_df staan:", only_in_test)

Kolommen die alleen in train_df_all staan: set()
Kolommen die alleen in test_df staan: set()


In [None]:
train_df.columns = train_df.columns.str.replace(' ', '_')
test_df.columns = test_df.columns.str.replace(' ', '_')

In [None]:
original_df = train_df_all.copy()
original_test = test_df.copy()

In [None]:
train_df_all = original_df[original_df.columns[:11].tolist() + ['transport_mode']]
test_df = original_test[original_test.columns[:11].tolist() + ['transport_mode']]

In [None]:
train_df_all.shape

In [None]:
train_df_all.columns

In [None]:
new_rows = []
for transport_mode in ['metro', 'trein', 'rennen']:
    # Create a row where 'transport_mode' is set and all other columns are 0.0
    row = {col: 0.0 for col in test_df.columns if col != 'transport_mode'}
    row['transport_mode'] = transport_mode
    new_rows.append(row)

# Convert to a DataFrame
new_rows_df = pd.DataFrame(new_rows)

# Concatenate the new rows to the existing test_df
test_df = pd.concat([test_df, new_rows_df], ignore_index=True)

XGBoost

In [50]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Features en labels splitsen
X_train = train_df.drop(columns=['transport_mode', 'Time'])  # drop Time en label
y_train = train_df['transport_mode']

X_test = test_df.drop(columns=['transport_mode', 'Time'])
y_test = test_df['transport_mode']

# Encode labels (strings → integers)
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

label_mapping = dict(zip(le.classes_, range(len(le.classes_))))
print("Label to integer mapping:", label_mapping)


Label to integer mapping: {'auto': 0, 'fietsen': 1, 'metro': 2, 'rennen': 3, 'trein': 4}


In [51]:
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train_enc)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [52]:
y_pred = model.predict(X_test)
print(classification_report(y_test_enc, y_pred, target_names=le.classes_))


              precision    recall  f1-score   support

        auto       0.45      1.00      0.62       776
     fietsen       0.00      0.00      0.00       776
       metro       0.89      1.00      0.94       776
      rennen       1.00      0.91      0.95       776
       trein       0.00      0.00      0.00       776

    accuracy                           0.58      3880
   macro avg       0.47      0.58      0.50      3880
weighted avg       0.47      0.58      0.50      3880



In [53]:
from sklearn.metrics import accuracy_score

# Accuracy
accuracy_xgb = accuracy_score(y_test_enc, y_pred)
print(f"Accuracy: {accuracy_xgb:.4f}")


Accuracy: 0.5827


In [54]:
print("Train accuracy:", model.score(X_train, y_train_enc))

Train accuracy: 1.0


In [55]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Create a DataFrame comparing actual vs predicted values
comparison_df_xgb = pd.DataFrame({
    'Actual': y_test_enc,
    'Predicted': y_pred
})

# Add readable label names
comparison_df_xgb['Actual_Label'] = le.inverse_transform(comparison_df_xgb['Actual'])
comparison_df_xgb['Predicted_Label'] = le.inverse_transform(comparison_df_xgb['Predicted'])

# Show a sample of comparisons
print("=== XGBoost Prediction Comparison ===")
print(comparison_df_xgb.head(10))

# Count correct predictions
correct_xgb = (comparison_df_xgb['Actual'] == comparison_df_xgb['Predicted']).sum()
total_xgb = len(comparison_df_xgb)
incorrect_xgb = total_xgb - correct_xgb
accuracy_xgb = accuracy_score(y_test_enc, y_pred)

# Summary
print(f"\nTotal predictions: {total_xgb}")
print(f"Correct predictions: {correct_xgb}")
print(f"Incorrect predictions: {incorrect_xgb}")
print(f"Accuracy: {accuracy_xgb:.4f}")

=== XGBoost Prediction Comparison ===
   Actual  Predicted Actual_Label Predicted_Label
0       2          2        metro           metro
1       2          2        metro           metro
2       2          2        metro           metro
3       2          2        metro           metro
4       2          2        metro           metro
5       2          2        metro           metro
6       2          2        metro           metro
7       2          2        metro           metro
8       2          2        metro           metro
9       2          2        metro           metro

Total predictions: 3880
Correct predictions: 2261
Incorrect predictions: 1619
Accuracy: 0.5827


Randomforest

In [56]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Features en labels splitsen
X_train_rf = train_df.drop(columns=['transport_mode', 'Time'])
y_train_rf = train_df['transport_mode']

X_test_rf = test_df.drop(columns=['transport_mode', 'Time'])
y_test_rf = test_df['transport_mode']

# Encode labels (strings → integers)
rf_label_encoder = LabelEncoder()
y_train_rf_enc = rf_label_encoder.fit_transform(y_train_rf)
y_test_rf_enc = rf_label_encoder.transform(y_test_rf)

# Mapping afdrukken
rf_label_mapping = dict(zip(rf_label_encoder.classes_, range(len(rf_label_encoder.classes_))))
print("RandomForest - Label to integer mapping:", rf_label_mapping)

# Train het RandomForest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_rf, y_train_rf_enc)

# Voorspellingen doen
y_pred_rf = rf_model.predict(X_test_rf)

# Evaluatie
print("=== RandomForest Classification Report ===")
print(classification_report(y_test_rf_enc, y_pred_rf, target_names=rf_label_encoder.classes_))


RandomForest - Label to integer mapping: {'auto': 0, 'fietsen': 1, 'metro': 2, 'rennen': 3, 'trein': 4}
=== RandomForest Classification Report ===
              precision    recall  f1-score   support

        auto       0.50      1.00      0.67       776
     fietsen       0.00      0.00      0.00       776
       metro       0.32      0.27      0.29       776
      rennen       1.00      0.04      0.08       776
       trein       0.00      0.00      0.00       776

    accuracy                           0.26      3880
   macro avg       0.36      0.26      0.21      3880
weighted avg       0.36      0.26      0.21      3880



In [57]:
from sklearn.metrics import accuracy_score

# Accuracy
accuracy = accuracy_score(y_test_rf_enc, y_pred_rf)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.2621


In [58]:
comparison_df = pd.DataFrame({
    'Actual': y_test_rf_enc,
    'Predicted': y_pred_rf
})

# Add the label names for readability
comparison_df['Actual_Label'] = rf_label_encoder.inverse_transform(comparison_df['Actual'])
comparison_df['Predicted_Label'] = rf_label_encoder.inverse_transform(comparison_df['Predicted'])

# Print the first few rows
print("=== Prediction Comparison ===")
print(comparison_df.head(10))

# Count correct and incorrect predictions
correct = (comparison_df['Actual'] == comparison_df['Predicted']).sum()
total = len(comparison_df)
incorrect = total - correct
accuracy = accuracy_score(y_test_rf_enc, y_pred_rf)

# Print summary
print(f"\nTotal predictions: {total}")
print(f"Correct predictions: {correct}")
print(f"Incorrect predictions: {incorrect}")
print(f"Accuracy: {accuracy:.4f}")

=== Prediction Comparison ===
   Actual  Predicted Actual_Label Predicted_Label
0       2          1        metro         fietsen
1       2          1        metro         fietsen
2       2          1        metro         fietsen
3       2          1        metro         fietsen
4       2          1        metro         fietsen
5       2          1        metro         fietsen
6       2          1        metro         fietsen
7       2          1        metro         fietsen
8       2          1        metro         fietsen
9       2          1        metro         fietsen

Total predictions: 3880
Correct predictions: 1017
Incorrect predictions: 2863
Accuracy: 0.2621


Catboost

In [61]:
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Input/output preparation
catboost_features_train = train_df.drop(columns=['transport_mode', 'Time'])
catboost_labels_train = train_df['transport_mode']

catboost_features_test = test_df.drop(columns=['transport_mode', 'Time'])
catboost_labels_test = test_df['transport_mode']

# Encode target labels
catboost_label_encoder = LabelEncoder()
catboost_y_train = catboost_label_encoder.fit_transform(catboost_labels_train)
catboost_y_test = catboost_label_encoder.transform(catboost_labels_test)

# Label mapping
catboost_label_dict = dict(zip(catboost_label_encoder.classes_, range(len(catboost_label_encoder.classes_))))
print("CatBoost - Label to integer mapping:", catboost_label_dict)

# Initialize and train the CatBoost model
catboost_model = CatBoostClassifier(verbose=0, random_state=42, iterations=300, learning_rate=0.1, depth=6)
catboost_model.fit(catboost_features_train, catboost_y_train)

# Make predictions
catboost_predictions = catboost_model.predict(catboost_features_test)

# Evaluation
print("=== CatBoost Classification Report ===")
print(classification_report(catboost_y_test, catboost_predictions, target_names=catboost_label_encoder.classes_))


CatBoost - Label to integer mapping: {'auto': 0, 'fietsen': 1, 'metro': 2, 'rennen': 3, 'trein': 4}
=== CatBoost Classification Report ===
              precision    recall  f1-score   support

        auto       1.00      1.00      1.00       776
     fietsen       0.00      0.00      0.00       776
       metro       0.07      0.03      0.04       776
      rennen       1.00      0.88      0.94       776
       trein       0.00      0.00      0.00       776

    accuracy                           0.38      3880
   macro avg       0.41      0.38      0.40      3880
weighted avg       0.41      0.38      0.40      3880



In [62]:
# Accuracy
accuracy_catboost = accuracy_score(catboost_y_test, catboost_predictions)
print(f"Accuracy: {accuracy_catboost:.4f}")


Accuracy: 0.3820


In [59]:
feature_names = X_train.columns

# Haal de importances op
importances = model.feature_importances_

# Zet om naar DataFrame voor overzicht
importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

In [60]:
importance_df[importance_df['Importance'] > 0]

Unnamed: 0,Feature,Importance
78,acc_magnitude,0.312589
11,Magnetometer_Z,0.133778
15,Accelerometer_X_mean_3,0.120681
22,Accelerometer_Y_mean_10,0.09476
27,Accelerometer_Z_mean_10,0.071421
12,Barometer_X,0.056817
0,Accelerometer_X,0.052429
20,Accelerometer_Y_mean_3,0.026054
52,Linear_Accelerometer_Y_mean_10,0.025413
1,Accelerometer_Y,0.024399


LSTM


In [63]:
train_df_basic = train_df[train_df.columns[:11].tolist() + ['transport_mode']]
test_df_basic = test_df[test_df.columns[:11].tolist() + ['transport_mode']]

In [64]:
le = LabelEncoder()
train_df_basic['label'] = le.fit_transform(train_df_basic['transport_mode'])
test_df_basic['label'] = le.transform(test_df_basic['transport_mode'])

# Optional: see the label mapping
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(label_mapping)

{'auto': 0, 'fietsen': 1, 'metro': 2, 'rennen': 3, 'trein': 4}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df_basic['label'] = le.fit_transform(train_df_basic['transport_mode'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df_basic['label'] = le.transform(test_df_basic['transport_mode'])


In [76]:
import numpy as np

def create_sequences(df, feature_cols, label_col, window_size=10):
    X, y = [], []
    for i in range(len(df) - window_size):
        seq_x = df[feature_cols].iloc[i:i+window_size].values
        seq_y = df[label_col].iloc[i+window_size]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [77]:
feature_cols = train_df_basic.columns[:11].tolist()

X_train, y_train = create_sequences(train_df_basic, feature_cols, 'label', window_size=10)
X_test, y_test = create_sequences(test_df_basic, feature_cols, 'label', window_size=10)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shapSe[1], X_train.shape[2])))
model.add(Dense(len(np.unique(y_train)), activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  super().__init__(**kwargs)


In [79]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.3746 - loss: 1.5056 - val_accuracy: 0.3473 - val_loss: 1.5395
Epoch 2/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9153 - loss: 0.4763 - val_accuracy: 0.3057 - val_loss: 2.0189
Epoch 3/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.9984 - loss: 0.0866 - val_accuracy: 0.3080 - val_loss: 2.5347
Epoch 4/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9985 - loss: 0.0340 - val_accuracy: 0.3031 - val_loss: 2.7033
Epoch 5/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9963 - loss: 0.0281 - val_accuracy: 0.3641 - val_loss: 2.8627
Epoch 6/10
[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.9985 - loss: 0.0217 - val_accuracy: 0.3468 - val_loss: 2.8826
Epoch 7/10
[1m121/121[0m 

<keras.src.callbacks.history.History at 0x1654ec87fd0>

In [80]:

# Predict the classes for X_test
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)

# Compute and print accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy: {accuracy:.2%}")

[1m121/121[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Test Accuracy: 33.36%
