In [3]:
# NOTE(review): both installs are unpinned; the recorded output below shows mlxtend resolving to 0.23.3.
%pip install bayesian-optimization
%pip install mlxtend

Note: you may need to restart the kernel to use updated packages.
Collecting mlxtend
  Using cached mlxtend-0.23.3-py3-none-any.whl.metadata (7.3 kB)
Using cached mlxtend-0.23.3-py3-none-any.whl (1.4 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.3
Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft

In [None]:
# Load the five workbooks, drop incomplete rows, and write each cleaned sheet
# back to the file it was read from (the inputs are overwritten in place).
data1, data2, data3, data4, data5 = [
    pd.read_excel(f'Data_Preprocessed1/{n}.xlsx') for n in range(1, 6)
]

dataset = [data1, data2, data3, data4, data5]
# enumerate() supplies the 1-based file number instead of a hand-maintained counter.
for i, data in enumerate(dataset, start=1):
    # In-place drop, so data1..data5 also refer to the cleaned frames afterwards.
    data.dropna(inplace=True)
    # Save the cleaned data under the same file name it was loaded from.
    data.to_excel(f'Data_Preprocessed1/{i}.xlsx', index=False)

In [3]:
# Collect the five frames into the list consumed by preprocess_data() further down.
dataset = [data1, data2, data3, data4, data5]

In [4]:
def mean(window):
    """Return the mean of the ``ax``, ``ay`` and ``az`` columns as a 3-tuple.

    Args:
        window: Table-like object indexable by the column names 'ax', 'ay', 'az'.

    Returns:
        Tuple ``(mean_ax, mean_ay, mean_az)``.
    """
    return tuple(window[axis].mean() for axis in ('ax', 'ay', 'az'))

def root_mean_square(window):
    """Return the root-mean-square of each acceleration axis as a 3-tuple.

    Args:
        window: Table-like object indexable by the column names 'ax', 'ay', 'az'.

    Returns:
        Tuple ``(rms_ax, rms_ay, rms_az)``.
    """
    def _rms(samples):
        # Square every sample, average, then take the square root.
        return np.sqrt(np.mean(samples ** 2))

    return _rms(window['ax']), _rms(window['ay']), _rms(window['az'])

def standard_deviation(window):
    """Return the standard deviation of each acceleration axis as a 3-tuple.

    The value is computed with ``np.std`` (NumPy's default is ``ddof=0``).

    Args:
        window: Table-like object indexable by the column names 'ax', 'ay', 'az'.

    Returns:
        Tuple ``(std_ax, std_ay, std_az)``.
    """
    return tuple(np.std(window[axis]) for axis in ('ax', 'ay', 'az'))

def signal_magnitude_vector(window):
    """Return the per-sample Euclidean norm of the (ax, ay, az) vector.

    Args:
        window: Table-like object indexable by the column names 'ax', 'ay', 'az'.

    Returns:
        One magnitude per row of ``window``: sqrt(ax**2 + ay**2 + az**2).
    """
    squared_norm = window['ax'] ** 2 + window['ay'] ** 2 + window['az'] ** 2
    return np.sqrt(squared_norm)

def interquartile_range(window):
    """Return the interquartile range (Q3 - Q1) of each acceleration axis.

    Args:
        window: pandas object whose 'ax', 'ay', 'az' columns support ``.quantile``.

    Returns:
        Tuple ``(iqr_ax, iqr_ay, iqr_az)``.
    """
    spreads = []
    for axis in ('ax', 'ay', 'az'):
        samples = window[axis]
        upper_quartile = samples.quantile(0.75)
        lower_quartile = samples.quantile(0.25)
        spreads.append(upper_quartile - lower_quartile)
    return tuple(spreads)

def max_min(window):
    """Return the peak-to-peak range (max - min) of each acceleration axis.

    Args:
        window: Table-like object indexable by the column names 'ax', 'ay', 'az'.

    Returns:
        Tuple ``(range_ax, range_ay, range_az)``.
    """
    def _span(samples):
        return samples.max() - samples.min()

    return tuple(_span(window[axis]) for axis in ('ax', 'ay', 'az'))

def kurtosis(window):
    """Return the kurtosis of each acceleration axis, as computed by pandas.

    Args:
        window: pandas object whose 'ax', 'ay', 'az' columns support ``.kurtosis``.

    Returns:
        Tuple ``(kurtosis_ax, kurtosis_ay, kurtosis_az)``.
    """
    per_axis = [window[axis].kurtosis() for axis in ('ax', 'ay', 'az')]
    return tuple(per_axis)

def skewness(window):
    """Return the skewness of each acceleration axis, as computed by pandas.

    Args:
        window: pandas object whose 'ax', 'ay', 'az' columns support ``.skew``.

    Returns:
        Tuple ``(skewness_ax, skewness_ay, skewness_az)``.
    """
    per_axis = [window[axis].skew() for axis in ('ax', 'ay', 'az')]
    return tuple(per_axis)

def mean_frequency_power(window):
    """Return the mean FFT magnitude of each acceleration axis as a 3-tuple.

    For every axis the full complex FFT of the window is taken and the
    magnitudes of all coefficients (including the zero-frequency term) are
    averaged. Note that magnitudes, not squared magnitudes, are averaged.

    Args:
        window: Table-like object indexable by the column names 'ax', 'ay', 'az'.

    Returns:
        Tuple ``(freq_ax, freq_ay, freq_az)``.
    """
    def _mean_magnitude(samples):
        # Hand the FFT a plain ndarray so the call does not depend on how the
        # installed SciPy release treats pandas objects.
        spectrum = fft(np.asarray(samples))
        return np.mean(np.abs(spectrum))

    return tuple(_mean_magnitude(window[axis]) for axis in ('ax', 'ay', 'az'))

In [20]:
def moving_window_feature_extraction(data, window_size, step_size=1):
    """Slide a fixed-size window over ``data`` and compute one feature row per window.

    Args:
        data: DataFrame with 'ax', 'ay', 'az' and 'label' columns.
        window_size: Number of consecutive rows in each window.
        step_size: Number of rows the window advances between iterations.

    Returns:
        DataFrame with one row per window: the window bounds ('start_index'
        inclusive, 'end_index' exclusive), per-axis mean / RMS / standard
        deviation / IQR / range / mean FFT magnitude, the mean, max and min of
        the signal magnitude vector, and the first value returned by
        ``mode()`` on the window's labels.
    """
    axes = ('ax', 'ay', 'az')

    def per_axis(prefix, values):
        # Expand an (ax, ay, az) triple into '<prefix>_ax', '<prefix>_ay', '<prefix>_az'.
        return {f'{prefix}_{axis}': value for axis, value in zip(axes, values)}

    rows = []
    last_start = len(data) - window_size

    # Walk over every window position
    for start in range(0, last_start + 1, step_size):
        stop = start + window_size
        window = data.iloc[start:stop]
        magnitude = signal_magnitude_vector(window)

        row = {'start_index': start, 'end_index': stop}
        row.update(per_axis('mean', mean(window)))
        row.update(per_axis('rms', root_mean_square(window)))
        row.update(per_axis('std', standard_deviation(window)))
        row['smv_mean'] = magnitude.mean()
        row['smv_max'] = magnitude.max()
        row['smv_min'] = magnitude.min()
        row.update(per_axis('iqr', interquartile_range(window)))
        row.update(per_axis('max_min', max_min(window)))
        row.update(per_axis('mean_freq_power', mean_frequency_power(window)))
        row['label'] = window['label'].mode().values[0]

        rows.append(row)

    # Convert the collected rows to a DataFrame
    return pd.DataFrame(rows)

def preprocess_data(data, window_size, step_size):
    """Extract windowed features from every frame and write one CSV per frame.

    Args:
        data: Sequence of DataFrames accepted by moving_window_feature_extraction.
        window_size: Number of rows per window.
        step_size: Number of rows the window advances between iterations.

    Side effects:
        Writes 'Data_Preprocessed1/data_<k>.csv' for the k-th frame (1-based).
    """
    for file_number, frame in enumerate(data, start=1):
        features_df = moving_window_feature_extraction(frame, window_size, step_size)
        target_path = f'Data_Preprocessed1/data_{file_number}.csv'
        features_df.to_csv(target_path, index=False)

# Sliding-window settings: 50 rows per window, advancing one row at a time.
window_size, step_size = 50, 1

# Extract features for every frame in `dataset` and write one CSV per frame.
preprocess_data(dataset, window_size, step_size)

## Preprocess 2: stack the three axes into a single (acc, label) series

In [None]:
import pandas as pd
import os

# Input and output directories
data_folder = "Data/"
output_folder = "New_Data/"

# Create the output directory when it does not exist yet
os.makedirs(output_folder, exist_ok=True)

# Excel workbooks to convert (1.xlsx through 5.xlsx)
file_names = [f"{i}.xlsx" for i in range(1, 6)]

# Read each workbook, reshape it, and save it as CSV
for file in file_names:
    df = pd.read_excel(os.path.join(data_folder, file))

    # Stack the three axes end to end into one 'acc' column, with the matching
    # per-axis labels stacked the same way into 'label'.
    # NOTE(review): the sliding windows computed later in this notebook run over
    # this stacked column and can therefore span the seam between two axes -
    # confirm that this is intended.
    acc_column = pd.concat([df[name] for name in ('ax', 'ay', 'az')], ignore_index=True)
    label_column = pd.concat([df[name] for name in ('lx', 'ly', 'lz')], ignore_index=True)
    df_transformed = pd.DataFrame({'acc': acc_column, 'label': label_column})

    # Same base name as the input, with a .csv extension, inside the output directory
    output_file = os.path.join(output_folder, file.replace('.xlsx', '.csv'))
    df_transformed.to_csv(output_file, index=False)

    print(f"Fichier sauvegardé : {output_file}")

Fichier sauvegardé : New_Data/1.csv
Fichier sauvegardé : New_Data/2.csv
Fichier sauvegardé : New_Data/3.csv
Fichier sauvegardé : New_Data/4.csv
Fichier sauvegardé : New_Data/5.csv


In [5]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from scipy.fft import fft

# Directory paths (input and output are the same directory)
data_folder = "New_Data/"
output_folder = "New_Data/"

# Create the output directory when it does not exist yet
os.makedirs(output_folder, exist_ok=True)

# Load the CSV files 1.csv through 5.csv from `New_Data/`
file_names = [f"{i}.csv" for i in range(1, 6)]
csv_paths = [os.path.join(data_folder, name) for name in file_names]
dataset = [pd.read_csv(path) for path in csv_paths]

# Clean the data: drop rows containing NaN and write each frame back to disk
for i, data in enumerate(dataset, start=1):
    data.dropna(inplace=True)  # in place, so `dataset` holds the cleaned frames
    cleaned_path = os.path.join(output_folder, f"{i}.csv")
    data.to_csv(cleaned_path, index=False)

# Définition des fonctions d'extraction de caractéristiques
def mean(window):
    """Return the mean of the window's ``acc`` column."""
    samples = window['acc']
    return samples.mean()

def root_mean_square(window):
    """Return the root-mean-square of the window's ``acc`` column."""
    squared = window['acc'] ** 2
    mean_square = np.mean(squared)
    return np.sqrt(mean_square)

def standard_deviation(window):
    """Return the standard deviation of ``acc`` via ``np.std`` (NumPy default ``ddof=0``)."""
    samples = window['acc']
    return np.std(samples)

def interquartile_range(window):
    """Return the interquartile range (Q3 - Q1) of the window's ``acc`` column."""
    samples = window['acc']
    upper_quartile = samples.quantile(0.75)
    lower_quartile = samples.quantile(0.25)
    return upper_quartile - lower_quartile

def max_min(window):
    """Return the peak-to-peak range (max - min) of the window's ``acc`` column."""
    samples = window['acc']
    highest = samples.max()
    lowest = samples.min()
    return highest - lowest

def mean_frequency_power(window):
    """Return the mean FFT magnitude of the window's ``acc`` column.

    The full complex FFT of the window is taken and the magnitudes of all
    coefficients (including the zero-frequency term) are averaged. Note that
    magnitudes, not squared magnitudes, are averaged.

    Args:
        window: Table-like object indexable by the column name 'acc'.

    Returns:
        The mean of ``abs(fft(acc))`` as a float.
    """
    # Hand the FFT a plain ndarray so the call does not depend on how the
    # installed SciPy release treats pandas objects.
    spectrum = fft(np.asarray(window['acc']))
    return np.mean(np.abs(spectrum))

def moving_window_feature_extraction(data, window_size, step_size=1):
    """Slide a fixed-size window over ``data`` and compute one feature row per window.

    Args:
        data: DataFrame with 'acc' and 'label' columns.
        window_size: Number of consecutive rows in each window.
        step_size: Number of rows the window advances between iterations.

    Returns:
        DataFrame with one row per window: the window bounds ('start_index'
        inclusive, 'end_index' exclusive), the mean, RMS, standard deviation,
        IQR, range and mean FFT magnitude of 'acc', and the first value
        returned by ``mode()`` on the window's labels.
    """
    def describe(start):
        # Build the feature record for the window beginning at row `start`.
        stop = start + window_size
        window = data.iloc[start:stop]
        return {
            'start_index': start,
            'end_index': stop,
            'mean_acc': mean(window),
            'rms_acc': root_mean_square(window),
            'std_acc': standard_deviation(window),
            'iqr_acc': interquartile_range(window),
            'max_min_acc': max_min(window),
            'mean_freq_power_acc': mean_frequency_power(window),
            'label': window['label'].mode().values[0],  # most frequent label in the window
        }

    # Every start position for which a full window still fits
    window_starts = range(0, len(data) - window_size + 1, step_size)
    return pd.DataFrame([describe(start) for start in window_starts])

def preprocess_data(dataset, window_size, step_size):
    """Extract windowed features from every frame and write one CSV per frame.

    Args:
        dataset: Iterable of DataFrames accepted by moving_window_feature_extraction.
        window_size: Number of rows per window.
        step_size: Number of rows the window advances between iterations.

    Side effects:
        Writes 'data_<k>.csv' (k is 1-based) into the module-level
        ``output_folder`` directory.
    """
    for file_number, frame in enumerate(dataset, start=1):
        target_path = os.path.join(output_folder, f"data_{file_number}.csv")
        features_df = moving_window_feature_extraction(frame, window_size, step_size)
        features_df.to_csv(target_path, index=False)

# Sliding-window settings: 50 rows per window, advancing one row at a time
window_size, step_size = 50, 1

# Run the preprocessing over every loaded frame
preprocess_data(dataset, window_size, step_size)

# Report where the feature files were written
print("Prétraitement terminé. Les fichiers sont sauvegardés dans", output_folder)

Prétraitement terminé. Les fichiers sont sauvegardés dans New_Data/
