In [None]:
import pandas as pd
import numpy as np
import os

In [None]:
# Input and output folders, both resolved against the current working directory.
BASE_DATA_PATH = os.getcwd() + '/data/selfBACK'
OUTPUT_PATH = os.getcwd() + '/data/selfBACK_processed_data'

# Windowing parameters: durations are given in seconds and converted to a
# number of samples through the sampling rate.
SAMPLE_RATE = 100       # samples per second (Hz)
WINDOW_SECONDS = 4      # duration of one window
HOP_SECONDS = 2         # distance between the starts of consecutive windows
FRAME_SIZE = int(SAMPLE_RATE * WINDOW_SECONDS)  # samples per window
HOP_SIZE = int(SAMPLE_RATE * HOP_SECONDS)       # samples between window starts

# Participant ids 26..62, minus the ids that are skipped.
UTENTI = sorted(set(range(26, 63)) - {32, 35, 37, 38, 45})

# Activity names; each one is also a sub-folder name in the dataset tree.
ATTIVITA = [
    "upstairs", "downstairs",
    "walk_slow", "walk_mod", "walk_fast",
    "jogging",
    "standing", "sitting", "lying",
]

# Sensor folder codes: 'w' is labelled "wrist" downstream, anything else "thigh".
SENSORI = ['w', 't']
# Names given to the first four columns of every recording file.
COLONNE = ['timestamp', 'x', 'y', 'z']

In [None]:
def load_sensor_data(base_path, participant_id, activity, sensor_type):
    """Read one recording file and return it as a DataFrame.

    The file is expected at
    ``<base_path>/<sensor_type>/<activity>/0<participant_id>.csv``. Its first
    four columns are read without a header row and named after ``COLONNE``.
    Rows that contain any missing value are discarded.

    Args:
        base_path: Root folder of the dataset (string).
        participant_id: Participant number; a single "0" is put in front of it
            to build the file name.
        activity: Name of the activity sub-folder.
        sensor_type: Name of the sensor sub-folder.

    Returns:
        The loaded DataFrame, or ``None`` if anything goes wrong while reading
        (the error is printed rather than raised).
    """
    file_path = "/".join([base_path, sensor_type, activity, f"0{participant_id}.csv"])
    try:
        recording = pd.read_csv(
            file_path,
            header=None,
            usecols=[0, 1, 2, 3],
            names=COLONNE,
        )
        has_missing = recording.isnull().values.any()
        if has_missing:
            recording = recording.dropna()
        return recording
    except Exception as e:
        # Best effort: report the problem and let the caller skip this file.
        print(f"Errore {file_path}: {e}")
        return None

In [None]:
def get_frames(data, frame_size, hop_size):
    """Cut ``data`` into fixed-length, possibly overlapping row windows.

    Candidate window starts are every ``hop_size``-th row position; only the
    windows that fit completely inside ``data`` are kept.

    Args:
        data: DataFrame to slice by row position.
        frame_size: Number of rows in each window.
        hop_size: Number of rows between the starts of consecutive windows.

    Returns:
        All windows concatenated in order. The outer index level is the window
        number (0, 1, ...), the inner level is the original row index.

    Raises:
        ValueError: Propagated from ``pd.concat`` when not even one full
            window fits in ``data``.
    """
    n_rows = len(data)
    candidate_starts = np.arange(n_rows)[::hop_size]
    # Drop the trailing starts whose window would run past the last row.
    starts = candidate_starts[candidate_starts + frame_size <= n_rows]

    windows = [data.iloc[begin:begin + frame_size] for begin in starts]
    return pd.concat(windows, keys=range(len(windows)))

In [None]:
def calculate_acc_magnitude(df, prefix = ""):
    """Add the Euclidean norm of the three axis columns to ``df``.

    The columns ``<prefix>x``, ``<prefix>y`` and ``<prefix>z`` are converted to
    numeric (unparseable entries become NaN) and a new column
    ``<prefix>AccMagnitude`` holding ``sqrt(x**2 + y**2 + z**2)`` is added.
    ``df`` is modified in place and also returned.

    A row where any axis is NaN gets a NaN magnitude. Without that rule the
    row-wise sum would skip the missing axis and report a too-small magnitude
    (or 0.0 when all three axes are missing).

    Args:
        df: DataFrame holding the axis columns.
        prefix: Optional prefix shared by the axis column names and by the
            name of the new magnitude column.

    Returns:
        The same ``df`` object. If any of the three axis columns is absent the
        frame is returned untouched, without a magnitude column.
    """
    axis_cols = [f"{prefix}{axis}" for axis in ("x", "y", "z")]
    if all(col in df.columns for col in axis_cols):
        df[axis_cols] = df[axis_cols].apply(pd.to_numeric, errors='coerce')
        # min_count forces NaN unless all three axes are present in the row.
        squared_sum = np.square(df[axis_cols]).sum(axis=1, min_count=len(axis_cols))
        df[f"{prefix}AccMagnitude"] = np.sqrt(squared_sum)
    return df

def calc_over_in_below_mean(df, cols, perc = 0.01):
    """Count, per column, how many values lie above, inside or below a band around the mean.

    For every column the band is ``mean ± |mean| * perc``. Values strictly
    above the upper limit count as "over", values strictly below the lower
    limit count as "below", and values on or between the limits count as "in".

    Output columns are named ``<label>OverMean``, ``<label>InMean`` and
    ``<label>BelowMean``, where ``<label>`` is ``"Acc" + NAME.upper()`` for
    one-character column names and the column name itself otherwise.

    Args:
        df: DataFrame containing the columns in ``cols``.
        cols: Column names to analyse.
        perc: Half-width of the band as a fraction of the absolute mean.

    Returns:
        A one-row DataFrame with three count columns per input column.
    """
    col_means = df[cols].mean()
    tolerance = col_means.abs() * perc
    lower = col_means - tolerance
    upper = col_means + tolerance

    counts = {}
    for col in cols:
        # Built outside the f-string: quotes nested inside an f-string of the
        # same quote type are only accepted from Python 3.12 on.
        label = ('Acc' + col.upper()) if len(col) == 1 else col
        values = df[col]
        counts[f'{label}OverMean'] = (values > upper[col]).sum()
        counts[f'{label}InMean'] = ((values >= lower[col]) & (values <= upper[col])).sum()
        counts[f'{label}BelowMean'] = (values < lower[col]).sum()

    return pd.DataFrame([counts])

def calculate_features(df, feature_cols):
    """Compute per-window summary features for the given columns.

    ``df`` is split into windows of ``FRAME_SIZE`` rows taken every
    ``HOP_SIZE`` rows. For each window and each column in ``feature_cols`` the
    mean, standard deviation, minimum and maximum are computed, plus the
    over/in/below-mean counts from ``calc_over_in_below_mean``.

    Output columns are named ``<label><Suffix>``, where ``<label>`` is
    ``"Acc" + NAME.upper()`` for one-character column names and the column name
    itself otherwise.

    Args:
        df: DataFrame containing the columns in ``feature_cols``.
        feature_cols: Names of the columns to summarise.

    Returns:
        A DataFrame with one row per window and a fresh 0..n-1 index. When
        ``df`` is shorter than one window, an empty DataFrame carrying the same
        column names is returned.
    """
    # Built with plain concatenation: quotes nested inside an f-string of the
    # same quote type are only accepted from Python 3.12 on.
    labels = [('Acc' + col.upper()) if len(col) == 1 else col for col in feature_cols]

    if len(df) < FRAME_SIZE:
        # No full window fits; get_frames would hand pd.concat an empty list
        # and raise ValueError, so return an empty feature table instead.
        empty_cols = [f'{label}{suffix}'
                      for suffix in ('Mean', 'Std', 'Min', 'Max')
                      for label in labels]
        empty_cols += [f'{label}{suffix}'
                       for label in labels
                       for suffix in ('OverMean', 'InMean', 'BelowMean')]
        return pd.DataFrame(columns=empty_cols)

    df_windowed = get_frames(df, FRAME_SIZE, HOP_SIZE)
    # Level 0 of the windowed index is the window number.
    grouped = df_windowed.groupby(level=0)
    selected = grouped[feature_cols]

    stats = {
        'Mean': selected.mean(),
        'Std': selected.std(),
        'Min': selected.min(),
        'Max': selected.max(),
    }
    for suffix, stat_frame in stats.items():
        stat_frame.columns = [f'{label}{suffix}' for label in labels]

    df_oib_mean = grouped.apply(lambda window: calc_over_in_below_mean(window, feature_cols))
    # Each window yields a one-row frame; drop that inner row index so the
    # result lines up with the other statistics on the window number.
    df_oib_mean = df_oib_mean.reset_index(level=1, drop=True)

    all_features = pd.concat(list(stats.values()) + [df_oib_mean], axis=1)

    return all_features.reset_index(drop=True)

In [None]:
# Make sure the destination folder exists before any per-user file is written.
os.makedirs(OUTPUT_PATH, exist_ok=True)

# One output CSV per participant, covering every sensor/activity recording
# that could be loaded for that participant.
for utente in UTENTI:
    print(f"Utente {utente}")
    feature_cols = ['x', 'y', 'z', 'AccMagnitude']
    feature_list = []

    for sensore in SENSORI:
        for attivita in ATTIVITA:
            df = load_sensor_data(BASE_DATA_PATH, utente, attivita, sensore)
            if df is None or df.empty:
                # Missing, unreadable or empty recording: skip it.
                continue

            df = calculate_acc_magnitude(df)
            if 'AccMagnitude' not in df.columns:
                continue

            df_features = calculate_features(df, feature_cols)
            df_features['Userid'] = utente
            df_features['Activity'] = attivita
            if sensore == 'w':
                df_features['position'] = "wrist"
            else:
                df_features['position'] = "thigh"
            feature_list.append(df_features)

    if not feature_list:
        # Nothing usable for this participant; no file is written.
        continue

    df_all_features = pd.concat(feature_list, ignore_index=True)

    # Put the identifying columns first, followed by all feature columns.
    id_cols = ['Userid', 'Activity', 'position']
    feature_cols = [col for col in df_all_features.columns if col not in id_cols]
    final_cols = id_cols + feature_cols
    df_all_features = df_all_features[final_cols]

    output_filename = f"{OUTPUT_PATH}/grouped_data_User{utente}.csv"
    df_all_features.to_csv(output_filename, index=False)