In [None]:
import pandas as pd
import plotly
from pathlib import Path
import re

# Root folder; every `group*` sub-directory holds per-user training CSV files.
base = Path('./filtered_data/')

# Nested mapping: group folder name -> {user id (int) -> DataFrame read from the CSV}.
groups_dfs = {}

for train_file in sorted(base.glob('group*/*.csv')):
    id_match = re.search(r'dataset_user_(\d+)_train\.csv', train_file.name)
    if id_match is None:
        # File name does not follow the per-user training pattern: ignore it.
        continue
    user_frame = pd.read_csv(train_file)
    groups_dfs.setdefault(train_file.parent.name, {})[int(id_match.group(1))] = user_frame



In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ast

# Convert a stringified list (as stored in a CSV cell) into a Python list.
def parse_series(cell):
    """Parse a CSV cell holding a Python-literal list.

    Parameters
    ----------
    cell : str, list, tuple, None or NaN
        Raw cell value. Lists/tuples are returned unchanged; missing values
        (anything ``pd.isna`` reports as missing) yield an empty list.

    Returns
    -------
    The object produced by ``ast.literal_eval(cell)``, or ``[]`` when the cell
    is missing or cannot be parsed as a Python literal.
    """
    # Already-parsed sequences pass straight through: pd.isna() on a sequence
    # returns an array, and using that array in `if` raises ValueError.
    if isinstance(cell, (list, tuple)):
        return cell
    if pd.isna(cell):
        return []
    try:
        return ast.literal_eval(cell)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Best-effort parsing: malformed cells become an empty series, while
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

# For every user of every group, build a 3-row figure (heart rate, respiration
# rate, stress level) with one trace per day in each row, plus a slider that
# selects which day's traces are visible.
for group, users in groups_dfs.items():
    user_ids = sorted(users.keys())

    for user_id in user_ids:
        df = users[user_id]
        n_days = len(df)

        fig = make_subplots(
            rows=3, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.1,
            subplot_titles=["Heart Rate", "Respiration Rate", "Stress Level"]
        )

        # Traces for all days; only the first day starts out visible.
        for day_idx in range(n_days):
            day_row = df.iloc[day_idx]  # look the row up once, not once per column
            hr_series = parse_series(day_row['hr_time_series'])
            resp_series = parse_series(day_row['resp_time_series'])
            stress_series = parse_series(day_row['stress_time_series'])

            # The x axis is simply the sample index within the day.
            time_hr = list(range(len(hr_series)))
            time_resp = list(range(len(resp_series)))
            time_stress = list(range(len(stress_series)))

            visible = day_idx == 0

            fig.add_trace(go.Scatter(x=time_hr, y=hr_series, name=f"Day {day_idx+1}", visible=visible), row=1, col=1)
            fig.add_trace(go.Scatter(x=time_resp, y=resp_series, name=f"Day {day_idx+1}", visible=visible), row=2, col=1)
            fig.add_trace(go.Scatter(x=time_stress, y=stress_series, name=f"Day {day_idx+1}", visible=visible), row=3, col=1)

        # Slider steps: each step shows the traces of one day and hides the others.
        steps = []
        for day_idx in range(n_days):
            step = dict(
                method="update",
                # Traces were added day by day, three per day, so trace i belongs to day i // 3.
                # The layout update targets "title.text": these args are sent to
                # plotly.js as-is, and plotly.js 3 no longer accepts a bare string for `title`.
                args=[{"visible": [i // 3 == day_idx for i in range(n_days*3)]},
                      {"title.text": f"{group} - User {user_id} - Day {day_idx+1}"}],
                label=f"Day {day_idx+1}"
            )
            steps.append(step)

        sliders = [dict(active=0, currentvalue={"prefix": "Day: "}, pad={"t": 50}, steps=steps)]

        fig.update_layout(
            sliders=sliders,
            height=800,
            title_text=f"{group} - User {user_id} - Day 1"
        )

        # Rendering is disabled; uncomment to display one figure per user.
        # fig.show()


In [None]:
import pandas as pd
import plotly.graph_objects as go

# Scalar (non time-series) features to plot against the day index.
# The plotting loop below skips any name that is not a column of the user's DataFrame.
scalar_features = [
    'hr_maxHeartRate', 'hr_minHeartRate', 'hr_restingHeartRate', 'hr_lastSevenDaysAvgRestingHeartRate',
    'resp_lowestRespirationValue', 'resp_highestRespirationValue', 'resp_avgWakingRespirationValue',
    'resp_avgSleepRespirationValue', 'resp_avgTomorrowSleepRespirationValue',
    'str_maxStressLevel', 'str_avgStressLevel',
    # sleep_*Seconds features, currently excluded from the plot:
    # 'sleep_sleepTimeSeconds',
    # 'sleep_napTimeSeconds',
    # 'sleep_unmeasurableSleepSeconds',
    # 'sleep_deepSleepSeconds',
    # 'sleep_lightSleepSeconds',
    # 'sleep_remSleepSeconds',
    # 'sleep_awakeSleepSeconds',

    # act_* features, currently excluded from the plot:
    # 'act_totalCalories',
    # 'act_activeKilocalories',
    # 'act_distance',
    # 'act_activeTime',

    'sleep_averageRespirationValue', 'sleep_lowestRespirationValue', 'sleep_highestRespirationValue',
    'sleep_awakeCount', 'sleep_avgSleepStress', 'sleep_avgHeartRate'
]

# One figure per user: every available scalar feature drawn as a line over the days.
for group, users in groups_dfs.items():
    user_ids = sorted(users.keys())

    for user_id in user_ids:
        df = users[user_id]
        # x axis: 1-based day number, one per DataFrame row.
        days = [row_number + 1 for row_number in range(len(df))]

        fig = go.Figure()

        for feature in scalar_features:
            if feature not in df.columns:
                # This user's file lacks the column; nothing to draw.
                continue
            feature_trace = go.Scatter(
                x=days,
                y=df[feature],
                mode='lines+markers',
                name=feature
            )
            fig.add_trace(feature_trace)

        layout_options = dict(
            title=f"{group} - User {user_id} - Scalar Features over Days",
            xaxis_title="Day",
            yaxis_title="Value",
            height=600
        )
        fig.update_layout(**layout_options)

        # Rendering is disabled; uncomment to display one figure per user.
        # fig.show()


In [None]:
import numpy as np
from scipy.interpolate import PchipInterpolator, interp1d

def clean_series(series, invalid_values=None):
    """
    Return a copy of `series` as floats, with invalid entries replaced by np.nan.

    - series: iterable of numbers (entries may be None)
    - invalid_values: optional collection of sentinel values to treat as
      invalid (e.g. [-1, -2]); when None, only None entries become np.nan
    """
    def _as_float_or_nan(value):
        # Missing entries and sentinel values both map to NaN.
        if value is None:
            return np.nan
        if invalid_values is not None and value in invalid_values:
            return np.nan
        return float(value)

    return [_as_float_or_nan(value) for value in series]

def interpolate_physiological(signal, kind='pchip', clip_range=None, jitter=0, rng=None):
    """
    Fill the NaN gaps of a 1-D signal by interpolating over the sample index.

    - signal: sequence of numbers; NaN marks a missing sample
    - kind: 'pchip' uses PchipInterpolator; any other value uses linear
      interpolation with linear extrapolation at the edges
    - clip_range: optional (low, high) pair; the result is clipped to it
    - jitter: if > 0, uniform noise in [-jitter, jitter] is added to EVERY
      sample of the result (measured ones included), before clipping
    - rng: optional random generator exposing `uniform(low, high, size=...)`
      (e.g. np.random.default_rng(seed)) for reproducible jitter; when None
      the global np.random state is used

    Returns a list of floats with the same length as `signal`: [] for an empty
    signal, and all-NaN (no jitter, no clipping) when there is no valid sample.
    """
    n = len(signal)
    if n == 0:
        return []

    x = np.arange(n)
    values = np.array(signal, dtype=float)
    valid = ~np.isnan(values)
    n_valid = int(valid.sum())
    if n_valid == 0:
        return [np.nan] * n

    if n_valid == 1:
        # Both interpolators need at least two points and would raise here;
        # with a single measurement the only sensible fill is that constant.
        filled = np.full(n, values[valid][0])
    elif kind == 'pchip':
        filled = PchipInterpolator(x[valid], values[valid])(x)
    else:
        filled = interp1d(x[valid], values[valid], kind='linear', fill_value="extrapolate")(x)

    if jitter > 0:
        draw_uniform = np.random.uniform if rng is None else rng.uniform
        filled = filled + draw_uniform(-jitter, jitter, size=n)
    if clip_range is not None:
        filled = np.clip(filled, clip_range[0], clip_range[1])
    return filled.tolist()


# # --- Usage example ---
# hr_series_clean = clean_series(hr_series, invalid_values=[])
# resp_series_clean = clean_series(resp_series, invalid_values=[-1, -2])
# stress_series_clean = clean_series(stress_series, invalid_values=[-1, -2])

# hr_filled = interpolate_physiological(hr_series_clean, kind='pchip', clip_range=(40,180), jitter=1)
# resp_filled = interpolate_physiological(resp_series_clean, kind='pchip', clip_range=(12,40), jitter=0.5)
# stress_filled = interpolate_physiological(stress_series_clean, kind='linear', clip_range=(0,100), jitter=0)
