In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to project source take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2 

In [None]:
# Load variables from a .env file into the process environment first.
from dotenv import load_dotenv
load_dotenv()
# NOTE(review): presumably puts the project's src/ directory on sys.path so the
# awear_neuroscience imports in the next cell resolve — confirm in setup_path.
from setup_path import add_src_to_path
add_src_to_path()

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from awear_neuroscience.signal_processing.filters import preprocess_segment
from awear_neuroscience.signal_processing.artifacts import detect_artifacts
from awear_neuroscience.signal_processing.features import (
    compute_psd,
    extract_band_features,
    apply_ema_filtering,
    normalize_indexes,
    add_time_features,
)

In [None]:
# Load EEG long-format data (ensure this file exists)
long_df = pd.read_csv("../data/long_df.csv")
# Passed to the filtering / PSD helpers; presumably the sampling rate in Hz — TODO confirm.
fs = 256

# Segment ids in order of first appearance.
segments = long_df['segment'].unique()

# Filter each segment on its own and write the result back through the row
# index. Concatenating the per-segment outputs and assigning them positionally
# is only correct when every segment's rows are contiguous and appear in
# first-appearance order; index-aligned assignment keeps each filtered sample
# on its source row regardless of row order. read_csv gives a unique
# RangeIndex here, so the label-based assignment is unambiguous.
# Rows whose segment id is NaN are skipped by groupby and keep a NaN
# filtered_value.
filtered_signals = []
filtered_value = pd.Series(np.nan, index=long_df.index, dtype='float64')
for _, seg_wave in long_df.groupby('segment', sort=False)['waveform_value']:
    seg_filtered = preprocess_segment(seg_wave.values, fs)
    filtered_signals.append(seg_filtered)
    filtered_value.loc[seg_wave.index] = seg_filtered

# Add filtered back to long_df
long_df['filtered_value'] = filtered_value
long_df['abs_filtered'] = np.abs(long_df['filtered_value'])

# Peak absolute amplitude per segment, kept both as its own table (max_abs)
# and as a column repeated on every row of long_df.
max_abs = (
    long_df
    .groupby('segment')['abs_filtered']
    .max()
    .reset_index(name='max_abs_filtered_value')
)
long_df = long_df.merge(max_abs, on='segment', how='left')



In [None]:
# Distribution of every filtered sample in long_df, pooled over all segments.
filtered_value_hist = px.histogram(
    data_frame=long_df,
    x='filtered_value',
    nbins=200,
    title='Distribution of Filtered EEG Values',
)
filtered_value_hist.show()

In [None]:
# One value per segment: its largest absolute filtered amplitude.
peak_hist_options = dict(
    x='max_abs_filtered_value',
    nbins=100,
    title='Max Abs Filtered EEG per Segment',
)
px.histogram(max_abs, **peak_hist_options).show()

In [None]:

# Rows without a segment id or a filtered sample cannot be scored. Any
# is_artifact column left by an earlier run of this cell is dropped as well;
# otherwise the merge below would emit is_artifact_x / is_artifact_y and the
# plain is_artifact column read by the feature-extraction cell would be missing.
long_df = (
    long_df
    .dropna(subset=['segment', 'filtered_value'])
    .drop(columns=['is_artifact'], errors='ignore')
    .reset_index(drop=True)
)

# One detect_artifacts call per segment on that segment's filtered samples.
# The notebook-level fs is passed instead of a second hard-coded 256 so the
# two values cannot drift apart.
# NOTE(review): downstream code treats is_artifact as a single truthy/falsy
# flag per segment — confirm that detect_artifacts returns a scalar here.
artifact_flags = (
    long_df
    .groupby(['segment'])['filtered_value']
    .apply(lambda x: detect_artifacts(np.array(x), fs=fs, method='amplitude', amp_thresh=20))
    .reset_index(name='is_artifact')
)

# Merge the artifact labels back to the long_df (one label repeated on every
# row of its segment).
long_df = long_df.merge(artifact_flags, on=['segment'], how='left')



In [None]:

# Feature extraction
# Build one feature record per artifact-free segment. Iterating the groups of
# the current long_df (rather than a segment list captured before rows were
# dropped) guarantees every group has at least one row, so .iloc[0] cannot
# fail on an empty selection, and avoids re-scanning the whole frame once per
# segment. sort=False keeps segments in order of first appearance.
features = []
for seg_id, seg_data in long_df.groupby('segment', sort=False):
    # is_artifact is merged in per segment, so the first row speaks for the
    # whole segment.
    if seg_data['is_artifact'].iloc[0]:
        continue
    signal = seg_data['filtered_value'].values
    freqs, psd = compute_psd(signal, fs)
    features.append(
        extract_band_features(
            freqs,
            psd,
            segment=seg_id,
            # Segment-level metadata is taken from the segment's first row.
            focus_type=seg_data['focus_type'].iloc[0],
            timestamp=seg_data['timestamp'].iloc[0],
        )
    )

features_df = pd.DataFrame(features)
features_df.head()


In [None]:
# Mean of each band-power column over the rows of features_df.
band_cols = ['delta', 'theta', 'alpha', 'beta', 'gamma']
features_df.loc[:, band_cols].mean()


In [None]:
# Reshape features_df to long format: one row per (segment, timestamp, band)
# carrying that band's power.
id_vars = ['segment', 'timestamp']  # add 'session_id' or others if needed
value_vars = ['delta', 'theta', 'alpha', 'beta', 'gamma']
melted = pd.melt(
    features_df,
    id_vars=id_vars,
    value_vars=value_vars,
    var_name='band',
    value_name='power',
)

# Stacked bars: each segment's bar is split into its band powers.
bar_options = dict(
    x='segment',
    y='power',
    color='band',
    barmode='stack',
    title='EEG Band Power per Segment',
)
fig = px.bar(melted, **bar_options)
fig.show()


In [None]:
import plotly.graph_objects as go

# Mean and standard deviation of power for every (timestamp, band) pair.
# pandas' std uses ddof=1 by default, so a pair backed by a single row has
# std NaN and contributes no ribbon.
agg = melted.groupby(['timestamp', 'band'])['power'].agg(['mean', 'std']).reset_index()

# One RGB colour per band, shared by the band's mean line and its ribbon so
# the two can be matched visually (a single fixed fill colour made every
# ribbon look the same). The palette is cycled if there are more bands than
# entries.
band_palette = [
    (99, 110, 250),
    (239, 85, 59),
    (0, 204, 150),
    (171, 99, 250),
    (255, 161, 90),
]

fig = go.Figure()

for band_pos, band in enumerate(agg['band'].unique()):
    df_band = agg[agg['band'] == band]
    red, green, blue = band_palette[band_pos % len(band_palette)]
    # All three traces of a band share a legend group, so they are toggled
    # together from the legend.
    group_key = str(band)

    fig.add_trace(go.Scatter(
        x=df_band['timestamp'],
        y=df_band['mean'],
        mode='lines',
        line=dict(color=f"rgb({red},{green},{blue})"),
        name=f"{band} mean",
        legendgroup=group_key
    ))

    # Upper bound: invisible line that only serves as the fill target for
    # the trace added right after it.
    fig.add_trace(go.Scatter(
        x=df_band['timestamp'],
        y=df_band['mean'] + df_band['std'],
        mode='lines',
        line=dict(width=0),
        legendgroup=group_key,
        showlegend=False
    ))

    # Lower bound with fill: 'tonexty' shades the area between this trace
    # and the previously added one (the upper bound).
    fig.add_trace(go.Scatter(
        x=df_band['timestamp'],
        y=df_band['mean'] - df_band['std'],
        mode='lines',
        line=dict(width=0),
        fill='tonexty',
        name=f"{band} ±1 std",
        fillcolor=f"rgba({red},{green},{blue},0.2)",
        legendgroup=group_key,
        showlegend=True
    ))

# The ribbon is mean ± 1 standard deviation, not a confidence interval, so
# the title says so.
fig.update_layout(
    title='EEG Band Power (mean ±1 std)',
    xaxis_title='Timestamp',
    yaxis_title='Power',
    legend_title='Band'
)
fig.show()




In [None]:
# Unaggregated band power against timestamp: one coloured line (with point
# markers) per band.
fig = px.line(
    melted,
    x='timestamp',
    y='power',
    color='band',
    markers=True,
    title='EEG Band Power Over Time',
)
axis_labels = dict(
    xaxis_title='Timestamp',
    yaxis_title='Band Power',
    legend_title='Band',
)
fig.update_layout(**axis_labels)
fig.show()


In [None]:
# Smooth the features with an exponential moving average, normalise the
# selected smoothed columns, then attach time-derived columns.
# alpha = 2 / (30 + 1) matches a span of 30 under the usual
# alpha = 2 / (span + 1) convention.
# NOTE(review): features_df is overwritten with its own processed version, so
# re-running this cell feeds already-processed data back in — confirm the
# helpers tolerate that.
ema_alpha = 2 / (30 + 1)
columns_to_normalize = [
    'theta_beta_ratio_fil',
    'engagement_index_fil',
    'focus_index_fil',
    'beta_fil',
    'theta_alpha_ratio_fil',
    'beta_alpha_ratio_fil',
]
features_df = apply_ema_filtering(features_df, alpha=ema_alpha)
features_df = normalize_indexes(features_df, columns_to_normalize)
features_df = add_time_features(features_df)


In [None]:

# Plots
# (y column, title, extra px.line keyword arguments) for each figure, listed
# in display order. Only the first plot is split by focus_type.
line_plot_specs = [
    ('theta_beta_ratio_fil', 'Theta/Beta Ratio', {'color': 'focus_type'}),
    ('engagement_index_fil_norm', 'Normalized Engagement Index', {}),
    ('focus_index_fil_norm', 'Normalized Focus Index', {}),
    ('beta_fil', 'Beta Power', {}),
    ('theta_alpha_ratio_fil', 'Theta/Alpha Ratio', {}),
    ('beta_alpha_ratio_fil', 'Beta/Alpha Ratio', {}),
]
for y_column, plot_title, extra_options in line_plot_specs:
    px.line(features_df, x='timestamp', y=y_column, title=plot_title, **extra_options).show()


# Extras

In [None]:
# Explore different thresholds for artifact removal
# `filtered_signals` (built in the loading cell) holds one filtered array per
# segment; the name `filtered` is not defined anywhere in this notebook and
# raised NameError on a fresh kernel.
thresholds = [10, 20, 50, 100]

# Peak absolute amplitude of each segment, computed once rather than once per
# threshold.
peak_amplitudes = np.array([np.max(np.abs(seg)) for seg in filtered_signals])

for th in thresholds:
    # A segment is kept when its peak stays strictly below the threshold.
    n_kept = int((peak_amplitudes < th).sum())
    print(f"Threshold: {th} μV — Segments kept: {n_kept}/{len(peak_amplitudes)}")



In [None]:

# NOTE(review): max_abs_amplitude, clean_by_amp, clean_by_gamma, session_id
# and email are not created by any cell visible in this notebook; unless the
# feature helpers add them to features_df, these plots will fail — confirm.
# NOTE(review): hover_data puts 'email' into the rendered output — check that
# this is acceptable before sharing the notebook.

# Show interactive scatter for amplitude
fig1 = px.scatter(
    features_df,
    x='max_abs_amplitude',
    y='gamma',
    color='clean_by_amp',
    title='Amplitude vs Gamma Power (color: Clean by Amplitude)',
    hover_data=['session_id', 'email'],
)

# Options shared by both threshold-tuning histograms.
tuning_hist_options = dict(nbins=50, marginal='box')

# Show histogram for amplitude threshold tuning
fig2 = px.histogram(
    features_df,
    x='max_abs_amplitude',
    color='clean_by_amp',
    title='Histogram of Max Absolute Amplitude (threshold=20)',
    **tuning_hist_options,
)

# Show histogram for gamma power threshold tuning
fig3 = px.histogram(
    features_df,
    x='gamma',
    color='clean_by_gamma',
    title='Histogram of Gamma Power (threshold=25)',
    **tuning_hist_options,
)

# Display in the same order the figures were built.
for figure in (fig1, fig2, fig3):
    figure.show()
