# Analysis of my data from the Ready-Set-Go (RSG) task

In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# --- Load & Prepare Data ---

df = pd.read_csv('../data/readysetgo_20250722_605120.csv')

# Keep only 'go' phase trials; .copy() leaves the raw frame `df` untouched
go_df = df[df['phase'] == 'go'].copy()

# Coerce to numeric FIRST: errors='coerce' turns any unparseable entry into NaN,
# so the missing-data filter below has to run after it to catch those rows too.
for col in ['target_interval', 'set_duration', 'reproduced_interval', 'reproduction_error']:
    go_df[col] = pd.to_numeric(go_df[col], errors='coerce')

# Drop trials missing any of the values every analysis below needs
go_df.dropna(subset=['target_interval', 'reproduced_interval', 'reproduction_error'], inplace=True)

# Unique set durations in ascending order (set_duration may still hold NaN, so exclude it)
set_durations = sorted(go_df['set_duration'].dropna().unique())


# Reported vs Actual Interval 


In [9]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Requires go_df from the data-preparation cell; set_durations is recomputed here.

# Ascending set durations keep the color ordering stable. NaN is excluded
# (as in the data-preparation cell): sorted() cannot order it reliably and
# int(NaN) would raise when the legend labels are built.
set_durations = sorted(go_df['set_duration'].dropna().unique())

# Shared axis bounds: the smallest and largest value across both the target and
# the reproduced intervals, so both axes and the unity line cover the same span.
min_val = min(go_df['target_interval'].min(), go_df['reproduced_interval'].min())
max_val = max(go_df['target_interval'].max(), go_df['reproduced_interval'].max())

# Define a discrete color scale with one color per set duration
from matplotlib import cm
import matplotlib.colors as mcolors
from matplotlib import pyplot as plt

# Sample matplotlib's sequential 'YlOrRd' colormap at len(set_durations) levels,
# so the first entry of set_durations gets the yellow end and the last the red end.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
cmap = plt.get_cmap('YlOrRd', len(set_durations))
ordered_colors = [mcolors.to_hex(cmap(i)) for i in range(len(set_durations))]

# One marker trace per set duration, colored by its position in set_durations
traces = []

for i, dur in enumerate(set_durations):
    subset = go_df.loc[go_df['set_duration'] == dur]
    marker_style = dict(
        color=ordered_colors[i],
        size=10,
        line=dict(width=1, color='black'),
    )
    trial_trace = go.Scatter(
        x=subset['target_interval'],
        y=subset['reproduced_interval'],
        mode='markers',
        name=f'Set: {int(dur)} ms',
        marker=marker_style,
    )
    traces.append(trial_trace)

# Dashed diagonal (y = x): points on it have reproduced interval == target interval
unity_trace = go.Scatter(
    x=[min_val, max_val],
    y=[min_val, max_val],
    mode='lines',
    name='Unity Line',
    line=dict(dash='dash', color='gray', width=2),
    showlegend=True,
)
traces.append(unity_trace)

# Figure layout: both axes use the same [min_val, max_val] range, white
# background, legend pushed to the right of the plotting area.
layout = go.Layout(
    title='RSG Task: Reported vs Actual Interval (Grouped by Set Duration)',
    xaxis={'title': 'Actual Interval (ms)', 'range': [min_val, max_val]},
    yaxis={'title': 'Reported Interval (ms)', 'range': [min_val, max_val]},
    legend={'title': 'Set Duration', 'x': 1.05},
    width=700,
    height=600,
    margin={'l': 80, 'r': 80, 't': 80, 'b': 80},
    plot_bgcolor='white',
)

fig = go.Figure(data=traces, layout=layout)

# Same light-gray grid on the x and y axes
for update_axis in (fig.update_xaxes, fig.update_yaxes):
    update_axis(showgrid=True, gridcolor='lightgray')

fig.show()

# --- Summary Stats ---
# describe() on the single-column frame: count, mean, std, min, quartiles, max
print("\n=== Summary Stats ===")
print(go_df[['reproduction_error']].describe())

# --- Error by Target Interval ---
# Mean reproduction error for each distinct target interval
error_by_target = go_df.groupby('target_interval')['reproduction_error'].agg('mean')
print("\n=== Mean Error by Target Interval ===")
print(error_by_target)

# --- Save Summary CSV ---
# Mean, standard deviation and trial count of the error for every
# (target_interval, set_duration) combination, as a flat table.
summary = (
    go_df
    .groupby(['target_interval', 'set_duration'])['reproduction_error']
    .agg(['mean', 'std', 'count'])
    .reset_index()
    .rename(columns={'mean': 'mean_error', 'std': 'std_error', 'count': 'n'})
)
# summary.to_csv('rsg_summary_stats.csv', index=False)



=== Summary Stats ===
       reproduction_error
count          300.000000
mean           -24.350000
std            284.875659
min           -696.000000
25%           -172.500000
50%              7.000000
75%            110.250000
max           2359.000000

=== Mean Error by Target Interval ===
target_interval
200.0     115.986111
530.0     141.431034
850.0      15.070175
1180.0   -106.288462
1500.0   -314.606557
Name: reproduction_error, dtype: float64


# Error vs Delay (Set Duration)


In [12]:
# --- Error vs Set Duration Plot ---

# Mean, standard error of the mean (SEM) and trial count of the error per set duration
error_summary = go_df.groupby('set_duration')['reproduction_error'].agg(['mean', 'sem', 'count']).reset_index()

error_trace = go.Scatter(
    x=error_summary['set_duration'],
    y=error_summary['mean'],
    # Error bars are the SEM column computed above (not the standard deviation)
    error_y=dict(
        type='data',
        array=error_summary['sem'],
        visible=True
    ),
    mode='markers+lines',
    marker=dict(size=10, color='crimson'),
    line=dict(width=2),
    # Label matches what the bars actually show
    name='Mean Error ± SEM'
)

layout2 = go.Layout(
    title='Reproduction Error vs Delay Duration',
    xaxis=dict(title='Delay Duration (ms)'),
    yaxis=dict(title='Mean Reproduction Error (ms)'),
    width=700,
    height=500,
    margin=dict(l=80, r=50, t=60, b=60),
    plot_bgcolor='white'
)

fig2 = go.Figure(data=[error_trace], layout=layout2)
fig2.update_xaxes(showgrid=True, gridcolor='lightgray')
fig2.update_yaxes(showgrid=True, gridcolor='lightgray')

fig2.show()


# Error vs Target Interval


In [13]:

# Mean, standard deviation and trial count of the reproduction error per target interval
error_summary = (
    go_df
    .groupby('target_interval')['reproduction_error']
    .agg(['mean', 'std', 'count'])
    .reset_index()
)

# Error bars span one standard deviation on either side of each mean
std_bars = {'type': 'data', 'array': error_summary['std'], 'visible': True}

error_trace = go.Scatter(
    x=error_summary['target_interval'],
    y=error_summary['mean'],
    error_y=std_bars,
    mode='markers+lines',
    marker={'size': 10, 'color': 'crimson'},
    line={'width': 2},
    name='Mean Error ± STD',
)

layout2 = go.Layout(
    title='Reproduction Error vs target_interval',
    xaxis={'title': 'target_interval (ms)'},
    yaxis={'title': 'Mean Reproduction Error (ms)'},
    width=700,
    height=500,
    margin={'l': 80, 'r': 50, 't': 60, 'b': 60},
    plot_bgcolor='white',
)

fig2 = go.Figure(data=[error_trace], layout=layout2)

# Same light-gray grid on the x and y axes
for update_axis in (fig2.update_xaxes, fig2.update_yaxes):
    update_axis(showgrid=True, gridcolor='lightgray')

fig2.show()


# Error vs Target Interval (split by delay)

In [14]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from matplotlib import cm
import matplotlib.colors as mcolors

# Filter and clean: rebuild the 'go'-phase frame from the raw data in `df`
go_df = df[df['phase'] == 'go'].copy()
# Coerce to numeric FIRST: errors='coerce' turns unparseable entries into NaN,
# so the dropna below must run afterwards to remove those rows as well.
for col in ['target_interval', 'set_duration', 'reproduced_interval', 'reproduction_error']:
    go_df[col] = pd.to_numeric(go_df[col], errors='coerce')
go_df.dropna(subset=['target_interval', 'reproduced_interval', 'reproduction_error'], inplace=True)

# Mean, standard deviation and trial count of the error for each
# (target_interval, set_duration) combination
grouped = go_df.groupby(['target_interval', 'set_duration'])['reproduction_error'].agg(['mean', 'std', 'count']).reset_index()

# Colors taken from the sequential 'OrRd' (orange-red) colormap, one per set duration
from matplotlib import pyplot as plt

# Ascending order; NaN is excluded because sorted() cannot order it reliably
set_durations = sorted(go_df['set_duration'].dropna().unique())
# One extra level is requested and level 0 is skipped (note the i + 1 below),
# so the first color of the map is never used.
# pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
cmap = plt.get_cmap('OrRd', len(set_durations) + 1)
ordered_colors = [mcolors.to_hex(cmap(i + 1)) for i in range(len(set_durations))]

fig = go.Figure()

# First pass: every individual trial as a faint open circle. These traces are
# hidden from the legend but share a legendgroup with the matching mean trace.
for idx, sd in enumerate(set_durations):
    trials = go_df.loc[go_df['set_duration'] == sd]
    trial_marker = dict(color=ordered_colors[idx], size=6, opacity=0.5, symbol='circle-open')
    trial_trace = go.Scatter(
        x=trials['target_interval'],
        y=trials['reproduction_error'],
        mode='markers',
        name=f'{sd} ms (trials)',
        marker=trial_marker,
        legendgroup=str(sd),
        showlegend=False,
    )
    fig.add_trace(trial_trace)

# Second pass (added after all trial traces): per-target-interval mean of each
# set duration, with error bars taken from the 'std' column of `grouped`.
for idx, sd in enumerate(set_durations):
    per_target = grouped.loc[grouped['set_duration'] == sd]
    std_bars = dict(type='data', array=per_target['std'], visible=True)
    mean_trace = go.Scatter(
        x=per_target['target_interval'],
        y=per_target['mean'],
        error_y=std_bars,
        mode='lines+markers',
        name=f'{sd} ms (mean)',
        marker=dict(color=ordered_colors[idx], size=8),
        line=dict(width=2, color=ordered_colors[idx]),
        legendgroup=str(sd),
    )
    fig.add_trace(mean_trace)

# Layout: titles, figure size, white background and a light-gray grid on both axes
grid_style = {'showgrid': True, 'gridcolor': 'lightgray'}

fig.update_layout(
    title='Reproduction Error vs Target Interval (Individual + Mean with Error Bars)',
    xaxis_title='Target Interval (ms)',
    yaxis_title='Reproduction Error (ms)',
    width=900,
    height=550,
    plot_bgcolor='white',
    xaxis=dict(grid_style),
    yaxis=dict(grid_style),
    legend_title='Set Duration',
)

fig.show()


# Reported Interval vs Trial number in session

In [17]:

# Filter and clean: rebuild the 'go'-phase frame from the raw data in `df`
go_df = df[df['phase'] == 'go'].copy()
# Coerce to numeric FIRST: errors='coerce' turns unparseable entries into NaN.
# Dropping afterwards keeps NaN out of target_intervals below, where it would
# break sorted() ordering and the int() conversion used for trace names.
go_df['target_interval'] = pd.to_numeric(go_df['target_interval'], errors='coerce')
go_df['reproduced_interval'] = pd.to_numeric(go_df['reproduced_interval'], errors='coerce')
go_df.dropna(subset=['target_interval', 'reproduced_interval'], inplace=True)

# Number the remaining trials 1..len(go_df) in row order
go_df['trial_number'] = range(1, len(go_df) + 1)

# Set smoothing window
N = 5  # Number of trials to smooth over

# Distinct target intervals in ascending order
target_intervals = sorted(go_df['target_interval'].unique())

# Plot: one smoothed line per target interval
fig = go.Figure()

for ti in target_intervals:
    trials = go_df.loc[go_df['target_interval'] == ti]
    # Rolling mean over up to N consecutive trials of this target interval;
    # min_periods=1 means the first points average over fewer than N trials.
    smoothed = trials['reproduced_interval'].rolling(window=N, min_periods=1).mean()

    smoothed_trace = go.Scatter(
        x=trials['trial_number'],
        y=smoothed,
        mode='lines+markers',
        name=f'{int(ti)} ms',
        marker={'size': 6},
        line={'width': 2},
    )
    fig.add_trace(smoothed_trace)

# Layout
layout_options = dict(
    title=f'Reproduced Interval vs Trial Number (Smoothed over {N} trials)',
    xaxis_title='Trial Number',
    yaxis_title='Smoothed Reproduced Interval (ms)',
    legend_title='Target Interval',
    template='simple_white',
    width=900,
    height=500,
)
fig.update_layout(**layout_options)

fig.show()


In [19]:

# Rebuild the 'go'-phase frame from the raw data in `df`
go_df = df[df['phase'] == 'go'].copy()
# Coerce to numeric FIRST: errors='coerce' turns unparseable entries into NaN,
# so the dropna below must run afterwards to remove those rows as well.
for col in ['target_interval', 'set_duration', 'reproduced_interval', 'reproduction_error']:
    go_df[col] = pd.to_numeric(go_df[col], errors='coerce')
go_df.dropna(subset=['target_interval', 'reproduced_interval', 'reproduction_error'], inplace=True)
# 1-based trial numbers, consistent with the smoothed trial-number plot above.
# Assigned before the outlier filter, so surviving trials keep their original number.
go_df['trial_number'] = range(1, len(go_df) + 1)
# Keep only trials whose absolute error is within 2 standard deviations
# (SD computed over all cleaned trials, measured from zero rather than the mean).
error_std = go_df['reproduction_error'].std()
go_df = go_df[go_df['reproduction_error'].abs() <= 2 * error_std].reset_index(drop=True)

# === 1. Mean Error vs Target Interval ===
# Mean and SD of the error per target interval; SD drawn as error bars
error_stats = (
    go_df
    .groupby('target_interval')['reproduction_error']
    .agg(['mean', 'std'])
    .reset_index()
)
px.scatter(
    error_stats,
    x='target_interval',
    y='mean',
    error_y='std',
    title='Mean Error vs Target Interval',
    labels={'mean': 'Reproduction Error (ms)'},
).show()

# === 2. Scalar Variability (SD of reproduced interval) ===
# After reset_index the 'reproduced_interval' column holds the per-target SD
scalar_var = (
    go_df
    .groupby('target_interval')['reproduced_interval']
    .std()
    .reset_index()
)
px.scatter(
    scalar_var,
    x='target_interval',
    y='reproduced_interval',
    title='Scalar Variability',
    labels={'reproduced_interval': 'SD of Reproduced Interval (ms)'},
).show()

# === 3. Regression Toward the Mean ===
# Raw trials with an ordinary-least-squares trendline fitted by plotly express
px.scatter(
    go_df,
    x='target_interval',
    y='reproduced_interval',
    trendline="ols",
    title='Regression Toward the Mean',
).show()

# === 4. Reproduction Error vs Set Duration ===
# Same summary as section 1, grouped by set duration instead
error_set = (
    go_df
    .groupby('set_duration')['reproduction_error']
    .agg(['mean', 'std'])
    .reset_index()
)
px.scatter(
    error_set,
    x='set_duration',
    y='mean',
    error_y='std',
    title='Error vs Set Duration',
    labels={'mean': 'Reproduction Error (ms)'},
).show()

# === 5. Error by Target Interval x Set Duration ===
# Mean error for every (target_interval, set_duration) pair, one line per set duration
interaction = (
    go_df
    .groupby(['target_interval', 'set_duration'])['reproduction_error']
    .mean()
    .reset_index()
)
px.line(
    interaction,
    x='target_interval',
    y='reproduction_error',
    color='set_duration',
    title='Error by Target Interval x Set Duration',
).show()

# === 6. Trial-by-Trial Reproduced Interval ===
# One line per target interval, plotted against trial number
px.line(
    go_df,
    x='trial_number',
    y='reproduced_interval',
    color='target_interval',
    title='Reproduced Interval Across Trials',
).show()

# === 7. Autocorrelation of Errors ===
# Autocorrelation of the error series at lags 1 through 9.
# NOTE(review): go_df was filtered at the top of this cell, so a lag counts
# remaining rows, not necessarily consecutive original trials - confirm intended.
lags = list(range(1, 10))
error_series = go_df['reproduction_error']
autocorr_vals = [error_series.autocorr(lag=lag) for lag in lags]
px.bar(
    x=lags,
    y=autocorr_vals,
    labels={'x': 'Lag', 'y': 'Autocorrelation'},
    title='Autocorrelation of Reproduction Error',
).show()

# === 8. Error Distributions by Set Duration ===
# Violin per set duration with an inner box plot and every trial shown as a point
px.violin(
    go_df,
    x='set_duration',
    y='reproduction_error',
    box=True,
    points='all',
    title='Error Distributions by Set Duration',
).show()

# === 9. Coefficient of Variation (CV) by Target Interval ===
# CV = SD / mean of the reproduced interval, per target interval
cv = (
    go_df
    .groupby('target_interval')['reproduced_interval']
    .agg(['mean', 'std'])
    .reset_index()
)
cv['cv'] = cv['std'].div(cv['mean'])
px.line(
    cv,
    x='target_interval',
    y='cv',
    title='Coefficient of Variation by Target Interval',
).show()

# === 10. Outlier Detection (±2 SD) ===
# The threshold is recomputed from the current go_df, which was already trimmed
# to |error| <= 2 SD at the top of this cell, so flags here are a second pass.
threshold = 2 * go_df['reproduction_error'].std()
go_df['is_outlier'] = go_df['reproduction_error'].abs().gt(threshold)
px.scatter(
    go_df,
    x='trial_number',
    y='reproduction_error',
    color='is_outlier',
    title='Outliers in Reproduction Error',
).show()
