# Notebook to analyze my data from the Standard RSG with blocks task

In [18]:
import pandas as pd
import numpy as np

# Load the full CSV
file_paths = [
    '../data/readysetgo_partial_20250804_571685.csv',
        '../data/readysetgo_partial_20250802_015792.csv',

    '../data/readysetgo_partial_20250730_895267.csv',
    '../data/readysetgo_partial_20250730_285341.csv',  # Replace with your second file
]

# Load and tag with file index
dfs = []
for i, file in enumerate(file_paths):
    df = pd.read_csv(file)
    df['file_id'] = i  # Tag file index
    dfs.append(df)
    
# Concatenate
df = pd.concat(dfs, ignore_index=True)

# Identify row indices where stimulus says "Block change"
block_change_indices = df[df['stimulus'].astype(str).str.contains('Block change', na=False)].index.tolist()

# 2. By change in file_id
file_change_indices = df[df['file_id'].diff().fillna(0) != 0].index.tolist()

# Combine and sort all block change start points
all_block_starts = sorted(set([0] + [i + 1 for i in block_change_indices + file_change_indices] + [len(df)]))

# --- Assign Block Numbers ---
block_labels = np.empty(len(df), dtype=object)

for block_num, (start, end) in enumerate(zip(all_block_starts[:-1], all_block_starts[1:]), start=1):
    block_labels[start:end] = block_num

df['block'] = block_labels
# Add 0 as the start of the first block
block_starts = [0] + [i + 1 for i in block_change_indices]  # +1 to start after 'Block change'
block_starts.append(len(df))  # add end to slice to end

# Filter only 'go' trials
go_df = df[df['phase'] == 'go'].copy()

# Drop rows with missing data
go_df.dropna(subset=['target_interval', 'reproduced_interval', 'reproduction_error'], inplace=True)

# Ensure numeric types
for col in ['target_interval', 'set_duration', 'reproduced_interval', 'reproduction_error']:
    go_df[col] = pd.to_numeric(go_df[col], errors='coerce')

# Compute prior_mean = (min + max) / 2 per block
block_bounds = go_df.groupby('block')['target_interval'].agg(['min', 'max'])
block_bounds['prior_mean'] = (block_bounds['min'] + block_bounds['max']) / 2

# Merge into go_df
go_df = go_df.merge(block_bounds[['prior_mean']], left_on='block', right_index=True)

# Optional: unique set durations
set_durations = sorted(go_df['set_duration'].dropna().unique())
%debug
def remove_outliers(df):
    df_filtered = []
    for name, group in df.groupby('target_interval'):
        mean = group['reproduced_interval'].mean()
        std = group['reproduced_interval'].std()
        if std == 0 or np.isnan(std):
            df_filtered.append(group)
        else:
            
            filtered = group[np.abs(group['reproduced_interval'] - mean) <=  3* std]
           
            df_filtered.append(filtered)
    return pd.concat(df_filtered, ignore_index=True)

go_df = remove_outliers(go_df)

ERROR:root:No traceback has been produced, nothing to debug.


In [19]:
np.shape(go_df)

(701, 16)

# Reported vs Actual Interval 


In [20]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Assume go_df and set_durations are already defined and sorted

# Sort set durations to maintain color ordering
blocks = sorted(go_df['prior_mean'].unique())

# Define axis bounds
min_val = min(go_df['target_interval'].min(), go_df['reproduced_interval'].min())
max_val = max(go_df['target_interval'].max(), go_df['reproduced_interval'].max())

# Define discrete color scale from red to blue
from matplotlib import cm
import matplotlib.colors as mcolors

# Use matplotlib's 'RdBu' colormap, reversed so red = low, blue = high
cmap = cm.get_cmap('OrRd', len(blocks)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(blocks))]

# Create scatter traces per set_duration
traces = []

for i, iBlock in enumerate(blocks):
    subset = go_df[go_df['prior_mean'] == iBlock]
    traces.append(
        go.Scatter(
            x=subset['target_interval'],
            y=subset['reproduced_interval'],
            mode='markers',
            name=f'Block: {int(iBlock)} ms',
            marker=dict(color=ordered_colors[i], size=10, line=dict(width=1, color='black')),
        )
    )

# Add unity line
traces.append(
    go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        name='Unity Line',
        line=dict(dash='dash', color='gray', width=2),
        showlegend=True
    )
)

layout = go.Layout(
    title='RSG Task: Reported vs Actual Interval (Grouped by Set Duration)',
    xaxis=dict(title='Actual Interval (ms)', range=[min_val, max_val]),
    yaxis=dict(title='Reported Interval (ms)', range=[min_val, max_val]),
    legend=dict(title='Block Number', x=1.05),
    width=700,
    height=600,
    margin=dict(l=80, r=80, t=80, b=80),
    plot_bgcolor='white'
)

fig = go.Figure(data=traces, layout=layout)
fig.update_xaxes(showgrid=True, gridcolor='lightgray')
fig.update_yaxes(showgrid=True, gridcolor='lightgray')

fig.show()

# --- Summary Stats ---
print("\n=== Summary Stats ===")
print(go_df[['reproduction_error']].describe())

# --- Error by Target Interval ---
error_by_target = go_df.groupby('target_interval')['reproduction_error'].mean()
print("\n=== Mean Error by Target Interval ===")
print(error_by_target)

# --- Save Summary CSV ---
summary = go_df.groupby(['target_interval', 'set_duration']).agg({
    'reproduction_error': ['mean', 'std', 'count']
}).reset_index()
summary.columns = ['target_interval', 'set_duration', 'mean_error', 'std_error', 'n']
# summary.to_csv('rsg_summary_stats.csv', index=False)



=== Summary Stats ===
       reproduction_error
count          701.000000
mean            13.663338
std            138.845714
min           -398.000000
25%            -77.000000
50%              8.000000
75%             94.000000
max            535.000000

=== Mean Error by Target Interval ===
target_interval
490.0       5.500000
550.0     -34.857143
610.0      -8.769231
670.0     117.555556
730.0      92.666667
790.0      30.052632
850.0      70.166667
910.0      54.419048
960.0      52.895349
1020.0    -12.139785
1080.0    -46.671642
1140.0    -54.389831
1200.0    -83.041096
Name: reproduction_error, dtype: float64


In [34]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from matplotlib import cm
import matplotlib.colors as mcolors

# Assumes go_df is already defined and contains:
# target_interval, reproduced_interval, reproduction_error, block, prior_mean

# Get all unique blocks based on prior_mean
blocks = sorted(go_df['prior_mean'].unique())

# Color map setup
cmap = cm.get_cmap('YlOrRd', len(blocks)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(blocks))]

# Axis bounds
min_val = min(go_df['target_interval'].min(), go_df['reproduced_interval'].min())
max_val = max(go_df['target_interval'].max(), go_df['reproduced_interval'].max())

# Initialize traces and shapes
traces = []
shapes = []

for i, iBlock in enumerate(blocks):
    block_df = go_df[go_df['prior_mean'] == iBlock]

    # --- Raw trial dots ---
    traces.append(
        go.Scatter(
            x=block_df['target_interval'],
            y=block_df['reproduced_interval'],
            mode='markers',
            name=f'Block {i+1} Trials',
            marker=dict(color=ordered_colors[i], size=3, line=dict(width=0.5, color='black')),
            opacity=0.5,
            legendgroup=f'block_{i}',
            showlegend=True
        )
    )

    # --- Line plot with mean ± SEM across target intervals ---
    summary = (
        block_df
        .groupby('target_interval')
        .agg(mean_repro=('reproduced_interval', 'mean'),
             sem_repro=('reproduced_interval', lambda x: x.std(ddof=1) / np.sqrt(len(x))))
        .reset_index()
    )

    traces.append(
        go.Scatter(
            x=summary['target_interval'],
            y=summary['mean_repro'],
            error_y=dict(
                type='data',
                array=summary['sem_repro'],
                visible=True,
                thickness=1.5,
                width=5
            ),
            mode='lines+markers',
            name=f'Block {i+1} Mean ± SEM',
            marker=dict(symbol='circle', size=10, color=ordered_colors[i], line=dict(color='black', width=1.5)),
            line=dict(color=ordered_colors[i], width=2),
            legendgroup=f'block_{i}',
            showlegend=True
        )
    )

    # --- Horizontal line at mean target interval for the block ---
    mean_target = block_df['target_interval'].mean()
    shapes.append(dict(
        type="line",
        x0=min_val, x1=max_val,
        y0=mean_target, y1=mean_target,
        line=dict(color=ordered_colors[i], width=2, dash='dot'),
        layer='below'  # Draw behind markers
    ))

# --- Unity line ---
traces.append(
    go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        name='Unity Line',
        line=dict(dash='dash', color='gray', width=2),
        showlegend=True
    )
)

# --- Layout ---
layout = go.Layout(
    title='RSG Task: Mean Reported vs Actual Interval (per Block)',
    xaxis=dict(title='Actual Interval (ms)', range=[min_val, max_val]),
    yaxis=dict(title='Reported Interval (ms)', range=[min_val, max_val]),
    legend=dict(title='Blocks', x=1.02, y=1),
    width=850,
    height=600,
    margin=dict(l=80, r=160, t=80, b=80),
    plot_bgcolor='white',
    shapes=shapes
)

fig = go.Figure(data=traces, layout=layout)
fig.update_xaxes(showgrid=True, gridcolor='lightgray')
fig.update_yaxes(showgrid=True, gridcolor='lightgray')
fig.show()

In [27]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from matplotlib import cm
import matplotlib.colors as mcolors

# --- Assumes go_df is defined and cleaned ---

# Identify unique blocks
blocks = sorted(go_df['prior_mean'].unique())

# Color setup
cmap = cm.get_cmap('YlOrRd', len(blocks)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(blocks))]

# Axis bounds
min_val = min(go_df['target_interval'].min(), go_df['reproduced_interval'].min())
max_val = max(go_df['target_interval'].max(), go_df['reproduced_interval'].max())

# --- Create subplot layout: 3 rows (trials, bias, std)
fig = make_subplots(
    rows=3, cols=1,
    row_heights=[0.5, 0.25, 0.25],
    shared_xaxes=True,
    vertical_spacing=0.05,
    subplot_titles=[
        'Reported vs Actual Interval (per Block)',
        'Bias (Mean Reported - Target)',
        'STD of Reported Interval (per Target)'
    ]
)

shapes = []

for i, iBlock in enumerate(blocks):
    block_df = go_df[go_df['prior_mean'] == iBlock]

    # Mean ± SEM summary
    summary = (
        block_df
        .groupby('target_interval')
        .agg(
            mean_repro=('reproduced_interval', 'mean'),
            sem_repro=('reproduced_interval', lambda x: x.std(ddof=1) / np.sqrt(len(x))),
            std_repro=('reproduced_interval', 'std')
        )
        .reset_index()
    )

    # --- Raw trials (row 1) ---
    fig.add_trace(
        go.Scatter(
            x=block_df['target_interval'],
            y=block_df['reproduced_interval'],
            mode='markers',
            name=f'Block {i+1} Trials',
            marker=dict(color=ordered_colors[i], size=3, line=dict(width=0.5, color='black')),
            opacity=0.4,
            legendgroup=f'block_{i}'
        ),
        row=1, col=1
    )

    # --- Mean ± SEM (row 1) ---
    fig.add_trace(
        go.Scatter(
            x=summary['target_interval'],
            y=summary['mean_repro'],
            error_y=dict(
                type='data',
                array=summary['sem_repro'],
                visible=True,
                thickness=1.5,
                width=5
            ),
            mode='lines+markers',
            name=f'Block {i+1} Mean ± SEM',
            marker=dict(color=ordered_colors[i], size=10, line=dict(color='black', width=1.5)),
            line=dict(color=ordered_colors[i], width=2),
            legendgroup=f'block_{i}'
        ),
        row=1, col=1
    )

    # --- Horizontal line at mean target interval (row 1 shape) ---
    mean_target = block_df['target_interval'].mean()
    shapes.append(dict(
        type="line",
        
        x0=min_val, x1=max_val,
        y0=mean_target, y1=mean_target,
        line=dict(color=ordered_colors[i], width=2, dash='dot'),
        layer='below'
    ))

    # --- Bias Plot (row 2) ---
    bias = np.abs(summary['mean_repro'] - summary['target_interval'])
    fig.add_trace(
        go.Scatter(
            x=summary['target_interval'],
            y=bias,
            mode='lines+markers',
            name=f'Block {i+1} Bias',
            marker=dict(color=ordered_colors[i], size=8, line=dict(color='black', width=1)),
            line=dict(color=ordered_colors[i], width=2),
            legendgroup=f'block_{i}',
            showlegend=False
        ),
        row=2, col=1
    )

    # --- STD Plot (row 3) ---
    fig.add_trace(
        go.Scatter(
            x=summary['target_interval'],
            y=summary['std_repro'],
            mode='lines+markers',
            name=f'Block {i+1} STD',
            marker=dict(color=ordered_colors[i], size=8, line=dict(color='black', width=1)),
            line=dict(color=ordered_colors[i], width=2),
            legendgroup=f'block_{i}',
            showlegend=False
        ),
        row=3, col=1
    )

# --- Unity line on row 1 ---
fig.add_trace(
    go.Scatter(
        x=[min_val, max_val],
        y=[min_val, max_val],
        mode='lines',
        name='Unity Line',
        line=dict(dash='dash', color='gray', width=2),
        showlegend=True
    ),
    row=1, col=1
)

# --- Final layout ---
fig.update_layout(
    width=900,
    height=850,
    margin=dict(l=80, r=200, t=80, b=80),
    plot_bgcolor='white',
    legend=dict(x=1.02, y=1),
    shapes=shapes
)

# --- Axes labels ---
fig.update_xaxes(title='Actual Interval (ms)', row=3, col=1)
fig.update_yaxes(title='Reported Interval (ms)', row=1, col=1, range=[min_val, max_val])
fig.update_yaxes(title='Bias (ms)', row=2, col=1, zeroline=True, zerolinecolor='gray', zerolinewidth=2)
fig.update_yaxes(title='STD (ms)', row=3, col=1)

fig.update_xaxes(showgrid=True, gridcolor='lightgray')
fig.update_yaxes(showgrid=True, gridcolor='lightgray')

# Show plot
fig.show()


In [32]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from matplotlib import cm
import matplotlib.colors as mcolors

# --- Assumes go_df is already defined and cleaned ---

# Get unique blocks
blocks = sorted(go_df['prior_mean'].unique())

# Color map
cmap = cm.get_cmap('YlOrRd', len(blocks)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(blocks))]

# --- Compute bias and std for each target_interval × block ---
summary_rows = []

for i, iBlock in enumerate(blocks):
    block_df = go_df[go_df['prior_mean'] == iBlock]
    color = ordered_colors[i]
    
    for interval in sorted(block_df['target_interval'].unique()):
        sub_df = block_df[block_df['target_interval'] == interval]
        if len(sub_df) < 2:
            continue  # Not enough data to compute std

        bias = np.abs(sub_df['reproduced_interval'] - sub_df['target_interval']).mean()
        std = sub_df['reproduced_interval'].std()

        summary_rows.append({
            'block': f'Block {i+1}',
            'prior_mean': iBlock,
            'target_interval': interval,
            'bias': bias,
            'std': std,
            'color': color
        })

# Convert to DataFrame
summary_df = pd.DataFrame(summary_rows)


import pandas as pd
import numpy as np
import plotly.graph_objects as go
from matplotlib import cm
import matplotlib.colors as mcolors

# --- Color Setup ---
blocks = sorted(summary_df['prior_mean'].unique())
cmap = cm.get_cmap('YlOrRd', len(blocks)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(blocks))]

# --- Initialize plot ---
traces = []

# --- Plot small dots for each target interval ---
for i, block_val in enumerate(blocks):
    block_data = summary_df[summary_df['prior_mean'] == block_val]

    traces.append(
        go.Scatter(
            x=block_data['bias'],
            y=block_data['std'],
            mode='markers',
            name=f'Block {i+1} Intervals',
            marker=dict(
                color=ordered_colors[i],
                size=6,
                line=dict(width=0.5, color='black')
            ),
            opacity=0.6,
            legendgroup=f'block_{i}',
            showlegend=True
        )
    )

    # --- Compute and plot block-wise mean ---
    mean_bias = block_data['bias'].mean()
    mean_std = block_data['std'].mean()

    traces.append(
        go.Scatter(
            x=[mean_bias],
            y=[mean_std],
            mode='markers',
            name=f'Block {i+1} Mean',
            marker=dict(
                color=ordered_colors[i],
                size=14,
                symbol='circle',
                line=dict(width=2, color='black')
            ),
            legendgroup=f'block_{i}',
            showlegend=True
        )
    )

# --- Layout ---
layout = go.Layout(
    title='Bias vs Variability per Target Interval (per Block)',
    xaxis=dict(title='|Bias| (ms)'),
    yaxis=dict(title='STD of Reproduced Interval (ms)'),
    width=700,
    height=600,
    legend=dict(title='Block', x=1.02),
    plot_bgcolor='white',
    margin=dict(l=80, r=160, t=80, b=80)
)

fig = go.Figure(data=traces, layout=layout)
fig.update_xaxes(showgrid=True, gridcolor='lightgray')
fig.update_yaxes(showgrid=True, gridcolor='lightgray')
fig.show()


In [22]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from matplotlib import cm
import matplotlib.colors as mcolors

# Assumes go_df contains: target_interval, reproduced_interval, prior_mean (block label)

# Get all unique blocks
blocks = sorted(go_df['prior_mean'].unique())

# Define color map
cmap = cm.get_cmap('OrRd', len(blocks)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(blocks))]

# Prepare traces
traces = []

for i, block_mean in enumerate(blocks):
    block_df = go_df[go_df['prior_mean'] == block_mean]

    # Group by target interval and compute std (scalar variability)
    variability = (
        block_df
        .groupby('target_interval')
        .agg(std_reproduced=('reproduced_interval', 'std'),
             n=('reproduced_interval', 'count'))
        .reset_index()
    )

    # Optionally compute SEM for the std itself (though often omitted)
    # variability['sem_std'] = variability['std_reproduced'] / np.sqrt(2 * (variability['n'] - 1))

    traces.append(
        go.Scatter(
            x=variability['target_interval'],
            y=variability['std_reproduced'],
            mode='lines+markers',
            name=f'Block {i+1} (Prior Mean {int(block_mean)} ms)',
            marker=dict(color=ordered_colors[i], size=8),
            line=dict(color=ordered_colors[i], width=2),
        )
    )

# Layout
layout = go.Layout(
    title='Scalar Variability vs Target Interval (by Block)',
    xaxis=dict(title='Target Interval (ms)'),
    yaxis=dict(title='STD of Reported Interval (ms)'),
    width=800,
    height=500,
    plot_bgcolor='white',
    legend=dict(x=1.05),
    margin=dict(l=80, r=160, t=80, b=80),
)

fig = go.Figure(data=traces, layout=layout)
fig.update_xaxes(showgrid=True, gridcolor='lightgray')
fig.update_yaxes(showgrid=True, gridcolor='lightgray')
fig.show()


# Error vs Delay  


In [5]:
# --- Error vs Set Duration Plot ---

# Compute mean ± std of error per set duration
error_summary = go_df.groupby('set_duration')['reproduction_error'].agg(['mean', 'sem', 'count']).reset_index()

error_trace = go.Scatter(
    x=error_summary['set_duration'],
    y=error_summary['mean'],
    error_y=dict(
        type='data',
        array=error_summary['sem'],
        visible=True
    ),
    mode='markers+lines',
    marker=dict(size=10, color='crimson'),
    line=dict(width=2),
    name='Mean Error ± STD'
)

layout2 = go.Layout(
    title='Reproduction Error vs Delay Duration',
    xaxis=dict(title='Delay Duration (ms)'),
    yaxis=dict(title='Mean Reproduction Error (ms)'),
    width=700,
    height=500,
    margin=dict(l=80, r=50, t=60, b=60),
    plot_bgcolor='white'
)

fig2 = go.Figure(data=[error_trace], layout=layout2)
fig2.update_xaxes(showgrid=True, gridcolor='lightgray')
fig2.update_yaxes(showgrid=True, gridcolor='lightgray')

fig2.show()


# Error vs Target Interval


In [6]:

# Compute mean ± std of error per Target Interval
error_summary = go_df.groupby('target_interval')['reproduction_error'].agg(['mean', 'std', 'count']).reset_index()

error_trace = go.Scatter(
    x=error_summary['target_interval'],
    y=error_summary['mean'],
    error_y=dict(
        type='data',
        array=error_summary['std'],
        visible=True
    ),
    mode='markers+lines',
    marker=dict(size=10, color='crimson'),
    line=dict(width=2),
    name='Mean Error ± STD'
)

layout2 = go.Layout(
    title='Reproduction Error vs target_interval',
    xaxis=dict(title='target_interval (ms)'),
    yaxis=dict(title='Mean Reproduction Error (ms)'),
    width=700,
    height=500,
    margin=dict(l=80, r=50, t=60, b=60),
    plot_bgcolor='white'
)

fig2 = go.Figure(data=[error_trace], layout=layout2)
fig2.update_xaxes(showgrid=True, gridcolor='lightgray')
fig2.update_yaxes(showgrid=True, gridcolor='lightgray')

fig2.show()


# Error vs Target Interval (split by delay)

In [14]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from matplotlib import cm
import matplotlib.colors as mcolors

# Filter and clean
go_df = df[df['phase'] == 'go'].copy()
go_df.dropna(subset=['target_interval', 'reproduced_interval', 'reproduction_error'], inplace=True)
for col in ['target_interval', 'set_duration', 'reproduced_interval', 'reproduction_error']:
    go_df[col] = pd.to_numeric(go_df[col], errors='coerce')

# Group for mean error stats
grouped = go_df.groupby(['target_interval', 'set_duration'])['reproduction_error'].agg(['mean', 'std', 'count']).reset_index()

# Colors ordered from red (low) to blue (high)
set_durations = sorted(go_df['set_duration'].unique())
cmap = cm.get_cmap('OrRd', len(set_durations)+1)
ordered_colors = [mcolors.to_hex(cmap(i+1)) for i in range(len(set_durations))]

fig = go.Figure()

# Plot all data points (circles)
for i, sd in enumerate(set_durations):
    subset = go_df[go_df['set_duration'] == sd]
    fig.add_trace(go.Scatter(
        x=subset['target_interval'],
        y=subset['reproduction_error'],
        mode='markers',
        name=f'{sd} ms (trials)',
        marker=dict(color=ordered_colors[i], size=6, opacity=0.5, symbol='circle-open'),
        legendgroup=str(sd),
        showlegend=False
    ))

# Plot mean with error bars
for i, sd in enumerate(set_durations):
    mean_subset = grouped[grouped['set_duration'] == sd]
    fig.add_trace(go.Scatter(
        x=mean_subset['target_interval'],
        y=mean_subset['mean'],
        error_y=dict(
            type='data',
            array=mean_subset['std'],
            visible=True
        ),
        mode='lines+markers',
        name=f'{sd} ms (mean)',
        marker=dict(color=ordered_colors[i], size=8),
        line=dict(width=2, color=ordered_colors[i]),
        legendgroup=str(sd)
    ))

# Layout
fig.update_layout(
    title='Reproduction Error vs Target Interval (Individual + Mean with Error Bars)',
    xaxis_title='Target Interval (ms)',
    yaxis_title='Reproduction Error (ms)',
    width=900,
    height=550,
    plot_bgcolor='white',
    xaxis=dict(showgrid=True, gridcolor='lightgray'),
    yaxis=dict(showgrid=True, gridcolor='lightgray'),
    legend_title='Set Duration'
)

fig.show()


# Reported Interval vs Trial number in session

In [7]:

# Filter and clean
go_df = df[df['phase'] == 'go'].copy()
go_df.dropna(subset=['target_interval', 'reproduced_interval'], inplace=True)
go_df['target_interval'] = pd.to_numeric(go_df['target_interval'], errors='coerce')
go_df['reproduced_interval'] = pd.to_numeric(go_df['reproduced_interval'], errors='coerce')

# Add a trial number column
go_df['trial_number'] = range(1, len(go_df) + 1)

# Set smoothing window
N = 5  # Number of trials to smooth over

# Get unique target intervals
target_intervals = sorted(go_df['target_interval'].unique())

# Plot
fig = go.Figure()

for ti in target_intervals:
    subset = go_df[go_df['target_interval'] == ti].copy()
    subset['smoothed'] = subset['reproduced_interval'].rolling(window=N, min_periods=1).mean()

    fig.add_trace(go.Scatter(
        x=subset['trial_number'],
        y=subset['smoothed'],
        mode='lines+markers',
        name=f'{int(ti)} ms',
        marker=dict(size=6),
        line=dict(width=2),
    ))

# Layout
fig.update_layout(
    title=f'Reproduced Interval vs Trial Number (Smoothed over {N} trials)',
    xaxis_title='Trial Number',
    yaxis_title='Smoothed Reproduced Interval (ms)',
    legend_title='Target Interval',
    template='simple_white',
    width=900,
    height=500
)

fig.show()


In [9]:

go_df = df[df['phase'] == 'go'].copy()
go_df.dropna(subset=['target_interval', 'reproduced_interval', 'reproduction_error'], inplace=True)
for col in ['target_interval', 'set_duration', 'reproduced_interval', 'reproduction_error']:
    go_df[col] = pd.to_numeric(go_df[col], errors='coerce')
go_df['trial_number'] = range(len(go_df))
error_std = go_df['reproduction_error'].std()
go_df = go_df[go_df['reproduction_error'].abs() <= 2 * error_std].reset_index(drop=True)

# === 1. Mean Error vs Target Interval ===
error_stats = go_df.groupby('target_interval')['reproduction_error'].agg(['mean', 'std']).reset_index()
px.scatter(error_stats, x='target_interval', y='mean', error_y='std',
           title='Mean Error vs Target Interval', labels={'mean': 'Reproduction Error (ms)'}).show()

# === 2. Scalar Variability (SD of reproduced interval) ===
scalar_var = go_df.groupby('target_interval')['reproduced_interval'].std().reset_index()
px.line(scalar_var, x='target_interval', y='reproduced_interval',
           title='Scalar Variability', labels={'reproduced_interval': 'SD of Reproduced Interval (ms)'}).show()

# === 3. Regression Toward the Mean ===
px.scatter(go_df, x='target_interval', y='reproduced_interval', trendline="ols",
           title='Regression Toward the Mean').show()

# === 4. Reproduction Error vs Set Duration ===
error_set = go_df.groupby('set_duration')['reproduction_error'].agg(['mean', 'std']).reset_index()
px.scatter(error_set, x='set_duration', y='mean', error_y='std',
           title='Error vs Set Duration', labels={'mean': 'Reproduction Error (ms)'}).show()

# === 5. Error by Target Interval x Set Duration ===
interaction = go_df.groupby(['target_interval', 'set_duration'])['reproduction_error'].mean().reset_index()
px.line(interaction, x='target_interval', y='reproduction_error', color='set_duration',
        title='Error by Target Interval x Set Duration').show()

# === 6. Trial-by-Trial Reproduced Interval ===
px.line(go_df, x='trial_number', y='reproduced_interval', color='target_interval',
        title='Reproduced Interval Across Trials').show()

# === 7. Autocorrelation of Errors ===
lags = list(range(1, 10))
autocorr_vals = [go_df['reproduction_error'].autocorr(lag=l) for l in lags]
px.bar(x=lags, y=autocorr_vals, labels={'x': 'Lag', 'y': 'Autocorrelation'},
       title='Autocorrelation of Reproduction Error').show()

# === 8. Error Distributions by Set Duration ===
px.violin(go_df, x='set_duration', y='reproduction_error', box=True, points='all',
          title='Error Distributions by Set Duration').show()

# === 9. Coefficient of Variation (CV) by Target Interval ===
cv = go_df.groupby('target_interval')['reproduced_interval'].agg(['mean', 'std']).reset_index()
cv['cv'] = cv['std'] / cv['mean']
px.line(cv, x='target_interval', y='cv', title='Coefficient of Variation by Target Interval').show()

# === 10. Outlier Detection (±2 SD) ===
threshold = 2 * go_df['reproduction_error'].std()
go_df['is_outlier'] = go_df['reproduction_error'].abs() > threshold
px.scatter(go_df, x='trial_number', y='reproduction_error', color='is_outlier',
           title='Outliers in Reproduction Error').show()
