In [1]:
# all imports, numpy, scipy, matplotlib, seaborn, pandas, plotly
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import sklearn 
import os
from matplotlib import cm as colours 
from colorama import Fore, Back, Style
from scipy.stats import sem
import joblib
from scipy.stats import ttest_rel, ttest_ind
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
import plotly.io as pio






In [2]:
# Load the per-iteration decoding accuracies and normalise the labels used downstream:
#   * the 'Method' column is renamed to 'Aligner'
#   * 'Position' values are upper-cased
#   * aligner labels 'Kumar' / 'MFA' become 'Manual' / 'Automated'
Accuracy_df_Kumar_MFA = (
    pd.read_csv("Accuracy_df_Kumar_MFA.csv")
    .rename(columns={'Method': "Aligner"})
    .assign(
        Position=lambda frame: frame['Position'].str.upper(),
        Aligner=lambda frame: frame['Aligner'].replace(['Kumar', 'MFA'], ['Manual', 'Automated']),
    )
)
# Preview the first 20 rows, then report (n_rows, n_columns).
print(Accuracy_df_Kumar_MFA.head(20))
print(Accuracy_df_Kumar_MFA.shape)

   Patient Position    Aligner  Accuracy
0      S14       P1     Manual  0.604167
1      S14       P1     Manual  0.576389
2      S14       P1     Manual  0.583333
3      S14       P1     Manual  0.569444
4      S14       P1     Manual  0.597222
5      S14       P1     Manual  0.576389
6      S14       P1     Manual  0.576389
7      S14       P1     Manual  0.597222
8      S14       P1     Manual  0.604167
9      S14       P1     Manual  0.611111
10     S14       P1  Automated  0.398649
11     S14       P1  Automated  0.405405
12     S14       P1  Automated  0.385135
13     S14       P1  Automated  0.398649
14     S14       P1  Automated  0.378378
15     S14       P1  Automated  0.425676
16     S14       P1  Automated  0.405405
17     S14       P1  Automated  0.385135
18     S14       P1  Automated  0.405405
19     S14       P1  Automated  0.371622
(300, 4)


In [3]:
# Exclude patient 'S14' and relabel the remaining patients S26/S23/S33/S22 as S1-S4.
# The filter and the relabel are chained through .assign(), which returns a new
# frame, so the relabel never writes into a filtered slice of the original
# (the pattern that triggers pandas' SettingWithCopyWarning).
patient_rename = {
    'S26': 'S1',
    'S23': 'S2',
    'S33': 'S3',
    'S22': 'S4'
}
Accuracy_df_Kumar_MFA = (
    Accuracy_df_Kumar_MFA
    .loc[Accuracy_df_Kumar_MFA['Patient'] != 'S14']
    .assign(Patient=lambda frame: frame['Patient'].replace(patient_rename))
)
Accuracy_df_Kumar_MFA.head()

Unnamed: 0,Patient,Position,Aligner,Accuracy
60,S4,P1,Manual,0.18543
61,S4,P1,Manual,0.172185
62,S4,P1,Manual,0.18543
63,S4,P1,Manual,0.18543
64,S4,P1,Manual,0.13245


In [4]:
# Sanity check: how many accuracy values does one Patient x Position x Aligner cell
# hold before and after collapsing the repeated runs to their mean?
# Only the count of the first group (.iloc[0]) is reported.
stat_keys = ['Patient', 'Position', 'Aligner']

n_per_cell_raw = Accuracy_df_Kumar_MFA.groupby(stat_keys).count().Accuracy.iloc[0]
print(f"Number of Accuracy scores before grouping and averaging \n {n_per_cell_raw}")

# One mean accuracy per Patient x Position x Aligner.
Accuracy_df4stats = Accuracy_df_Kumar_MFA.groupby(stat_keys, as_index=False)['Accuracy'].mean()

n_per_cell_avg = Accuracy_df4stats.groupby(stat_keys).count().Accuracy.iloc[0]
print(f"Number of Accuracy scores AFTER grouping and averaging \n {n_per_cell_avg}")

# NOTE(review): this displays the raw per-iteration frame, not Accuracy_df4stats -- confirm which was intended.
Accuracy_df_Kumar_MFA.head(len(Accuracy_df_Kumar_MFA))

Number of Accuracy scores before grouping and averaging 
 10
Number of Accuracy scores AFTER grouping and averaging 
 1


Unnamed: 0,Patient,Position,Aligner,Accuracy
60,S4,P1,Manual,0.185430
61,S4,P1,Manual,0.172185
62,S4,P1,Manual,0.185430
63,S4,P1,Manual,0.185430
64,S4,P1,Manual,0.132450
...,...,...,...,...
295,S3,P3,Automated,0.239130
296,S3,P3,Automated,0.282609
297,S3,P3,Automated,0.217391
298,S3,P3,Automated,0.239130


## All (dist: patients), positions dropped, manual vs. auto

In [5]:
# Collapse positions and iterations: one mean accuracy per Patient x Aligner.
acc_df_kumar_mfa_pt_avg = (
    Accuracy_df_Kumar_MFA
    .drop(columns=['Position'])
    .groupby(['Patient', 'Aligner'], as_index=False)['Accuracy']
    .mean()
)
print(acc_df_kumar_mfa_pt_avg)

  Patient    Aligner  Accuracy
0      S1  Automated  0.573826
1      S1     Manual  0.559459
2      S2  Automated  0.240260
3      S2     Manual  0.265784
4      S3  Automated  0.352899
5      S3     Manual  0.352174
6      S4  Automated  0.261623
7      S4     Manual  0.216998


In [21]:
# Figure: mean decoding accuracy per patient under each aligner.
# One line per patient connects its Manual and Automated means; a box per aligner
# is overlaid, and a dashed horizontal line marks chance.

# Base color mapping for aligners
colorsmap = {
    'Manual': px.colors.qualitative.Vivid[1],
    'Automated': px.colors.qualitative.Vivid[0]
}

# Patients are drawn from a separate palette so they are not confused with aligners
patient_colors = px.colors.qualitative.Dark24
patients = acc_df_kumar_mfa_pt_avg["Patient"].unique()
patient_color_map = {
    patient: patient_colors[i % len(patient_colors)]  # wraps around if patients outnumber colors
    for i, patient in enumerate(patients)
}

# --- Patient lines (colored uniquely) ---
fig = px.line(
    acc_df_kumar_mfa_pt_avg,
    x='Aligner',
    y='Accuracy',
    color='Patient',
    color_discrete_map=patient_color_map
)

# --- Overlay boxplots (aligner colors unchanged) ---
boxplt = px.box(
    acc_df_kumar_mfa_pt_avg,
    x='Aligner',
    y='Accuracy',
    color='Aligner',
    color_discrete_sequence=px.colors.qualitative.Vivid,
    color_discrete_map=colorsmap
)
# Box traces are appended after the line traces, so they are drawn on top of them.
fig.add_traces(boxplt.data)

# --- Chance line ---
# NOTE(review): 1/9 presumably corresponds to nine phoneme classes -- confirm.
fig.add_hline(
    y=1/9,
    line_dash='dash',
    line_color='gray',
    annotation_text='Chance',
    annotation_position='bottom right'
)

# --- Layout ---
fig.update_layout(
    plot_bgcolor='white',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.02,
        xanchor='left',
        x=0.01,
        title_text=''
    ),
    font=dict(size=20, family='Arial'),
)
fig.update_xaxes(
    title='',
    mirror=False,
    ticks='outside',
    showline=True,
    linecolor='black',
    showgrid=False,
    autorange='reversed'  # flips the category order on the x axis
)
fig.update_yaxes(
    title='Phoneme decoding accuracy',
    mirror=False,
    ticks='outside',
    showline=True,
    linecolor='black',
    showgrid=False,
    rangemode='tozero'
)

# Assemble the output path with os.path.join so the figure lands in
# Figures/Figure 4/October_2025 on any OS, not only where "\" is the path separator.
fig_path = os.path.join(
    "Figures", "Figure 4", "October_2025",
    "manualvsautomated_DecodingAcc_linedashedbyPatients5.svg",
)
fig.write_image(fig_path, format='svg')
fig.show()


In [7]:
# Paired Wilcoxon signed-rank test: Manual vs. Automated accuracy, one pair per patient.
# Pivot so that each row holds BOTH scores of one patient; the pairing is then
# explicit instead of depending on two filtered Series sharing the same row order.
paired_by_patient = (
    acc_df_kumar_mfa_pt_avg
    .pivot(index='Patient', columns='Aligner', values='Accuracy')
    .reindex(columns=['Manual', 'Automated'])
)
# A paired test is undefined for a patient that lacks one of the two scores.
incomplete = paired_by_patient.isna().any(axis=1)
if incomplete.any():
    raise ValueError(
        f"Patients missing a Manual or Automated score: {list(paired_by_patient.index[incomplete])}"
    )
kumar_array = paired_by_patient['Manual']
mfa_array = paired_by_patient['Automated']
result = wilcoxon(kumar_array, mfa_array)
print(result)
# Significance legend for reading the p-value.
print("***: p < 0.001\n** : p < 0.01\n*  : p < 0.05\nn.s.: p > 0.05")


WilcoxonResult(statistic=3.0, pvalue=0.625)
***: p < 0.001
** : p < 0.01
*  : p < 0.05
n.s.: p > 0.05


## Across positions: distribution over patients, averaged across iterations

In [8]:
# Average over iterations: one mean accuracy per Patient x Position x Aligner.
acc_df_kumar_mfa_pos_avgiter = (
    Accuracy_df_Kumar_MFA
    .groupby(['Patient', 'Position', 'Aligner'], as_index=False)['Accuracy']
    .mean()
)
print(acc_df_kumar_mfa_pos_avgiter)


   Patient Position    Aligner  Accuracy
0       S1       P1  Automated  0.602013
1       S1       P1     Manual  0.597973
2       S1       P2  Automated  0.552349
3       S1       P2     Manual  0.560811
4       S1       P3  Automated  0.567114
5       S1       P3     Manual  0.519595
6       S2       P1  Automated  0.268182
7       S2       P1     Manual  0.267550
8       S2       P2  Automated  0.264286
9       S2       P2     Manual  0.293377
10      S2       P3  Automated  0.188312
11      S2       P3     Manual  0.236424
12      S3       P1  Automated  0.469565
13      S3       P1     Manual  0.480435
14      S3       P2  Automated  0.317391
15      S3       P2     Manual  0.280435
16      S3       P3  Automated  0.271739
17      S3       P3     Manual  0.295652
18      S4       P1  Automated  0.210526
19      S4       P1     Manual  0.166225
20      S4       P2  Automated  0.330263
21      S4       P2     Manual  0.323841
22      S4       P3  Automated  0.244079
23      S4      

In [9]:
# Per-position paired Wilcoxon signed-rank test (Manual vs. Automated); pairs are patients.
for pos in acc_df_kumar_mfa_pos_avgiter['Position'].unique():
    print(pos)
    # One row per patient with both aligners side by side: pivoting on Patient
    # guarantees that the two samples are matched patient-by-patient.
    paired = (
        acc_df_kumar_mfa_pos_avgiter[acc_df_kumar_mfa_pos_avgiter['Position'] == pos]
        .pivot(index='Patient', columns='Aligner', values='Accuracy')
        .reindex(columns=['Manual', 'Automated'])
    )
    # A paired test is undefined for a patient that lacks one of the two scores.
    incomplete = paired.isna().any(axis=1)
    if incomplete.any():
        raise ValueError(
            f"Position {pos}: patients missing a Manual or Automated score: "
            f"{list(paired.index[incomplete])}"
        )
    kumar_array = paired['Manual']
    mfa_array = paired['Automated']
    result = wilcoxon(kumar_array, mfa_array)
    print("wilcoxon result:")
    print(result)



P1
wilcoxon result:
WilcoxonResult(statistic=3.0, pvalue=0.625)
P2
wilcoxon result:
WilcoxonResult(statistic=5.0, pvalue=1.0)
P3
wilcoxon result:
WilcoxonResult(statistic=4.0, pvalue=0.875)


In [22]:
# Figure: decoding accuracy by position, Manual vs. Automated boxes side by side.
# The plotted frame holds one iteration-averaged accuracy per Patient x Position x Aligner,
# and every value is also drawn as a point.
colorsmap = {
    'Manual': px.colors.qualitative.Vivid[1],
    'Automated': px.colors.qualitative.Vivid[0]
}
fig = px.box(acc_df_kumar_mfa_pos_avgiter,
    y='Accuracy',
    x='Position',
    color='Aligner',
    points='all',  # Shows stripplot-like points
    boxmode='group',
    hover_name='Patient',
    hover_data=['Patient', 'Accuracy', 'Position'],
    color_discrete_sequence= px.colors.qualitative.Vivid, 
    color_discrete_map= colorsmap,
    category_orders={"Aligner":['Manual', 'Automated']}     
    )
# boxmean=True additionally marks the mean inside each box.
fig.update_traces(marker=dict(size=5,opacity=0.9), jitter=0.3, boxmean=True)

# Add chance level (horizontal line)
# NOTE(review): 1/9 presumably corresponds to nine phoneme classes -- confirm.
fig.add_hline(
    y=1/9,
    line_dash='dash',
    line_color='gray',
    annotation_text='Chance',
    annotation_position='bottom right'
)

# Axis and layout styling
fig.update_yaxes(rangemode="tozero", showline=True, linecolor="black", linewidth=2, ticks='outside')
fig.update_xaxes(showline=True, linecolor="black", linewidth=2, ticks='outside')
fig.update_layout(
    plot_bgcolor='white',
    legend=None,  # NOTE(review): if the intent is to hide the legend, showlegend=False may be needed -- confirm
    title_text='',
    title_x=0.5,
    font=dict(size=20, family='Arial')
)

# Assemble the output path with os.path.join so the figure lands in
# Figures/Figure 4/October_2025 on any OS, not only where "\" is the path separator.
fig_path = os.path.join(
    "Figures", "Figure 4", "October_2025",
    "manualvsautomated_DecodingAccByPosPlotlyavgd5.svg",
)
fig.write_image(fig_path, format='svg')
fig.show()


In [None]:
# Scratch arithmetic: two pairwise differences, printed side by side.
# NOTE(review): the operands are undocumented magic numbers -- record where they come from.
diffb, diffa = 17.062 - 24.203, 18.201 - 25.119
print(diffb, diffa)

# w = 0.871, h = 0.987

-7.140999999999998 -6.917999999999999


## Per patient: distribution over iterations, averaged across positions (the effect of position on patient/method is not of interest here)

In [23]:
# Tag every run with an iteration index, then average over positions.
# Rows are ordered by Patient / Aligner / Position first, so cumcount() numbers the
# runs 0, 1, 2, ... inside each Patient x Aligner x Position cell in row order.
runs_indexed = (
    Accuracy_df_Kumar_MFA
    .sort_values(['Patient', 'Aligner', 'Position'])
    .reset_index(drop=True)
)
runs_indexed['Iteration'] = runs_indexed.groupby(['Patient', 'Aligner', 'Position']).cumcount()

# Mean over positions for each Patient x Aligner x Iteration.
# NOTE(review): this pairs run k of one position with run k of the others purely by
# row order -- confirm that the iteration indices are comparable across positions.
acc_df_kumar_mfa_iter_avgpos = (
    runs_indexed
    .drop(columns=['Position'])
    .groupby(['Patient', 'Aligner', 'Iteration'], as_index=False)['Accuracy']
    .mean()
)

acc_df_kumar_mfa_iter_avgpos.head()

Unnamed: 0,Patient,Aligner,Iteration,Accuracy
0,S1,Automated,0,0.577181
1,S1,Automated,1,0.563758
2,S1,Automated,2,0.572707
3,S1,Automated,3,0.57047
4,S1,Automated,4,0.577181


In [24]:
# Spot check for patient S1: mean position-averaged accuracy under each aligner
# (mean1 = Manual, mean2 = Automated).
s1_rows = acc_df_kumar_mfa_iter_avgpos[acc_df_kumar_mfa_iter_avgpos['Patient'] == 'S1']
mean1 = s1_rows.loc[s1_rows['Aligner'] == 'Manual', 'Accuracy'].mean()
mean2 = s1_rows.loc[s1_rows['Aligner'] == 'Automated', 'Accuracy'].mean()

print(mean1, mean2)

0.5594594594594595 0.5738255033557047


In [25]:
# Per-patient Mann-Whitney U test: Manual vs. Automated, samples are the
# position-averaged iterations of that patient.
for pat in acc_df_kumar_mfa_iter_avgpos['Patient'].unique():
    print(pat)
    pat_rows = acc_df_kumar_mfa_iter_avgpos.loc[acc_df_kumar_mfa_iter_avgpos['Patient'] == pat]
    kumar_array = pat_rows.loc[pat_rows['Aligner'] == 'Manual', 'Accuracy']
    mfa_array = pat_rows.loc[pat_rows['Aligner'] == 'Automated', 'Accuracy']
    # NOTE(review): the test treats iterations as independent observations -- confirm this holds.
    result = mannwhitneyu(kumar_array, mfa_array)
    print(result)



S1
MannwhitneyuResult(statistic=3.0, pvalue=0.000418144432661332)
S2
MannwhitneyuResult(statistic=98.0, pvalue=0.00031970214768035134)
S3
MannwhitneyuResult(statistic=48.5, pvalue=0.9393087523212691)
S4
MannwhitneyuResult(statistic=0.0, pvalue=0.0001746242085521927)


In [26]:
# Figure: decoding accuracy per patient, Manual vs. Automated boxes side by side.
# Each point is one iteration (averaged across positions).

# Category orders handed to plotly. Only keys that match a plotted column take
# effect; the plotted frame has 'Patient' and 'Aligner' columns.
orderforplot = {
    "Difference_Variable": ['Onset', 'Offset', 'Duration'], 
    "Phoneme": ['a', 'ae', 'b', 'g', 'i', 'k', 'p', 'u', 'v'], 
    "Patient": ['S1', 'S2', 'S3', 'S4', 'S5'], 
    "Aligner":['Manual', 'Automated']
    
}

colorsmap = {
    'Manual': px.colors.qualitative.Vivid[1],
    'Automated': px.colors.qualitative.Vivid[0]
}
fig = px.box(acc_df_kumar_mfa_iter_avgpos,
    y='Accuracy',
    x='Patient',
    color='Aligner',
    points='all',  # Shows stripplot-like points
    boxmode='group', 
    hover_data=['Patient', 'Aligner', 'Accuracy', 'Iteration'],
    color_discrete_sequence= px.colors.qualitative.Vivid, 
    color_discrete_map= colorsmap, 
    category_orders=orderforplot
    )
fig.update_traces(jitter=0.3)
# Chance level (horizontal line)
# NOTE(review): 1/9 presumably corresponds to the nine phonemes listed above -- confirm.
fig.add_hline(
    y=1/9,
    line_dash='dash',
    line_color='gray',
    annotation_text='Chance',
    annotation_position='bottom right'
)
fig.update_yaxes(rangemode="tozero", showline=True, linecolor="black", linewidth=2, ticks='outside')
fig.update_xaxes(showline=True, linecolor="black", linewidth=2, ticks='outside')
fig.update_layout(
    plot_bgcolor='white',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.02,
        xanchor='left',
        x=0.01,
        title_text=''
    ),
    title_text='',
    title_x=0.5,
    font=dict(size=20, family='Arial')
)

# Assemble the output path with os.path.join so the figure lands in
# Figures/Figure 4/October_2025 on any OS, not only where "\" is the path separator.
fig_path = os.path.join(
    "Figures", "Figure 4", "October_2025",
    "manualvsautomated_DecodingAccByPatPlotlyavgd5.svg",
)
fig.write_image(fig_path, format='svg')
fig.show()


# Decoding TW Analyses 

In [27]:
# Load the first of three decoding-window accuracy files and preview it.
Accuracy_DWs1 = pd.read_csv('Accuracy_DWs.csv')
Accuracy_DWs1.head()

Unnamed: 0,Patient,Position,Method,Decoding TW,Accuracy
0,S14,p1,Kumar,1S,0.604167
1,S14,p1,Kumar,1S,0.597222
2,S14,p1,Kumar,1S,0.604167
3,S14,p1,Kumar,1S,0.590278
4,S14,p1,Kumar,1S,0.604167


In [28]:
# Load the second of three decoding-window accuracy files and preview it.
Accuracy_DWs2 = pd.read_csv('Accuracy_DWs2.csv')
Accuracy_DWs2.head()

Unnamed: 0,Patient,Position,Method,Decoding TW,Accuracy
0,S14,p1,Kumar,0.1S,0.444444
1,S14,p1,Kumar,0.1S,0.416667
2,S14,p1,Kumar,0.1S,0.402778
3,S14,p1,Kumar,0.1S,0.416667
4,S14,p1,Kumar,0.1S,0.416667


In [29]:
# Load the third of three decoding-window accuracy files and preview it.
Accuracy_DWs3 = pd.read_csv('Accuracy_DWs3.csv')
Accuracy_DWs3.head()

Unnamed: 0,Patient,Position,Method,Decoding TW,Accuracy
0,S14,p1,Kumar,0.025S,0.333333
1,S14,p1,Kumar,0.025S,0.340278
2,S14,p1,Kumar,0.025S,0.333333
3,S14,p1,Kumar,0.025S,0.340278
4,S14,p1,Kumar,0.025S,0.333333


In [30]:
# Stack the three decoding-window frames row-wise into one table.
# ignore_index is not set, so each frame keeps its own row labels and labels
# repeat across the three parts.
Accuracy_DWs = pd.concat([Accuracy_DWs1, Accuracy_DWs2, Accuracy_DWs3], axis=0)
Accuracy_DWs.head(20)

Unnamed: 0,Patient,Position,Method,Decoding TW,Accuracy
0,S14,p1,Kumar,1S,0.604167
1,S14,p1,Kumar,1S,0.597222
2,S14,p1,Kumar,1S,0.604167
3,S14,p1,Kumar,1S,0.590278
4,S14,p1,Kumar,1S,0.604167
5,S14,p1,Kumar,1S,0.576389
6,S14,p1,Kumar,1S,0.583333
7,S14,p1,Kumar,1S,0.611111
8,S14,p1,Kumar,1S,0.590278
9,S14,p1,Kumar,1S,0.555556


In [31]:
# List the distinct decoding-window labels, in reverse order of first appearance.
Accuracy_DWs['Decoding TW'].unique()[::-1]

array(['0.01S', '0.015S', '0.025S', '0.05S', '0.1S', '0.2S', '0.4S',
       '0.6S', '0.8S', '1S'], dtype=object)

In [32]:
# Drop three of the short decoding windows; the remaining labels are listed afterwards.
# NOTE(review): the reason for excluding exactly these windows is not recorded -- document it.
excluded_tws = ['0.015S', '0.025S', '0.05S']
keep_rows = ~Accuracy_DWs['Decoding TW'].isin(excluded_tws)
Accuracy_DWs = Accuracy_DWs[keep_rows]
Accuracy_DWs['Decoding TW'].unique()

array(['1S', '0.8S', '0.6S', '0.4S', '0.2S', '0.1S', '0.01S'],
      dtype=object)

In [33]:
# Normalise the decoding-window table:
#   * the 'Method' column is renamed to 'Aligner'
#   * 'Position' values are upper-cased
#   * aligner labels 'Kumar' / 'MFA' become 'Manual' / 'Automated'
#   * patient 'S14' is excluded and S26/S23/S33/S22 are relabelled S1-S4
# Every step returns a new frame (rename / assign / loc), so no column is ever
# assigned on a filtered slice (the pattern behind pandas' SettingWithCopyWarning).
patient_rename = {
    'S26': 'S1',
    'S23': 'S2',
    'S33': 'S3',
    'S22': 'S4'
}
Accuracy_DWs = (
    Accuracy_DWs
    .rename(columns={'Method': "Aligner"})
    .assign(
        Position=lambda frame: frame['Position'].str.upper(),
        Aligner=lambda frame: frame['Aligner'].replace(['Kumar', 'MFA'], ['Manual', 'Automated']),
    )
    .loc[lambda frame: frame['Patient'] != 'S14']
    .assign(Patient=lambda frame: frame['Patient'].replace(patient_rename))
)
print(Accuracy_DWs.shape)
Accuracy_DWs.head()

(1680, 5)


Unnamed: 0,Patient,Position,Aligner,Decoding TW,Accuracy
300,S4,P1,Manual,1S,0.15894
301,S4,P1,Manual,1S,0.178808
302,S4,P1,Manual,1S,0.152318
303,S4,P1,Manual,1S,0.13245
304,S4,P1,Manual,1S,0.18543


In [34]:
# Sanity check: how many accuracy values does one Patient x Aligner x Decoding TW cell
# hold before and after averaging? Positions are pooled by dropping the column first.
# Only the count of the first group (.iloc[0]) is reported.
dw_keys = ['Patient', 'Aligner', 'Decoding TW']
dws_no_position = Accuracy_DWs.drop(columns=['Position'])

n_per_cell_raw_dw = dws_no_position.groupby(dw_keys).count().Accuracy.iloc[0]
print(f"Number of Accuracy scores before grouping and averaging \n {n_per_cell_raw_dw}")

# One mean accuracy per Patient x Aligner x Decoding TW.
Accuracy_df4statsdw = dws_no_position.groupby(dw_keys, as_index=False)['Accuracy'].mean()

n_per_cell_avg_dw = Accuracy_df4statsdw.groupby(dw_keys).count().Accuracy.iloc[0]
print(f"Number of Accuracy scores AFTER grouping and averaging \n {n_per_cell_avg_dw}")

Accuracy_df4statsdw.head(len(Accuracy_df4statsdw))

Number of Accuracy scores before grouping and averaging 
 30
Number of Accuracy scores AFTER grouping and averaging 
 1


Unnamed: 0,Patient,Aligner,Decoding TW,Accuracy
0,S1,Automated,0.01S,0.319239
1,S1,Automated,0.1S,0.379642
2,S1,Automated,0.2S,0.443848
3,S1,Automated,0.4S,0.514765
4,S1,Automated,0.6S,0.562416
5,S1,Automated,0.8S,0.56443
6,S1,Automated,1S,0.579418
7,S1,Manual,0.01S,0.308784
8,S1,Manual,0.1S,0.397523
9,S1,Manual,0.2S,0.440541


In [35]:
# Per-decoding-window paired Wilcoxon signed-rank test (Manual vs. Automated); pairs are patients.
for dw in Accuracy_df4statsdw['Decoding TW'].unique():
    print(dw)
    # One row per patient with both aligners side by side: pivoting on Patient
    # guarantees that the two samples are matched patient-by-patient rather
    # than by the row order of two separately filtered Series.
    paired = (
        Accuracy_df4statsdw[Accuracy_df4statsdw['Decoding TW'] == dw]
        .pivot(index='Patient', columns='Aligner', values='Accuracy')
        .reindex(columns=['Manual', 'Automated'])
    )
    # A paired test is undefined for a patient that lacks one of the two scores.
    incomplete = paired.isna().any(axis=1)
    if incomplete.any():
        raise ValueError(
            f"Decoding TW {dw}: patients missing a Manual or Automated score: "
            f"{list(paired.index[incomplete])}"
        )
    kumar_array = paired['Manual']
    mfa_array = paired['Automated']
    result = wilcoxon(kumar_array, mfa_array)
    print(result)



0.01S
WilcoxonResult(statistic=3.0, pvalue=0.625)
0.1S
WilcoxonResult(statistic=2.0, pvalue=0.375)
0.2S
WilcoxonResult(statistic=5.0, pvalue=1.0)
0.4S
WilcoxonResult(statistic=2.0, pvalue=0.375)
0.6S
WilcoxonResult(statistic=4.0, pvalue=0.875)
0.8S
WilcoxonResult(statistic=4.0, pvalue=0.875)
1S
WilcoxonResult(statistic=3.0, pvalue=0.625)


# General Trend for DWs

In [38]:
# Trend figure: mean accuracy (+/- SEM across patients) per decoding window, one line per aligner.

# One value per Aligner x Decoding TW x Patient.
per_patient = Accuracy_df4statsdw.groupby(
    ["Aligner", "Decoding TW", "Patient"], as_index=False
)["Accuracy"].mean()

# Across-patient mean, standard deviation and count for each Aligner x Decoding TW.
summary = per_patient.groupby(
    ["Aligner", "Decoding TW"], as_index=False
).agg(
    mean_acc=("Accuracy", "mean"),
    sd=("Accuracy", "std"),
    n=("Accuracy", "count")
)

# Numeric value for each window label, used as the x coordinate.
tw_map = {
    "1S": 1.0,
    "0.8S": 0.8,
    "0.6S": 0.6,
    "0.4S": 0.4,
    "0.2S": 0.2, 
    "0.1S": 0.1, 
    "0.01S": 0.01
}
summary["TW_value"] = summary["Decoding TW"].map(tw_map)

# Standard error of the mean and the band edges (mean +/- SEM).
summary["sem"] = summary["sd"] / np.sqrt(summary["n"])
summary["upper"] = summary["mean_acc"] + summary["sem"]
summary["lower"] = summary["mean_acc"] - summary["sem"]

aligners_to_plot = ["Manual", "Automated"]
df_plot = summary

fig = go.Figure()

for aligner in aligners_to_plot:
    df_a = df_plot[df_plot["Aligner"] == aligner].sort_values("TW_value")
    # colorsmap is defined in an earlier plotting cell
    color = colorsmap[aligner]

    # Shaded band: trace the upper edge left-to-right, then the lower edge back
    # right-to-left, so 'toself' fills the closed outline.
    fig.add_trace(
        go.Scatter(
            x=pd.concat([df_a["TW_value"], df_a["TW_value"][::-1]]),
            y=pd.concat([df_a["upper"], df_a["lower"][::-1]]),
            fill='toself',
            # assumes the color is an 'rgb(r, g, b)' string; rewrites it as 'rgba(..., 0.2)'
            fillcolor=color.replace("rgb", "rgba").replace(")", ",0.2)"),  
            line=dict(color='rgba(255,255,255,0)'),
            hoverinfo="skip",
            showlegend=False
        )
    )
    
    # Mean line on top of the band.
    fig.add_trace(
        go.Scatter(
            x=df_a["TW_value"],
            y=df_a["mean_acc"],
            mode="lines+markers",
            name=aligner,
            line=dict(width=2, color=color)
        )
    )

# Chance level (horizontal line)
fig.add_hline(
    y=1/9,
    line_dash="dash",
    line_color="red",
    annotation_text="Chance",
    annotation_position="bottom right"
)

fig.update_yaxes(
    title="Decoding Accuracy",
    rangemode="tozero",
    showline=True, linecolor="black", linewidth=2, ticks="outside"
)
# Log-scaled x axis; ticks sit at the window values and show the original labels.
fig.update_xaxes(
    type = 'log',
    title="Decoding TW",
    tickvals=list(tw_map.values()),
    ticktext=list(tw_map.keys()),
    showline=True, linecolor="black", linewidth=2, ticks="outside"
)
fig.update_layout(
    plot_bgcolor='white',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.02,
        xanchor='left',
        x=0.01,
        title_text=''
    ),
    title_text='',
    title_x=0.5,
    font=dict(size=20, family='Arial')
)
#fig.write_image(r"Figures\Figure 4\manualvsautomated_DecodingAccByDTWLinePlot3.pdf", format="pdf")

fig.show()


# Same but per patient (I like more)

In [None]:
# Display the window-label -> numeric-value mapping built for the trend figure.
tw_map

In [39]:
# Millisecond tick label for each decoding-window label (e.g. '0.8S' -> '800').
# Keys follow the same order as tw_map so the two can be used side by side.
tw_mapms = {
    f"{seconds}S": str(round(float(seconds) * 1000))
    for seconds in ('1', '0.8', '0.6', '0.4', '0.2', '0.1', '0.01')
}


In [43]:
# Preview the mean accuracy per Patient x Aligner x Decoding TW.
Accuracy_df4statsdw.head()

Unnamed: 0,Patient,Aligner,Decoding TW,Accuracy
0,S1,Automated,0.01S,0.319239
1,S1,Automated,0.1S,0.379642
2,S1,Automated,0.2S,0.443848
3,S1,Automated,0.4S,0.514765
4,S1,Automated,0.6S,0.562416


In [None]:
# For every patient and decoding window, compare the Manual and Automated values.
# Accuracy_df4statsdw holds one mean per Patient x Aligner x Decoding TW, so each
# selection below contains a single value and every test runs on a single pair.
for pat in Accuracy_df4statsdw.Patient.unique():
    print(pat)
    for dw in Accuracy_df4statsdw['Decoding TW'].unique():
        print(dw)
        kumar_array = Accuracy_df4statsdw[
            (Accuracy_df4statsdw['Aligner'] == 'Manual') & 
            (Accuracy_df4statsdw['Decoding TW'] == dw) &
            (Accuracy_df4statsdw['Patient'] == pat)
            ]['Accuracy']
        mfa_array = Accuracy_df4statsdw[
            (Accuracy_df4statsdw['Aligner'] == 'Automated') & 
            (Accuracy_df4statsdw['Decoding TW'] == dw) &
            (Accuracy_df4statsdw['Patient'] == pat)]['Accuracy'] 
        print(kumar_array, mfa_array)   
        result = wilcoxon(kumar_array, mfa_array)
        print(result)
        
# With a single pair per test there is no distribution to compare, which is why every p-value is 1.



S1
0.01S
7    0.308784
Name: Accuracy, dtype: float64 0    0.319239
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
0.1S
8    0.397523
Name: Accuracy, dtype: float64 1    0.379642
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
0.2S
9    0.440541
Name: Accuracy, dtype: float64 2    0.443848
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
0.4S
10    0.511261
Name: Accuracy, dtype: float64 3    0.514765
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
0.6S
11    0.53964
Name: Accuracy, dtype: float64 4    0.562416
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
0.8S
12    0.559009
Name: Accuracy, dtype: float64 5    0.56443
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
1S
13    0.561486
Name: Accuracy, dtype: float64 6    0.579418
Name: Accuracy, dtype: float64
WilcoxonResult(statistic=0.0, pvalue=1.0)
S2
0.01S
21    0.177704
Name: Accurac

In [50]:
# Figure: accuracy vs. decoding window, one line per Patient x Aligner.
# Color encodes the aligner, dash style encodes the patient. The legend is built
# from dummy traces so it shows aligners and patients separately instead of
# every Patient x Aligner combination.

# Prepare data: one line per patient x aligner
df_plot = per_patient.copy()
df_plot["TW_value"] = df_plot["Decoding TW"].map(tw_map)

fig = go.Figure()

# assign each patient a dash style
dash_styles = ["dot", "dash", "dashdot", "longdash", "longdashdot"]
patients = df_plot["Patient"].unique()
patient_dash_map = {
    # wraps around if patients outnumber dash styles
    patient: dash_styles[i % len(dash_styles)] for i, patient in enumerate(patients)
}

# Real traces (hidden from legend)
for patient in patients:
    for aligner in ["Manual", "Automated"]:
        df_sub = df_plot[(df_plot["Patient"] == patient) & (df_plot["Aligner"] == aligner)]
        df_sub = df_sub.sort_values("TW_value")
        
        fig.add_trace(
            go.Scatter(
                x=df_sub["TW_value"],
                y=df_sub["Accuracy"],
                mode="lines+markers",
                name=f"{aligner} - P{patient}",
                line=dict(
                    color=colorsmap[aligner],
                    dash=patient_dash_map[patient],
                    width=2
                ),
                showlegend=False  # suppress patient x aligner combos
            )
        )

# Dummy traces for legend: aligners (color only)
for aligner in ["Manual", "Automated"]:
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode="lines",
            line=dict(color=colorsmap[aligner], width=2),
            name=aligner, 
        )
    )

# Dummy traces for legend: patients (dash only, neutral color)
for patient in patients:
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode="lines",
            line=dict(color="gray", dash=patient_dash_map[patient], width=2),
            name=f"Patient {patient}"
        )
    )

# Chance line
fig.add_hline(
    y=1/9,
    line_dash="dash",
    line_color="gray",
    annotation_text="Chance",
    annotation_position="bottom right"
)

# Axes formatting
fig.update_yaxes(
    title="Decoding Accuracy",
    rangemode="tozero",
    showline=True, linecolor="black", linewidth=2, ticks="outside"
)
# Ticks sit at the numeric window values (tw_map) and are labelled with the
# millisecond strings (tw_mapms); both dicts list the windows in the same order.
fig.update_xaxes(
    title="Decoding TW",
    tickvals=list(tw_map.values()),
    ticktext=list(tw_mapms.values()),
    tickfont=dict(size=12),
    automargin=True,
    showline=True, linecolor="black", linewidth=2, ticks="outside"
)
fig.update_layout(
    plot_bgcolor='white',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.02,
        xanchor='left',
        x=0.01,
        title_text=''
    ),
    title_text='',
    title_x=0.5,
    font=dict(size=20, family='Arial'), 
    width=2200
)

# Assemble the output path with os.path.join so the figure lands in
# Figures/Figure 4/October_2025 on any OS, not only where "\" is the path separator.
fig_path = os.path.join(
    "Figures", "Figure 4", "October_2025",
    "manualvsautomated_DecodingAccByDTWLinePlotbypat5.pdf",
)
fig.write_image(fig_path, format="pdf")
fig.show()


# Attempt at showing both at the same time (not preferred: too much information)

In [None]:
# Combined figure: across-patient mean +/- SEM band per aligner, plus every
# patient's own line. Relies on `summary` (including its "sem" and "TW_value"
# columns), `per_patient`, `tw_map` and `colorsmap` from earlier cells.
import plotly.graph_objects as go

# --- Prepare summary data ---
summary["upper"] = summary["mean_acc"] + summary["sem"]
summary["lower"] = summary["mean_acc"] - summary["sem"]

aligners_to_plot = ["Manual", "Automated"]
df_summary = summary
df_patients = per_patient.copy()
df_patients["TW_value"] = df_patients["Decoding TW"].map(tw_map)

# --- Unique dash + marker combinations ---
dash_styles = ["dot", "dash", "dashdot", "longdash", "longdashdot", "solid"]
marker_symbols = ["circle", "square", "triangle-up", "diamond", "cross", "x"]
patients = df_patients["Patient"].unique()
# Patient i gets the i-th dash style and the i-th marker symbol (wrapping around).
patient_style_map = {
    patient: {"dash": dash_styles[i % len(dash_styles)], "marker": marker_symbols[i % len(marker_symbols)]}
    for i, patient in enumerate(patients)
}

# --- Create figure ---
fig = go.Figure()

# --- Add SEM shaded regions and mean lines ---
for aligner in aligners_to_plot:
    df_a = df_summary[df_summary["Aligner"] == aligner].sort_values("TW_value")
    color = colorsmap[aligner]

    # SEM shading
    # Upper edge left-to-right, then lower edge right-to-left, so 'toself'
    # fills the closed outline.
    fig.add_trace(
        go.Scatter(
            x=pd.concat([df_a["TW_value"], df_a["TW_value"][::-1]]),
            y=pd.concat([df_a["upper"], df_a["lower"][::-1]]),
            fill='toself',
            # assumes the color is an 'rgb(r, g, b)' string; rewrites it as 'rgba(..., 0.2)'
            fillcolor=color.replace("rgb", "rgba").replace(")", ",0.2)"),
            line=dict(color='rgba(255,255,255,0)'),
            hoverinfo="skip",
            showlegend=False
        )
    )

    # Mean line
    fig.add_trace(
        go.Scatter(
            x=df_a["TW_value"],
            y=df_a["mean_acc"],
            mode="lines+markers",
            line=dict(width=3, color=color),
            marker=dict(symbol="circle", size=8),
            name=f"{aligner} mean",
            showlegend=True
        )
    )

# --- Add per-patient lines with unique styles ---
for patient in patients:
    style = patient_style_map[patient]
    for aligner in aligners_to_plot:
        df_sub = df_patients[(df_patients["Patient"] == patient) & (df_patients["Aligner"] == aligner)]
        df_sub = df_sub.sort_values("TW_value")

        # Both aligner traces of a patient share one legend name; only the
        # Manual trace is listed, so each patient appears once in the legend.
        fig.add_trace(
            go.Scatter(
                x=df_sub["TW_value"],
                y=df_sub["Accuracy"],
                mode="lines+markers",
                line=dict(color=colorsmap[aligner], dash=style["dash"], width=2),
                marker=dict(symbol=style["marker"], size=7),
                name=f"Patient {patient}",
                showlegend=(aligner == "Manual")  # only show once
            )
        )

# --- Chance line ---
fig.add_hline(
    y=1/9,
    line_dash="dash",
    line_color="red",
    annotation_text="Chance",
    annotation_position="bottom right"
)

# --- Axes formatting ---
fig.update_yaxes(
    title="Decoding Accuracy",
    rangemode="tozero",
    showline=True, linecolor="black", linewidth=2, ticks="outside"
)
# Ticks sit at the numeric window values and show the original window labels.
fig.update_xaxes(
    title="Decoding TW",
    tickvals=list(tw_map.values()),
    ticktext=list(tw_map.keys()),
    showline=True, linecolor="black", linewidth=2, ticks="outside"
)

#fig.write_image(r"Figures\Figure 4\manualvsautomated_DecodingAccByDTWLinePlotbypat3.pdf", format="pdf")

fig.show()
