# In [None]:
import pandas as pd
import os
import plotly.graph_objects as go
# All relative paths below are resolved from bin/ANALYSIS. When the current
# working directory ends with 'ALPACA-paper' (presumably the repository
# root -- confirm), step into bin/ANALYSIS first.
if os.getcwd().endswith('ALPACA-paper'):
    os.chdir('bin/ANALYSIS')
output_dir = "../../_assets/ancestral_concordance"
# os.listdir() returns entries in arbitrary order; sort the names so the
# concatenated row order is reproducible across machines and filesystems.
files = sorted(x for x in os.listdir(output_dir) if x.endswith('.csv'))
if not files:
    # pd.concat([]) would otherwise fail with the opaque
    # "No objects to concatenate" message.
    raise FileNotFoundError(f'No .csv files found in {output_dir!r}')
results_df = pd.concat([pd.read_csv(f'{output_dir}/{x}') for x in files])
# Keep only the rows whose `het` column equals 'clonal'.
results_df = results_df[results_df['het'] == 'clonal']
# ---- Figure styling constants ----
scale = 0.2  # not referenced anywhere in this cell
font_size = 45
w = 600
h = 800
fonts = 'DejaVu Sans'
x_axis_title = '% of genome removed'
y_axis_title = 'Clustering concordance'

# Appearance of the box traces added to the figure further down.
jitter_value = 0
point_size = 7
line_width = 2

# Axis titles are drawn at 70% of the base font size.
axis_title_size = font_size*0.7

# ---- Empty figure with a transparent background and no legend ----
fig = go.Figure()
fig.update_layout(
    title='',
    font={'family': fonts, 'size': font_size},
    width=w,
    height=h,
    showlegend=False,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
)

# x axis: no grid lines.
fig.update_xaxes(
    showgrid=False,
    title=x_axis_title,
    title_font={'family': fonts, 'size': axis_title_size},
)

# y axis: thin black grid lines plus a black zero line.
fig.update_yaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='black',
    zeroline=True,
    zerolinecolor='black',
    zerolinewidth=1,
    title=y_axis_title,
    title_font={'family': fonts, 'size': axis_title_size},
)

def _removed_fraction(cohort_name):
    """Return the numeric part of a cohort name (e.g. 'SCNA_0.2' -> 0.2)."""
    return float(cohort_name.replace('SCNA_', ''))


# Add one vertical box per comparison cohort. Cohorts are visited in
# ascending numeric order so the boxes appear left-to-right by percentage,
# independent of the order in which the rows were loaded.
for cohort in sorted(results_df.comparison_cohort.unique(), key=_removed_fraction):
    cohort_results = results_df[results_df.comparison_cohort == cohort]
    # round() rather than int(): binary floating point makes e.g.
    # 0.29 * 100 == 28.999999999999996, which int() would truncate to 28.
    y_name = f'{round(_removed_fraction(cohort) * 100)}%'
    trace = go.Box(
        y=cohort_results.concordance_scores,
        orientation='v',
        boxpoints='outliers',  # draw individual points only for outliers
        jitter=jitter_value,
        pointpos=0,
        line=dict(width=line_width),
        marker=dict(size=point_size),
        name=y_name,
    )
    fig.add_trace(trace)

# Switch both the paper and the plotting area to an opaque white background
# before exporting.
fig.update_layout(paper_bgcolor='white', plot_bgcolor='white')

# Write the figure as a PDF; the path is relative to the current working
# directory.
output_pdf = '../../figures/Suppfig1d_ancestral_concordance_boxplot.pdf'
fig.write_image(output_pdf, format='pdf')