In [None]:
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import panel as pn 
from scipy import stats
from statsmodels.distributions.empirical_distribution import ECDF
from bokeh.io import output_notebook
from bokeh.plotting import figure,show 
from bokeh.layouts import column, row
# Initialise the notebook front-ends once, right after the imports, so that the
# Bokeh figures and Panel widgets built in later cells can render inline.
output_notebook()  # Bokeh: send plot output to the notebook
pn.extension()     # Panel: load its notebook extension

In [None]:
def get_config(path="config.yaml"):
    """Load the notebook's YAML configuration file.

    Parameters
    ----------
    path : str or os.PathLike, optional
        Location of the YAML file. Defaults to ``"config.yaml"``, which is
        resolved against the current working directory.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` produces for the document. The rest of
        this notebook indexes the result by key to obtain CSV paths.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    yaml.YAMLError
        If the file is not valid YAML.
    """
    # Pin the encoding: without it, open() falls back to the platform locale,
    # which can mis-decode a UTF-8 config file (notably on Windows).
    with open(path, 'r', encoding="utf-8") as stream:
        return yaml.safe_load(stream)

In [None]:
def combine_datasets(ccre, euclidean, amount, config=None):
    """Merge a ccre CSV and a euclidean CSV into a single DataFrame.

    Parameters
    ----------
    ccre : str
        Key in the configuration whose value is the path of the ccre CSV.
    euclidean : str
        Key in the configuration whose value is the path of the euclidean CSV.
    amount : int or float
        Threshold for the ``same_centre`` flag: a row is flagged ``True`` when
        ``abs(id_x - id_y) <= amount``.
    config : mapping, optional
        Already-loaded configuration. When omitted, ``get_config()`` is called,
        which is the original behaviour. Passing it avoids re-reading the YAML
        file for every dataset.

    Returns
    -------
    pandas.DataFrame
        The euclidean columns followed by the ccre columns, concatenated
        side by side. Where both frames share a column name (``id_x``,
        ``id_y``, ``same_centre``, ...) only the first occurrence, i.e. the
        euclidean one, is kept.

    Notes
    -----
    Rows are paired by index, not by ``id_x``/``id_y`` values.
    NOTE(review): this assumes both CSVs list the same pairs in the same
    order -- confirm against the files that produce them.
    """
    if config is None:
        config = get_config()

    ccre_df = pd.read_csv(config[ccre])
    euclidean_df = pd.read_csv(config[euclidean])

    # Flag each pair whose ids lie within `amount` of each other. The boolean
    # Series is assigned directly; no intermediate Python list is needed.
    for frame in (euclidean_df, ccre_df):
        frame['same_centre'] = (frame['id_x'] - frame['id_y']).abs() <= amount

    # Side-by-side concat, then drop the repeated columns (first one wins).
    combined = pd.concat([euclidean_df, ccre_df], axis=1)
    return combined.loc[:, ~combined.columns.duplicated()].copy()

In [None]:
# Largest |id_x - id_y| gap for which combine_datasets still flags a pair as
# ``same_centre`` (it applies ``<= amount``). Named once here instead of
# repeating the literal in every call.
SAME_CENTRE_MAX_ID_GAP = 4

# One merged (euclidean + ccre) frame per cohort combination; the string
# arguments are keys looked up in config.yaml.
HC_AD = combine_datasets(ccre="ccre_HC_AD", euclidean="euclidean_HC_AD", amount=SAME_CENTRE_MAX_ID_GAP)
HC_PD = combine_datasets(ccre="ccre_HC_PD", euclidean="euclidean_HC_PD", amount=SAME_CENTRE_MAX_ID_GAP)
HC_AD_PD = combine_datasets(ccre="ccre_HC_AD_PD", euclidean="euclidean_HC_AD_PD", amount=SAME_CENTRE_MAX_ID_GAP)
AD_PD = combine_datasets(ccre="ccre_AD_PD", euclidean="euclidean_AD_PD", amount=SAME_CENTRE_MAX_ID_GAP)
HC = combine_datasets(ccre="ccre_HC", euclidean="euclidean_HC", amount=SAME_CENTRE_MAX_ID_GAP)
AD = combine_datasets(ccre="ccre_AD", euclidean="euclidean_AD", amount=SAME_CENTRE_MAX_ID_GAP)
PD = combine_datasets(ccre="ccre_PD", euclidean="euclidean_PD", amount=SAME_CENTRE_MAX_ID_GAP)

In [None]:
# Sanity-check the merged PD frame; preview the first rows rather than
# displaying the whole DataFrame.
PD.head()

In [None]:
def _centre_scatter(name):
    """Build one ccre-vs-euclidean scatter figure for the dataset ``name``."""
    frame = sources_dataframes[name]
    fig = figure(height=400, width=400, title=name)
    fig.xaxis.axis_label = 'ccre(X|Y)'
    fig.yaxis.axis_label = 'euclidean_similarity'
    # Explicit .eq(flag) instead of a bare boolean mask: rows whose flag is
    # missing are left out of both groups, exactly as `== True` / `== False`.
    for flag, style in ((True, {}), (False, {"color": "red"})):
        fig.scatter(
            'ccre(X|Y)',
            'euclidean_similarity',
            source=frame[frame['same_centre'].eq(flag)],
            legend_label="same_centre = {}".format(flag),
            **style,
        )
    return fig


def compare_scatter_plots(src="AD_PD", src2="AD_PD"):
    """Show two datasets' ccre-vs-euclidean scatter plots side by side.

    Parameters
    ----------
    src, src2 : str
        Keys of the module-level ``sources_dataframes`` registry selecting the
        left and right dataset respectively.

    Returns
    -------
    bokeh layout
        A ``row`` holding the two figures. In each figure the rows with
        ``same_centre == True`` use the default glyph colour and the rows with
        ``same_centre == False`` are drawn in red; the legend states which is
        which.

    Raises
    ------
    KeyError
        If ``src`` or ``src2`` is not a key of ``sources_dataframes``.
    """
    return row(_centre_scatter(src), _centre_scatter(src2))
# Registry of the datasets offered in the drop-downs: label -> merged frame.
# NOTE(review): HC_AD_PD is loaded in an earlier cell but is not registered
# here -- confirm whether that omission is intentional.
sources_dataframes = {
    "AD_PD": AD_PD,
    "HC_PD": HC_PD,
    "HC_AD": HC_AD,
    "HC": HC,
    "AD": AD,
    "PD": PD,
}
# Drop-down options, in the registry's insertion order.
sources_names = list(sources_dataframes)

# Interactive widget: one selector per panel, re-rendering on change.
scatter_plots = pn.interact(
    compare_scatter_plots,
    src=sources_names,
    src2=sources_names,
)
scatter_plots

In [None]:
def _ecdf_figure(name):
    """Build one figure with the ECDFs of both measures for dataset ``name``."""
    frame = sources_dataframes[name]
    fig = figure(width=400, height=400, title=name)
    fig.xaxis.axis_label = 'value'
    fig.yaxis.axis_label = 'cumulative proportion'
    # First curve keeps Bokeh's default line colour, second one is orange.
    for column_name, style in (
        ('ccre(X|Y)', {}),
        ('euclidean_similarity', {"color": 'orange'}),
    ):
        ecdf = ECDF(frame[column_name])
        fig.line(ecdf.x, ecdf.y, legend_label=column_name, **style)
    # The curves end in the top-right corner, so keep the legend out of it.
    fig.legend.location = "bottom_right"
    return fig


def cumulative_histogram(source1 = "AD_PD", source2 = "AD_PD"):
    """Show two datasets' empirical cumulative distributions side by side.

    Parameters
    ----------
    source1, source2 : str
        Keys of the module-level ``sources_dataframes`` registry selecting the
        left and right dataset respectively.

    Returns
    -------
    bokeh layout
        A ``row`` holding the two figures. Each figure draws the ECDF of
        ``ccre(X|Y)`` (default colour) and of ``euclidean_similarity``
        (orange), labelled in the legend.

    Raises
    ------
    KeyError
        If ``source1`` or ``source2`` is not a key of ``sources_dataframes``.
    """
    return row(_ecdf_figure(source1), _ecdf_figure(source2))
# Interactive widget for the ECDF comparison: one dataset selector per panel.
cumulative_plots = pn.interact(
    cumulative_histogram,
    source1=sources_names,
    source2=sources_names,
)
cumulative_plots
