In [None]:
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def get_config(path="config.yaml"):
    """Load the notebook's YAML configuration file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the YAML file. Defaults to ``"config.yaml"``, which is
        resolved relative to the current working directory.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` parses from the file. Callers in this
        notebook index the result by key, so a mapping is expected. An empty
        file yields ``None``.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    yaml.YAMLError
        If the file is not valid YAML.
    """
    # Pin the encoding: without it `open` falls back to the locale default,
    # so the same file could decode differently on different machines.
    with open(path, 'r', encoding='utf-8') as stream:
        return yaml.safe_load(stream)

In [None]:
def combine_datasets(ccre, euclidean, amount):
    """Read a CCRE table and a Euclidean-similarity table and merge them.

    Parameters
    ----------
    ccre : str
        Key in the YAML config whose value is the path of the CCRE CSV.
        That CSV must contain the columns ``ccre(X|Y)``, ``id_x`` and ``id_y``.
    euclidean : str
        Key in the YAML config whose value is the path of the Euclidean CSV.
        That CSV must contain ``euclidean_similarity``, ``id_x`` and ``id_y``.
    amount : number
        A row is flagged ``same_centre`` when ``|id_x - id_y| <= amount``.

    Returns
    -------
    pandas.DataFrame
        The two tables placed side by side (Euclidean columns first). When a
        column name occurs in both tables only the first occurrence, i.e. the
        Euclidean one, is kept; this includes ``id_x``, ``id_y`` and
        ``same_centre``.

    Raises
    ------
    KeyError
        If a config key is missing, or a required value column is absent
        from its CSV.
    """
    config = get_config()

    ccre_df = pd.read_csv(config[ccre])
    euclidean_df = pd.read_csv(config[euclidean])

    # Fail early, with the offending column name, instead of materialising
    # throwaway arrays just to trigger the lookup.
    if 'ccre(X|Y)' not in ccre_df.columns:
        raise KeyError('ccre(X|Y)')
    if 'euclidean_similarity' not in euclidean_df.columns:
        raise KeyError('euclidean_similarity')

    # Flag each pair in both tables. `.to_numpy()` keeps the assignment
    # positional without a round-trip through a Python list.
    for frame in (euclidean_df, ccre_df):
        id_gap = abs(frame['id_x'] - frame['id_y'])
        frame['same_centre'] = (id_gap <= amount).to_numpy()

    # Side-by-side concat aligns on the row index.
    # NOTE(review): this assumes both CSVs list the same (id_x, id_y) pairs in
    # the same row order; nothing here verifies it -- TODO confirm upstream.
    df = pd.concat([euclidean_df, ccre_df], axis=1)

    # Drop repeated column names, keeping the first (Euclidean) occurrence.
    df = df.loc[:, ~df.columns.duplicated()].copy()
    return df

In [None]:
def compare_euclidean_ccre(df, disease):
    """Scatter ``ccre(X|Y)`` against ``euclidean_similarity``, one series per
    ``same_centre`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``same_centre``, ``ccre(X|Y)`` and
        ``euclidean_similarity``.
    disease : str
        Label interpolated into the plot title.

    Draws onto the current pyplot figure and returns nothing.
    """
    # Markers only, no connecting line: each row is an independent point.
    point_style = dict(marker='o', linestyle='', markersize=5)

    for flag, subset in df.groupby('same_centre'):
        # The group key itself is used as the legend entry.
        plt.plot(
            subset['ccre(X|Y)'],
            subset['euclidean_similarity'],
            label=flag,
            **point_style,
        )

    plt.title(f"euclidean vs ccre in {disease} grouped by same_centre")
    plt.xlabel("ccre")
    plt.ylabel("euclidean similarity")
    plt.legend()

def cumulative_histogram(ccre, euclidean):
    """Overlay the empirical CDFs of two samples on the current pyplot figure.

    Parameters
    ----------
    ccre : array-like
        Values plotted under the legend label ``"ccre"``.
    euclidean : array-like
        Values plotted under the legend label ``"euclidean"``.

    Returns nothing; the curves and legend are added via ``plt``.
    """
    # Same draw order as the labels suggest: ccre first, then euclidean.
    for values, tag in ((ccre, "ccre"), (euclidean, "euclidean")):
        plt.ecdf(values, label=tag)
    plt.legend()

In [None]:
# Build one merged CCRE/Euclidean frame per dataset. The `ccre_*` and
# `euclidean_*` strings are keys looked up in config.yaml; `amount=4` means a
# pair is flagged `same_centre` when |id_x - id_y| <= 4.
AD = combine_datasets(ccre="ccre_AD", euclidean="euclidean_AD", amount=4)
PD = combine_datasets(ccre="ccre_PD", euclidean="euclidean_PD", amount=4)
HC = combine_datasets(ccre="ccre_HC", euclidean="euclidean_HC", amount=4)

In [None]:
# Scatter of ccre(X|Y) vs. euclidean_similarity for the AD frame, split by same_centre.
compare_euclidean_ccre(AD, "AD")

In [None]:
# Scatter of ccre(X|Y) vs. euclidean_similarity for the PD frame, split by same_centre.
compare_euclidean_ccre(PD, "PD")

In [None]:
# Scatter of ccre(X|Y) vs. euclidean_similarity for the HC frame, split by same_centre.
compare_euclidean_ccre(HC, "HC")

In [None]:
# Empirical CDFs of the two measures for the HC frame.
cumulative_histogram(HC['ccre(X|Y)'],HC['euclidean_similarity'])


In [None]:
# Empirical CDFs of the two measures for the PD frame.
cumulative_histogram(PD['ccre(X|Y)'],PD['euclidean_similarity'])

In [None]:
# Empirical CDFs of the two measures for the AD frame.
cumulative_histogram(AD['ccre(X|Y)'],AD['euclidean_similarity'])