In [None]:
import numpy as np
import pandas as pd
from scripts.python.routines.manifest import get_manifest
from scripts.python.preprocessing.serialization.routines.pheno_betas_checking import get_pheno_betas_with_common_subjects
import pathlib
from tqdm.notebook import tqdm
from impyute.imputation.cs import fast_knn, mean, median, random, mice, mode, em
from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
from matplotlib_venn import venn3, venn3_circles
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt


In [None]:

# Input root holding per-disease CpG tables, and output folder for this analysis.
path_data = "E:/YandexDisk/Work/pydnameth/datasets/meta/tasks/GPL13534_Blood/"
path_save = "E:/YandexDisk/Work/pydnameth/draft/03_somewhere/revision/R2_Major_C7"
pathlib.Path(path_save).mkdir(parents=True, exist_ok=True)

manifest = get_manifest('GPL13534')

# disease -> data type -> basename of the xlsx file under
# "<path_data>/<disease>/<data_type>/cpgs/".
# NOTE(review): the numbers look like CpG counts used as file names -- confirm.
cpgs_dict = {
    'Parkinson': {
        'harmonized': 50911,
        'non_harmonized': 43019
    },
    'Schizophrenia': {
        'harmonized': 110137,
        'non_harmonized': 35145
    }
}

# For every disease: load the harmonized / non-harmonized feature sets, save
# their intersection to Excel and draw a two-set Venn diagram (PNG + PDF).
for disease, files_by_type in cpgs_dict.items():
    cpgs_disease = {}
    for data_type, file_stem in files_by_type.items():
        df_cpgs = pd.read_excel(
            f"{path_data}/{disease}/{data_type}/cpgs/{file_stem}.xlsx",
            index_col="features",
        )
        cpgs_disease[data_type] = set(df_cpgs.index.values)
        print(f"{disease} {data_type}: {len(cpgs_disease[data_type])}")

    # Sort so the exported table has a reproducible row order; plain set
    # iteration order is arbitrary and may differ between runs.
    intxn = sorted(set.intersection(*cpgs_disease.values()))
    print(f"intxn_size: {len(intxn)}")
    df_intxn = pd.DataFrame(index=intxn)
    df_intxn.to_excel(f"{path_save}/{disease}_intxn.xlsx")

    harmonized = cpgs_disease['harmonized']
    non_harmonized = cpgs_disease['non_harmonized']

    fig, ax = plt.subplots()
    venn = venn2(
        subsets=(harmonized, non_harmonized),
        set_labels=(f"Harmonized\n({len(harmonized)})", f"Non-harmonized\n({len(non_harmonized)})"),
        set_colors=('r', 'g'),
        alpha=0.5)
    venn2_circles(subsets=(harmonized, non_harmonized))

    # matplotlib-venn stores None for labels of empty regions (e.g. when one
    # set is contained in the other), so skip those instead of crashing.
    for label in (*venn.set_labels, *venn.subset_labels):
        if label is not None:
            label.set_fontsize(16)

    fig.savefig(f"{path_save}/{disease}_venn.png", bbox_inches='tight', dpi=400)
    fig.savefig(f"{path_save}/{disease}_venn.pdf", bbox_inches='tight', dpi=400)
    # Close (not just clear) the figure so figures do not pile up across iterations.
    plt.close(fig)