In [1]:
"""
The aim of this script is to compare TRACEvar performance to common genetic tools (Fig. 2C-F). 
Tissue specific performance data and AUC plots created here, overall comparision is in the Methods_Comparison_Analysis.ipynb script.
"""

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
import pickle
import shap
from sklearn.metrics import precision_recall_curve
import matplotlib.pyplot as plt
import matplotlib
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import average_precision_score
import pickle
from scipy import interp

In [2]:
"------------------------------------------  Overlap cols ----------------------------------"

path = os.path.join('..', '..', 'Data', 'Relevant_Columns_Names_Edited_2.csv')
Relevant_Cols_df = pd.read_csv(path)
overlap_cols = Relevant_Cols_df['Feature'].tolist()
print(overlap_cols)


"------------------------------------------  Load Data ----------------------------------"

path = os.path.join('..', '..', 'Data', 'Full_Slim_Dataset_hg37-v1.6.csv')
Variants_data = pd.read_csv(path, engine='python') 
print(Variants_data)


path = os.path.join('..', '..', 'Results', 'Best_Parameters', 'Best_Parameters_New_17.csv')
Best_param = pd.read_csv(path, engine='python')
print(Best_param)


path_capice = os.path.join('..', '..', 'Data', 'CAPICE_hg37_VariantID.csv')
CAPICE_score = pd.read_csv(path_capice)


['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

       VariationID   OMIMs                                 Manifested_Tissues  \
0           535972  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
1           535875  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
2           535979  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
3           567376  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
4           565912  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
...            ...     ...                                                ...   
67963       873299     NaN                                                NaN   
67964       873211     NaN                                                NaN   
67965       873240     NaN                                                NaN   
67966       873216     NaN                                                NaN   
67967       915562     NaN                                                NaN   

      #Chr        Pos      

In [3]:
"----------------------------------- Create Scores DF -------------------------------------"

scores_y_columns = Variants_data.columns[Variants_data.columns.str.contains(pat = 'disease_causing')].tolist()
scores_y_columns = scores_y_columns + ['VariationID', 'PHRED', 'SIFTval', 'PolyPhenVal']
Scores_DF = pd.merge(Variants_data[scores_y_columns], CAPICE_score, on='VariationID', how='left')#VariationID
print('Scores_DF', Scores_DF)

Scores_DF['CAPICE'] = Scores_DF['CAPICE_Score'].apply(lambda x: x * 100)
Scores_DF['SIFT'] = Scores_DF['SIFTval'].apply(lambda x: x * 100)
Scores_DF['PolyPhen'] = Scores_DF['PolyPhenVal'].apply(lambda x: x * 100)


Scores_DF        Lung_disease_causing  Muscle - Skeletal_disease_causing  \
0                      True                              False   
1                      True                              False   
2                      True                              False   
3                      True                              False   
4                      True                              False   
...                     ...                                ...   
67963                 False                              False   
67964                 False                              False   
67965                 False                              False   
67966                 False                              False   
67967                 False                              False   

       Skin - Sun Exposed_disease_causing  \
0                                    True   
1                                    True   
2                                    True   
3                

In [4]:


"----------------------------------- Split data into non baiased folds -------------------------------------"

def split_data_on_genes(Variants_data, y):
    
    Genes_fraction = Variants_data['GeneID_y'][Variants_data[y] == True].value_counts(normalize=True)
    print(Genes_fraction.head(5))
    tissue = y.split('_')[0]
    Genes_as_factor = pd.DataFrame()
    Genes_as_factor['GeneID_y'] = Variants_data['GeneID_y'][Variants_data[y] == True].astype('category')
    Genes_as_factor["Gene_Categorical"] = Genes_as_factor["GeneID_y"].cat.codes
    Genes_fraction.columns = ['Fraction']
    return Genes_fraction, Genes_as_factor

def find_fold(Genes_fraction):
    
    Genes_fraction = Genes_fraction.sort_values()
    Genes_fraction = Genes_fraction.reset_index()
    Genes_fraction.columns = ['GeneID', 'Fraction']
    inds = Genes_fraction.index
    fold_list = []
    genes_list = []
    count_list = []
    fold = 0
    threshold = 0.1
    counter = 0
    for ind in inds:
        fraction = Genes_fraction.iloc[ind]['Fraction']
        gene = Genes_fraction.iloc[ind]['GeneID']

        counter += fraction

        if counter > threshold:
            fold += 1
            counter = 0
        if fraction >= threshold or fold > (1/threshold - 1):
            genes_list.append(gene)
            fold_list.append(None)
            break
        else:
            genes_list.append(gene)
            fold_list.append(fold)

    Fold_df = pd.DataFrame(list(zip(genes_list, fold_list)), columns=['GeneID', 'Fold'])
    Fold_merge = Genes_fraction.merge(Fold_df, on='GeneID', how='left')
    
    return Fold_merge

def random_split(Folds_df):
    
    threshold = 0.1
    Folds_df = Folds_df.sort_values('GeneID')
    Shafeled_df = Folds_df.sample(frac=1, random_state= 1234).reset_index(drop=True)
    inds = Shafeled_df.index
    new_fold = [None]*len(Folds_df)
    nan_inds = Shafeled_df[Shafeled_df['Fold'].isna()].index
    used_inds = [x for x in nan_inds]
    fold = 0
    for ind in inds:
        cuonter = 0
        fraction = Shafeled_df.iloc[ind]['Fraction']
        gene = Shafeled_df.iloc[ind]['GeneID']
        old_fold = Shafeled_df.iloc[ind]['Fold']
        cuonter += fraction
        if ind not in used_inds:
            new_fold[ind] = fold
            used_inds.append(ind)
            relevant_inds = [x for x in inds if x not in used_inds]
            for rel_ind in relevant_inds:
                fraction2 = Shafeled_df.iloc[rel_ind]['Fraction']
                if fraction2 + cuonter <= threshold:
                    cuonter += fraction2
                    new_fold[rel_ind] = fold
                    used_inds.append(rel_ind)
            fold += 1


    Shafeled_df['New_Fold'] = np.array(new_fold)
    print(Shafeled_df)
    folds = Shafeled_df['New_Fold'].unique()
    last_fold_num = Shafeled_df['New_Fold'].max()
    last_fold = Shafeled_df[Shafeled_df['New_Fold'] == last_fold_num]
    last_fold_sum = last_fold['Fraction'].sum()

    

    if last_fold_sum < 0.07:
        Shafeled_df['New_Fold'][Shafeled_df['New_Fold'] == last_fold_num] = last_fold_num - 1

        

    return Shafeled_df



y_columns = Variants_data.columns[Variants_data.columns.str.contains(pat = 'disease_causing')].tolist()
print(y_columns)
cols = overlap_cols 

"-------------------------- Data PreProcessing ------------------------"

def preprocessing_new(Relevant_data, y_columns, y):
    
    "------------- Remove nonrelevant coluns -------------------------"
    non_relevant_columns = ['VariationID', 'OMIMs', 'Manifested_Tissues', '#Chr', 'Pos', 'ConsDetail', 'motifEName', 'FeatureID', 'GeneName', 'CCDS', 'Intron', 'Exon', 'SIFTcat', 'PolyPhenCat', 'bStatistic', 'targetScan', 'dbscSNV-rf_score', 'oAA', 'Ref', 'nAA', 'Alt', 'Segway']# it will be good to replace oAA and nAA with blssuom64 matrix. What bStatistic doing?
    non_relevant_columns = non_relevant_columns + y_columns
    print(non_relevant_columns)

    relevant_columns = [x for x in cols if x not in non_relevant_columns and x in list(Variants_data)]
    relevant_columns.append(y)
    print(relevant_columns)
    Relevant_data = Variants_data[relevant_columns]
    print(Relevant_data)
    
    "---------------------- One Hot Columns -------------------------"
    
    one_hot_columns = ['Type', 'AnnoType', 'Consequence', 'Domain', 'Dst2SplType'] 

    one_hot = pd.get_dummies(Relevant_data[one_hot_columns])
    Relevant_data = Relevant_data.drop(one_hot_columns, axis=1)
    Relevant_data = Relevant_data.join(one_hot)
    
    "---------------------- Missing Values Imputation ---------------"
    
    special_imputation_cols = {'SIFTval':1, 'GC':0.42, 'CpG':0.02, 'priPhCons':0.115, 'mamPhCons':0.079, 'verPhCons':0.094,'priPhyloP':-0.033, 'mamPhyloP':-0.038, 'verPhyloP':0.017, 'GerpN':1.91, 'GerpS':-0.2}
    
    for cl in special_imputation_cols:
        Relevant_data[cl] = Relevant_data[cl].fillna(special_imputation_cols[cl])
        
    Relevant_data.fillna(0, inplace=True)
    
    return Relevant_data

"---------------------------------------- AUC drawing functions --------------------------------"

def plot_mean_auc(tprs, mean_fpr, aucs, dataset, color, ax):

    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(mean_fpr, mean_tpr, color=color,
             label= dataset+r' (auROC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
             lw=2, alpha=.8)

def cadd_as_score_measurements(cadd_score_column, y):
    CADD_Scores_df = pd.concat([cadd_score_column, y], axis=1)#, ignore_index=True)
    CADD_Scores_df.columns = ['Pathogenicity_Score', 'tissue_specific_disease']
    CADD_Scores_df.dropna(subset=['Pathogenicity_Score'], inplace=True)

    print('CADD_Scores_df', CADD_Scores_df)

    max = 102
    ROC = np.zeros((max, 2))
    Precision_Recall = np.zeros((max, 2))
    average_precision = 0
    TPR_previuos = 0

    for i in range(max):
        # t = thresholds[i]
        t = i
        print('threshold: ', t)
        TP_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] >= t) & (CADD_Scores_df['tissue_specific_disease'] == True)])
        TN_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] < t) & (CADD_Scores_df['tissue_specific_disease'] == False)])
        FP_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] >= t) & (CADD_Scores_df['tissue_specific_disease'] == False)])
        FN_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] < t) & (CADD_Scores_df['tissue_specific_disease'] == True)])
        FPR_t = FP_t / float(FP_t + TN_t)
        ROC[i, 0] = FPR_t

        TPR_t = TP_t / float(TP_t + FN_t)  # = Recall
        ROC[i, 1] = TPR_t
        print('TP_t', TP_t)
        print('TN_t', TN_t)
        print('FP_t', FP_t)
        print('FN_t', FN_t)
        print('FPR_t', FPR_t)
        print('TPR_t', TPR_t)


        Precision_Recall[i, 0] = TPR_t
        try:
            precision = TP_t / float(TP_t + FP_t)
        except:
            precision = 0
        print('precision', precision)
        Precision_Recall[i, 1] = precision

        average_precision += (TPR_t - TPR_previuos) * precision
        TPR_previuos = TPR_t
    average_precision = average_precision * -1
    AUC = 0.
    for i in range(max-1):
        AUC += (ROC[i + 1, 0] - ROC[i, 0]) * (ROC[i + 1, 1] + ROC[i, 1])
    AUC *= -0.5
    roc_auc = metrics.auc(ROC[:,0], ROC[:,1])
    average_precision = metrics.auc(Precision_Recall[:,0], Precision_Recall[:,1])
    return Precision_Recall, average_precision, ROC, AUC

def sift_as_score_measurements(cadd_score_column, y):
    print('@', 'SIFT')
    CADD_Scores_df = pd.concat([cadd_score_column, y], axis=1)#, ignore_index=True)
    CADD_Scores_df.columns = ['Pathogenicity_Score', 'tissue_specific_disease']
    print('CADD_Scores_df', CADD_Scores_df)
    # thresholds = np.linspace(101, 1, 101)
    # print(thresholds)
    max = 102
    ROC = np.zeros((max, 2))
    Precision_Recall = np.zeros((max, 2))
    average_precision = 0
    TPR_previuos = 0

    for i in range(-1, max-1):
        t = i
        print('threshold: ', t)
        TP_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] <= t) & (CADD_Scores_df['tissue_specific_disease'] == True)])
        TN_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] > t) & (CADD_Scores_df['tissue_specific_disease'] == False)])
        FP_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] <= t) & (CADD_Scores_df['tissue_specific_disease'] == False)])
        FN_t = len(CADD_Scores_df[(CADD_Scores_df['Pathogenicity_Score'] > t) & (CADD_Scores_df['tissue_specific_disease'] == True)])
        FPR_t = FP_t / float(FP_t + TN_t)
        ROC[i+1, 0] = FPR_t

        TPR_t = TP_t / float(TP_t + FN_t)  # = Recall
        ROC[i+1, 1] = TPR_t
        print('TP_t', TP_t)
        print('TN_t', TN_t)
        print('FP_t', FP_t)
        print('FN_t', FN_t)
        print('FPR_t', FPR_t)
        print('TPR_t', TPR_t)


        Precision_Recall[i+1, 0] = TPR_t
        try:
            precision = TP_t / float(TP_t + FP_t)
        except:
            precision = 0
        print('precision', precision)
        Precision_Recall[i+1, 1] = precision

        average_precision += (TPR_t - TPR_previuos) * precision
        TPR_previuos = TPR_t
    average_precision = average_precision
    AUC = 0.
    for i in range(max-1):
        AUC += (ROC[i + 1, 0] - ROC[i, 0]) * (ROC[i + 1, 1] + ROC[i, 1])
    AUC *= 0.5
    roc_auc = metrics.auc(ROC[:,1], ROC[:,0])
    print('ROC')
    print(ROC)
    print('AUC')
    print(AUC)
    
    roc_auc = metrics.auc(ROC[:,0], ROC[:,1])
    average_precision = metrics.auc(Precision_Recall[:,0], Precision_Recall[:,1])
    
    return Precision_Recall, average_precision, ROC, AUC




['Lung_disease_causing', 'Muscle - Skeletal_disease_causing', 'Skin - Sun Exposed_disease_causing', 'Adipose - Subcutaneous_disease_causing', 'Artery - Aorta_disease_causing', 'Heart - Left Ventricle_disease_causing', 'Artery - Coronary_disease_causing', 'brain-0_disease_causing', 'Liver_disease_causing', 'Nerve - Tibial_disease_causing', 'Colon - Sigmoid_disease_causing', 'kidney_disease_causing', 'Heart - Atrial Appendage_disease_causing', 'Breast - Mammary Tissue_disease_causing', 'Uterus_disease_causing', 'Adipose - Visceral_disease_causing', 'Esophagus - Gastroesophageal Junction_disease_causing', 'Esophagus - Mucosa_disease_causing', 'brain-1_disease_causing', 'Skin - Not Sun Exposed_disease_causing', 'Artery - Tibial_disease_causing', 'Pituitary_disease_causing', 'Ovary_disease_causing', 'brain-3_disease_causing', 'Thyroid_disease_causing', 'Testis_disease_causing', 'Whole Blood_disease_causing', 'brain-2_disease_causing', 'brain_disease_causing']


In [5]:
import ast
from time import gmtime, strftime

"------------------------------ Train Model ---------------------------------------"
results_dict = {'Tissue':[], 'Data_set':[], 'Fold':[], 'ROC_AUC':[], 'PR_AUC':[]}
best_parameters_dict = {}
y_columns = ['Heart - Left Ventricle_disease_causing', 'brain_disease_causing', 'Lung_disease_causing', 'Muscle - Skeletal_disease_causing', 'Skin - Sun Exposed_disease_causing', 'Adipose - Subcutaneous_disease_causing', 'Artery - Aorta_disease_causing',  'Artery - Coronary_disease_causing', 'brain-0_disease_causing', 'Liver_disease_causing', 'Nerve - Tibial_disease_causing', 'Colon - Sigmoid_disease_causing', 'kidney_disease_causing', 'Heart - Atrial Appendage_disease_causing', 'Breast - Mammary Tissue_disease_causing', 'Uterus_disease_causing', 'Adipose - Visceral_disease_causing', 'Esophagus - Gastroesophageal Junction_disease_causing', 'Esophagus - Mucosa_disease_causing', 'brain-1_disease_causing', 'Skin - Not Sun Exposed_disease_causing', 'Artery - Tibial_disease_causing', 'Pituitary_disease_causing', 'Ovary_disease_causing', 'brain-3_disease_causing', 'Thyroid_disease_causing', 'Testis_disease_causing', 'Whole Blood_disease_causing', 'brain-2_disease_causing']

def compare_score_tissue(y):

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
    tissue = y.replace("_disease_causing", "")

    if tissue.strip() in Best_param['Tissue'].unique():

        print("------------- ", y, " ------------------")
        pathogenic_proportion = Variants_data[y].value_counts(normalize=True)[True]
        print('Pathogenic_proportion', pathogenic_proportion)
        Relevant_data = preprocessing_new(Variants_data, y_columns, y)
        Genes_fraction = Variants_data['GeneID_y'][Variants_data[y] == True].value_counts(normalize=True)
        Genes_fraction, Genes_as_factor = split_data_on_genes(Variants_data, y)
        Folds_df = find_fold(Genes_fraction)
        print(Folds_df)
        Shafeled_Fold_df = random_split(Folds_df)

        folds_list = list(Shafeled_Fold_df['New_Fold'].unique())
        folds_list = [int(x) for x in folds_list if pd.notnull(x)]
        print('folds_list', folds_list)
        # break

        tprs = []
        aucs = []

        mean_fpr = np.linspace(0, 1, 100)
        y_real = []
        y_proba = []

        x_test_list = []
        y_test_list = []
        best_params_list = []
        num_genes = Variants_data['GeneID_y'][Variants_data[y]==True].nunique()
        if num_genes >= 20:
            for fold in folds_list:
                print(" Fold: ", fold)

                test_genes = Shafeled_Fold_df['GeneID'][Shafeled_Fold_df['New_Fold'] == fold].tolist()
                print('test_genes', test_genes)
                train_genes = Shafeled_Fold_df['GeneID'][Shafeled_Fold_df['New_Fold'] != fold].tolist()
                print('train_genes', train_genes)
                benign_genes = Variants_data['GeneID_y'][Variants_data[y] == False].tolist()
                print('benign_genes', len(benign_genes))

                Benign_data = Relevant_data[Relevant_data[y] == False]
                Pathogenic_test = Relevant_data[(Relevant_data[y] == True) & (Relevant_data['GeneID_y'].isin(test_genes))]
                Pathogenic_train = Relevant_data[(Relevant_data[y] == True) & (Relevant_data['GeneID_y'].isin(train_genes))]

                test_len =  len(Pathogenic_test)
                train_len = len(Pathogenic_train)
                test_fraction = test_len/(test_len+train_len)
                train_fraction = train_len/(test_len+train_len)
                print('test_fraction: ', test_fraction)
                print('train_fraction: ', train_fraction)

                X_train1, X_test1, y_train1, y_test1 = train_test_split(Benign_data, Benign_data[y], test_size=test_fraction, random_state=fold)

                X_train1 = X_train1[~X_train1['GeneID_y'].isin(test_genes)]
                X_test1 = X_test1[~X_test1['GeneID_y'].isin(train_genes)]
                y_train1 = y_train1[X_train1[~X_train1['GeneID_y'].isin(test_genes)].index]
                y_test1 = y_test1[X_test1[~X_test1['GeneID_y'].isin(train_genes)].index]

                relevant_cols2 = list(Pathogenic_train)
                relevant_cols = [x for x in relevant_cols2 if x != y and x != 'GeneID_y']
                print('relevant_cols', relevant_cols)


                X_train = pd.concat([X_train1[relevant_cols], Pathogenic_train[relevant_cols]])
                X_test = pd.concat([X_test1[relevant_cols], Pathogenic_test[relevant_cols]])
                y_train = pd.concat([y_train1, Pathogenic_train[y]])
                y_test = pd.concat([y_test1, Pathogenic_test[y]])

                X_train = X_train.sort_index()
                X_test = X_test.sort_index()
                y_train = y_train.sort_index()
                y_test = y_test.sort_index()

                print(y_test)


                best_parameters = Best_param['Best_Parameters'][(Best_param['Dataset'] == 'Full Trace')&(Best_param['Tissue'] == tissue.strip())&(Best_param['ML_Model'] == 'Random Forest')].values[0]
                best_parameters = ast.literal_eval(best_parameters)
                print(best_parameters)            
                model = RandomForestClassifier(**best_parameters)
                model.fit(X_train, y_train)

                y_pred = model.predict(X_test)  # predict the test data
                y_pred = pd.DataFrame(y_pred)
                predictions_proba = model.predict_proba(X_test)

                pred_true = predictions_proba[:, 1]
                clr = classification_report(y_test, y_pred, output_dict=True)
                print(clr)
                precision = clr['True']['precision']
                recall_1 = clr['True']['recall']
                f1_score = clr['True']['f1-score']
                print('@@@ ','precision:', precision, 'recall: ', recall_1, 'f1_score: ', f1_score)

                prec, recall, _ = precision_recall_curve(y_test, pred_true)  # pred[:, 1], pos_label=model.classes_[1]
                fpr, tpr, _ = roc_curve(y_test, pred_true)
                pr_auc1 = average_precision_score(y_test, pred_true)
                pr_auc1 =  metrics.auc(recall, prec)

                roc_auc = metrics.auc(fpr, tpr)
              
            
                data_set = 'Full TRACE'
                results_dict['Tissue'].append(tissue)
                results_dict['Data_set'].append(data_set)
                results_dict['Fold'].append(fold)
                results_dict['ROC_AUC'].append(roc_auc)
                results_dict['PR_AUC'].append(pr_auc1)

                tprs.append(interp(mean_fpr, fpr, tpr))
                tprs[-1][0] = 0.0
                aucs.append(roc_auc)
                y_real.append(y_test)
                y_proba.append(pred_true)

                x_test_list.append(X_test)
                y_test_list.append(y_test)
                # break
            if len(folds_list)>0:

                Precision_Recall, average_precision, ROC, AUC = cadd_as_score_measurements(Scores_DF['PHRED'], Scores_DF[y])
                ax2.plot(Precision_Recall[:, 0], Precision_Recall[:, 1], color='pink' ,label='%s (auPRC = %0.2f)' % ('CADD', average_precision))
                ax1.plot(ROC[:, 0], ROC[:, 1], color='pink', label='%s (auROC = %0.2f)' % ('CADD', AUC))

                results_dict['Tissue'].append(tissue)
                results_dict['Data_set'].append('CADD')
                results_dict['Fold'].append('All')
                results_dict['ROC_AUC'].append(AUC)
                results_dict['PR_AUC'].append(average_precision)

                Precision_Recall, average_precision, ROC, AUC = cadd_as_score_measurements(Scores_DF['CAPICE'], Scores_DF[y])
                ax2.plot(Precision_Recall[:, 0], Precision_Recall[:, 1], color='dimgray',label='%s (auPRC = %0.2f)' % ('CAPICE', average_precision))
                ax1.plot(ROC[:, 0], ROC[:, 1], color='dimgray', label='%s (auROC = %0.2f)' % ('CAPICE', AUC))

                results_dict['Tissue'].append(tissue)
                results_dict['Data_set'].append('CAPICE')
                results_dict['Fold'].append('All')
                results_dict['ROC_AUC'].append(AUC)
                results_dict['PR_AUC'].append(average_precision)
                print('------------------ Sift -----------------------')
                Precision_Recall, average_precision, ROC, AUC = sift_as_score_measurements(Scores_DF['SIFT'], Scores_DF[y])
                ax2.plot(Precision_Recall[:, 0], Precision_Recall[:, 1], color='forestgreen',label='%s (auPRC = %0.2f)' % ('SIFT', average_precision))
                ax1.plot(ROC[:, 0], ROC[:, 1], color='forestgreen', label='%s (auROC = %0.2f)' % ('SIFT', AUC))

                results_dict['Tissue'].append(tissue)
                results_dict['Data_set'].append('SIFT')
                results_dict['Fold'].append('All')
                results_dict['ROC_AUC'].append(AUC)
                results_dict['PR_AUC'].append(average_precision)

                Precision_Recall, average_precision, ROC, AUC = cadd_as_score_measurements(Scores_DF['PolyPhen'], Scores_DF[y])
                ax2.plot(Precision_Recall[:, 0], Precision_Recall[:, 1], color='darkorange',label='%s (auPRC = %0.2f)' % ('PolyPhen', average_precision))
                ax1.plot(ROC[:, 0], ROC[:, 1], color='darkorange', label='%s  (auROC = %0.2f)' % ('PolyPhen', AUC))

                results_dict['Tissue'].append(tissue)
                results_dict['Data_set'].append('PolyPhen')
                results_dict['Fold'].append('All')
                results_dict['ROC_AUC'].append(AUC)
                results_dict['PR_AUC'].append(average_precision)


                full_interp_list = tprs
                full_auc_list = aucs

                plot_mean_auc(full_interp_list, mean_fpr, full_auc_list, 'TRACEvar', 'blue', ax1)

                ax1.plot([0, 1], [0, 1], 'r--')

                ax1.set_xlabel('1-Specificity(False Positive Rate)')
                ax1.set_ylabel('Sensitivity(True Positive Rate)')
                ax1.set_title('Receiver Operating Characteristic')
                ax1.legend(loc="lower right", fontsize='small')

                fig.suptitle(tissue)



                y_real_trace = np.concatenate(y_real)
                y_proba_trace = np.concatenate(y_proba)

                precision_trace, recall_trace, _ = precision_recall_curve(y_real_trace, y_proba_trace)

                plt.plot(recall_trace, precision_trace, color='blue', label=r'TRACEvar (mean auPRC = %0.2f)' % (average_precision_score(y_real_trace, y_proba_trace)), lw=2, alpha=.8)
                ax2.axhline(y=pathogenic_proportion, color='red', linestyle='--',
                            label=r'Pathogenic variants frequency = %0.3f' % (pathogenic_proportion))
                ax2.set_xlabel('Recall')
                ax2.set_ylabel('Precision')
                ax2.set_title('Precision-Recall curve')
                ax2.legend(loc="lower right", fontsize='small')
                path = os.path.join('..', '..', 'Results', 'Slim Model Comparison3',  tissue + '_Compare_Scores_Slim_Model_Docker.pdf')
                plt.savefig(path)
                plt.close()
    #               plt.show()
                Results = pd.DataFrame.from_dict(results_dict)
                path = os.path.join('..', '..', 'Results', 'Slim Model Comparison3',  tissue + '_Compare_Scores_Slim_Model_Docker.csv')
                Results.to_csv(path, index=None)
            
    return tissue, strftime("%Y-%m-%d %H:%M:%S", gmtime())


In [6]:
import multiprocessing as mp

def driver_func_shap():
    PROCESSES = 20
    df_list = []
    
    with mp.Pool(PROCESSES) as pool:
        results = [pool.apply_async(compare_score_tissue, (y,)) for y in y_columns]
        for r in results:
            results_tuple = r.get(timeout=None)
            print('@', results_tuple[0], ' finished', results_tuple[1])

if __name__ == '__main__':
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    driver_func_shap()    
    

2023-01-02 08:39:29
-------------  Heart - Left Ventricle_disease_causing  ------------------
-------------  Muscle - Skeletal_disease_causing  ------------------
-------------  Lung_disease_causing  ------------------
-------------  brain_disease_causing  ------------------
-------------  brain-1_disease_causing  ------------------
Pathogenic_proportion 0.02863112052730697
['VariationID', 'OMIMs', 'Manifested_Tissues', '#Chr', 'Pos', 'ConsDetail', 'motifEName', 'FeatureID', 'GeneName', 'CCDS', 'Intron', 'Exon', 'SIFTcat', 'PolyPhenCat', 'bStatistic', 'targetScan', 'dbscSNV-rf_score', 'oAA', 'Ref', 'nAA', 'Alt', 'Segway', 'Heart - Left Ventricle_disease_causing', 'brain_disease_causing', 'Lung_disease_causing', 'Muscle - Skeletal_disease_causing', 'Skin - Sun Exposed_disease_causing', 'Adipose - Subcutaneous_disease_causing', 'Artery - Aorta_disease_causing', 'Artery - Coronary_disease_causing', 'brain-0_disease_causing', 'Liver_disease_causing', 'Nerve - Tibial_disease_causing', 'Colo

Pathogenic_proportion 0.007120998116760828
['VariationID', 'OMIMs', 'Manifested_Tissues', '#Chr', 'Pos', 'ConsDetail', 'motifEName', 'FeatureID', 'GeneName', 'CCDS', 'Intron', 'Exon', 'SIFTcat', 'PolyPhenCat', 'bStatistic', 'targetScan', 'dbscSNV-rf_score', 'oAA', 'Ref', 'nAA', 'Alt', 'Segway', 'Heart - Left Ventricle_disease_causing', 'brain_disease_causing', 'Lung_disease_causing', 'Muscle - Skeletal_disease_causing', 'Skin - Sun Exposed_disease_causing', 'Adipose - Subcutaneous_disease_causing', 'Artery - Aorta_disease_causing', 'Artery - Coronary_disease_causing', 'brain-0_disease_causing', 'Liver_disease_causing', 'Nerve - Tibial_disease_causing', 'Colon - Sigmoid_disease_causing', 'kidney_disease_causing', 'Heart - Atrial Appendage_disease_causing', 'Breast - Mammary Tissue_disease_causing', 'Uterus_disease_causing', 'Adipose - Visceral_disease_causing', 'Esophagus - Gastroesophageal Junction_disease_causing', 'Esophagus - Mucosa_disease_causing', 'brain-1_disease_causing', 'Skin

-------------  brain-2_disease_causing  ------------------
Pathogenic_proportion 0.007886064030131828
['VariationID', 'OMIMs', 'Manifested_Tissues', '#Chr', 'Pos', 'ConsDetail', 'motifEName', 'FeatureID', 'GeneName', 'CCDS', 'Intron', 'Exon', 'SIFTcat', 'PolyPhenCat', 'bStatistic', 'targetScan', 'dbscSNV-rf_score', 'oAA', 'Ref', 'nAA', 'Alt', 'Segway', 'Heart - Left Ventricle_disease_causing', 'brain_disease_causing', 'Lung_disease_causing', 'Muscle - Skeletal_disease_causing', 'Skin - Sun Exposed_disease_causing', 'Adipose - Subcutaneous_disease_causing', 'Artery - Aorta_disease_causing', 'Artery - Coronary_disease_causing', 'brain-0_disease_causing', 'Liver_disease_causing', 'Nerve - Tibial_disease_causing', 'Colon - Sigmoid_disease_causing', 'kidney_disease_causing', 'Heart - Atrial Appendage_disease_causing', 'Breast - Mammary Tissue_disease_causing', 'Uterus_disease_causing', 'Adipose - Visceral_disease_causing', 'Esophagus - Gastroesophageal Junction_disease_causing', 'Esophagus 

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal_ganglia)_diff

       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.320469             0.299125 

[67968 rows x 558 columns]
       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.3

[67968 rows x 558 columns]
       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.3

[67968 rows x 558 columns]       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.32

[67968 rows x 558 columns]

       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.

[67968 rows x 558 columns]

       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.

[67968 rows x 558 columns]

       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.

[67968 rows x 558 columns]

       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.

[67968 rows x 558 columns]
       Whole_Brain_diff_net_max  Whole_Blood_diff_net_max  \
0                      0.019041                  0.066556   
1                      0.019041                  0.066556   
2                      0.019041                  0.066556   
3                      0.019041                  0.066556   
4                      0.019041                  0.066556   
...                         ...                       ...   
67963                       NaN                       NaN   
67964                 -0.078986                  0.005801   
67965                 -0.078986                  0.005801   
67966                  0.059560                 -0.148454   
67967                 -0.055618                 -0.212696   

       Vagina_diff_net_max  Uterus_diff_net_max  Thyroid_diff_net_max  \
0                 0.320469             0.299125              0.084393   
1                 0.320469             0.299125              0.084393   
2                 0.3

52  ENSG00000025708  0.144628   NaN
ENSG00000103197    0.192969
ENSG00000165699    0.088257
ENSG00000077498    0.032909
ENSG00000092295    0.031414
ENSG00000130826    0.028422
Name: GeneID_y, dtype: float64
              GeneID  Fraction  Fold
0    ENSG00000152795  0.000565     0
1    ENSG00000173991  0.000565     0
2    ENSG00000167552  0.000565     0
3    ENSG00000074181  0.000565     0
4    ENSG00000170876  0.000565     0
..               ...       ...   ...
108  ENSG00000025708  0.039526     6
109  ENSG00000135636  0.046866     7
110  ENSG00000183091  0.068323     7
111  ENSG00000171100  0.071711     8
112  ENSG00000104936  0.099944     8

[113 rows x 3 columns]
ENSG00000103197    0.407583
ENSG00000165699    0.186414
ENSG00000001626    0.039494
ENSG00000104450    0.023697
ENSG00000039139    0.023697
Name: GeneID_y, dtype: float64ENSG00000144285    0.048540
ENSG00000103197    0.043788
ENSG00000164190    0.037169
ENSG00000005339    0.025628
ENSG00000104133    0.023761
Name: GeneID_y,

46  ENSG00000103197  0.407583   NaN
             GeneID  Fraction  Fold New_Fold
0   ENSG00000163666  0.014706   0.0        0
1   ENSG00000107831  0.018382   1.0        0
2   ENSG00000101292  0.011029   0.0        0
3   ENSG00000077782  0.044118   3.0        0
4   ENSG00000187678  0.003676   0.0        0
5   ENSG00000064835  0.051471   3.0        1
6   ENSG00000131808  0.011029   0.0        1
7   ENSG00000013503  0.003676   0.0        0
8   ENSG00000171316  0.040441   2.0        2
9   ENSG00000104826  0.022059   1.0        1
10  ENSG00000011201  0.055147   5.0        2
11  ENSG00000109163  0.055147   4.0        3
12  ENSG00000133895  0.437500   NaN     None
13  ENSG00000179455  0.018382   1.0        3
14  ENSG00000120008  0.003676   0.0        0
15  ENSG00000158815  0.003676   0.0        1
16  ENSG00000169836  0.018382   0.0        3
17  ENSG00000121454  0.022059   1.0        4
18  ENSG00000111276  0.055147   4.0        4
19  ENSG00000163421  0.018382   1.0        4
20  ENSG00000214413


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4]
 Fold:  0
test_genes ['ENSG00000163666', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000187678', 'ENSG00000013503', 'ENSG00000120008']
train_genes ['ENSG00000064835', 'ENSG00000131808', 'ENSG00000171316', 'ENSG00000104826', 'ENSG00000011201', 'ENSG00000109163', 'ENSG00000133895', 'ENSG00000179455', 'ENSG00000158815', 'ENSG00000169836', 'ENSG00000121454', 'ENSG00000111276', 'ENSG00000163421', 'ENSG00000214413', 'ENSG00000165731', 'ENSG00000125848', 'ENSG00000166863', 'ENSG00000107187', 'ENSG00000165588', 'ENSG00000139318']
             GeneID  Fraction  Fold
0   ENSG00000185532  0.001709   0.0
1   ENSG00000130283  0.001709   0.0
2   ENSG00000115267  0.001709   0.0
3   ENSG00000113083  0.003419   0.0
4   ENSG00000197614  0.003419   0.0
5   ENSG00000107201  0.003419   0.0
6   ENSG00000008196  0.005128   0.0
7   ENSG00000120693  0.005128   0.0
8   ENSG00000092969  0.006838   0.0
9   ENSG00000105974  0.006838   0.0
10  ENSG00000065534  0.010256


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


             GeneID  Fraction  Fold
0   ENSG00000100241  0.001776     0
1   ENSG00000144381  0.001776     0
2   ENSG00000198513  0.001776     0
3   ENSG00000133812  0.001776     0
4   ENSG00000149196  0.001776     0
..              ...       ...   ...
68  ENSG00000104381  0.049734     7
69  ENSG00000198400  0.051510     7
70  ENSG00000104833  0.053286     8
71  ENSG00000169562  0.055062     8
72  ENSG00000158887  0.065719     9

[73 rows x 3 columns]
folds_list [0, 1, 2, 3, 4, 5, 6]
 Fold:  0
test_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000198650', 'ENSG00000117594']
train_genes ['ENSG00000163541', 'ENSG00000138449', 'ENSG00000025708', 'ENSG00000114956', 'ENSG00000122787', 'ENSG00000117020', 'ENSG00000103494', 'ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'E


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



              GeneID  Fraction  Fold
0    ENSG00000165078  0.000552   0.0
1    ENSG00000083937  0.000552   0.0
2    ENSG00000174227  0.000552   0.0
3    ENSG00000247626  0.000552   0.0
4    ENSG00000034693  0.000552   0.0
..               ...       ...   ...
123  ENSG00000025708  0.038653   6.0
124  ENSG00000007168  0.047488   6.0
125  ENSG00000165699  0.065157   7.0
126  ENSG00000104133  0.077305   7.0
127  ENSG00000103197  0.142463   NaN

[128 rows x 3 columns]
folds_list [0, 1, 2, 3, 4, 5]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


benign_genes 67383
 Fold:  0
folds_list [0, 1, 2, 3]
test_genes ['ENSG00000127824', 'ENSG00000178568', 'ENSG00000072121', 'ENSG00000113448', 'ENSG00000066427', 'ENSG00000196998', 'ENSG00000213380', 'ENSG00000100749', 'ENSG00000107290', 'ENSG00000135486']
train_genes ['ENSG00000165280', 'ENSG00000136854', 'ENSG00000032444', 'ENSG00000108518', 'ENSG00000170113', 'ENSG00000125744', 'ENSG00000112367', 'ENSG00000187049', 'ENSG00000089280', 'ENSG00000068615', 'ENSG00000197102', 'ENSG00000101276', 'ENSG00000104133', 'ENSG00000123240', 'ENSG00000261609', 'ENSG00000136104', 'ENSG00000013503', 'ENSG00000148606', 'ENSG00000214274', 'ENSG00000015479', 'ENSG00000142168', 'ENSG00000161011', 'ENSG00000147475', 'ENSG00000008086', 'ENSG00000092621', 'ENSG00000021574', 'ENSG00000141385', 'ENSG00000204843', 'ENSG00000083937', 'ENSG00000103671', 'ENSG00000059573', 'ENSG00000126012', 'ENSG00000185803', 'ENSG00000183735', 'ENSG00000172817', 'ENSG00000130294', 'ENSG00000124164', 'ENSG00000169359', 'ENSG00000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4, 5, 6]
 Fold:  0
test_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG00000134755', 'ENSG00000170876', 'ENSG00000198523', 'ENSG00000136574', 'ENSG00000114251', 'ENSG00000164754', 'ENSG00000157764', 'ENSG00000077522', 'ENSG00000112769', 'ENSG00000155657', 'ENSG00000198626', 'ENSG00000118058']
train_genes ['ENSG00000165280', 'ENSG00000123700', 'ENSG00000022267', 'ENSG00000183873', 'ENSG00000101997', 'ENSG00000198947', 'ENSG00000175084', 'ENSG00000104936', 'ENSG00000111245', 'ENSG00000179295', 'ENSG00000180340', 'ENSG00000165995', 'ENSG00000148400', 'ENSG00000103197', 'ENSG00000197859', 'ENSG00000162614', '


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4]
 Fold:  0
test_genes ['ENSG00000204103', 'ENSG00000143368', 'ENSG00000131652', 'ENSG00000165704', 'ENSG00000163818', 'ENSG00000135100', 'ENSG00000169344', 'ENSG00000164708', 'ENSG00000115919', 'ENSG00000173372', 'ENSG00000159189', 'ENSG00000070915', 'ENSG00000134569', 'ENSG00000213853', 'ENSG00000198931', 'ENSG00000138002', 'ENSG00000081052', 'ENSG00000118972', 'ENSG00000130203', 'ENSG00000134371', 'ENSG00000198793', 'ENSG00000177045', 'ENSG00000171862', 'ENSG00000040531', 'ENSG00000157483', 'ENSG00000133059', 'ENSG00000173369', 'ENSG00000137693', 'ENSG00000165195', 'ENSG00000160801', 'ENSG00000164953', 'ENSG00000164754']
train_genes ['ENSG00000165699', 'ENSG00000107485', 'ENSG00000103449', 'ENSG00000174775', 'ENSG00000108950', 'ENSG00000066468', 'ENSG00000070193', 'ENSG00000102900', 'ENSG00000105976', 'ENSG00000115085', 'ENSG00000147383', 'ENSG00000170927', 'ENSG00000123607', 'ENSG00000054282', 'ENSG00000126895', 'ENSG00000091483', 'ENSG00000198087', 'ENSG00


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



             GeneID  Fraction  Fold  New_Fold
0   ENSG00000168958  0.008197     0         0
1   ENSG00000137267  0.002049     0         0
2   ENSG00000213689  0.040984     5         0
3   ENSG00000181038  0.006148     0         0
4   ENSG00000100311  0.010246     1         0
5   ENSG00000196998  0.055328     6         1
6   ENSG00000112234  0.049180     5         2
7   ENSG00000101347  0.053279     6         3
8   ENSG00000167716  0.004098     0         0
9   ENSG00000158828  0.034836     4         1
10  ENSG00000088682  0.004098     0         0
11  ENSG00000125779  0.061475     8         4
12  ENSG00000186153  0.010246     1         0
13  ENSG00000197102  0.018443     3         2
14  ENSG00000116288  0.012295     2         0
15  ENSG00000125454  0.010246     1         2
16  ENSG00000103043  0.008197     0         1
17  ENSG00000100749  0.010246     1         2
18  ENSG00000112541  0.004098     0         2
19  ENSG00000115267  0.055328     7         5
20  ENSG00000129003  0.010246    


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


 Fold:  0



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


test_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000169704', 'ENSG00000213741']
folds_list [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
train_genes ['ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000184500', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000255072', 'ENSG00000144659', 'ENSG00000151702', 'ENSG00000105610', 'E


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 Fold:  0
test_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000167552']
train_genes ['ENSG00000183091', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG00000079805', 'ENSG00000138435', 'ENSG00000197563', 'ENSG00000048392', 'ENSG00000124155', 'ENSG00000070748', 'ENSG000001


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
              GeneID  Fraction  Fold New_Fold
0    ENSG00000172426  0.002581   0.0        0
1    ENSG00000161202  0.009032   3.0        0
2    ENSG00000168303  0.007742   3.0        0
3    ENSG00000152669  0.009032   3.0        0
4    ENSG00000151632  0.005161   1.0        0
..               ...       ...   ...      ...
106  ENSG00000164818  0.002581   0.0        8
107  ENSG00000133703  0.009032   3.0        8
108  ENSG00000114841  0.021935   6.0        8
109  ENSG00000167646  0.006452   2.0        8
110  ENSG00000125848  0.001290   0.0        8

[111 rows x 4 columns]
 Fold:  0
test_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000099940', 'ENSG00000109654']



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


train_genes ['ENSG00000122877', 'ENSG00000111199', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000100285', 'ENSG00000143811', 'ENSG00000075785', 'ENSG00000198835', 'ENSG00000133812', 'ENSG00000135924', 'ENSG00000100596', 'ENSG00000169247', 'ENSG00000158887', 'ENSG00000261609', 'ENSG00000099956', 'ENSG00000104833', 'ENSG00000087053', 'ENSG00000123560', 'ENSG00000156515', 'ENSG00000168356', 'ENSG00000165280', 'ENSG00000186575', 'ENSG00000170445',


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4, 5, 6, 7]
 Fold:  0
test_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000119650']
train_genes ['ENSG00000187098', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000154227', 'ENSG00000172922', 'ENSG00000148655', 'ENSG00000145912', 'ENSG00000168214', 'ENSG00000104044', 'ENSG00000197859', 'ENSG00000163913', 'ENSG00000106327', 'ENSG00000126934', 'ENSG00000141527', 'ENSG00000049167', 'ENSG00000179295', 'ENSG00000092295', 'ENSG00000138449', 'ENSG00000205155', 'ENSG00000140694', 'ENSG00000074181', 'ENSG00000258366'


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 Fold:  0
test_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000124788']
train_genes ['ENSG00000147044', 'ENSG00000198707', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000100014', 'ENSG00000122591', 'ENSG00000184381', 'ENSG00000101997', 'ENSG00000164494', 'ENSG00000140650', 'ENSG00000113971', 'ENSG00000173085', 'ENSG00000046651', 'ENSG00000197694', 'ENSG00000185344', 'ENSG00000073584', 'ENSG00000178538', 'ENSG00000131398', 'ENSG00000162065', 'ENSG00000103089', 'ENSG000000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


folds_list [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 Fold:  0
test_genes ['ENSG00000186487', 'ENSG00000143442', 'ENSG00000162928', 'ENSG00000198689', 'ENSG00000086062', 'ENSG00000086848', 'ENSG00000204843', 'ENSG00000104884', 'ENSG00000184009', 'ENSG00000179029', 'ENSG00000164751', 'ENSG00000163541', 'ENSG00000142186', 'ENSG00000169372', 'ENSG00000134262', 'ENSG00000146282', 'ENSG00000081189', 'ENSG00000163288', 'ENSG00000136908', 'ENSG00000102466', 'ENSG00000079215', 'ENSG00000146938', 'ENSG00000113448', 'ENSG00000168778', 'ENSG00000008086', 'ENSG00000187049', 'ENSG00000169432', 'ENSG00000088682', 'ENSG00000149196', 'ENSG00000106290', 'ENSG00000168280', 'ENSG00000012660', 'ENSG00000043355', 'ENSG00000169057', 'ENSG00000135454', 'ENSG00000157764', 'ENSG00000172922', 'ENSG00000139174', 'ENSG00000198910', 'ENSG00000123560', 'ENSG00000197603', 'ENSG00000169306', 'ENSG00000084774', 'ENSG00000048342', 'ENSG00000213024', 'ENSG00000101871', 'ENSG00000107371', 'ENSG00000130741', 'ENSG00000175283', 'ENSG

benign_genes 62076
test_fraction:  0.09917355371900827
train_fraction:  0.9008264462809917
test_fraction:  0.09926470588235294
train_fraction:  0.9007352941176471
test_fraction:  0.0995260663507109
train_fraction:  0.9004739336492891
test_fraction:  0.09914529914529914
train_fraction:  0.9008547008547009
test_fraction:  0.09969167523124357
train_fraction:  0.9003083247687564
test_fraction:  0.09911242603550297
train_fraction:  0.900887573964497
test_fraction:  0.09995124329595319
train_fraction:  0.9000487567040468
test_fraction:  0.09888059701492537
train_fraction:  0.9011194029850746
test_fraction:  0.0994671403197158
train_fraction:  0.9005328596802842
test_fraction:  0.1
train_fraction:  0.9
test_fraction:  0.09836065573770492
train_fraction:  0.9016393442622951
test_fraction:  0.09994353472614342
train_fraction:  0.9000564652738566
test_fraction:  0.09935483870967741
train_fraction:  0.9006451612903226
test_fraction:  0.09947643979057591
train_fraction:  0.900523560209424
test_fra

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

261      False
272      False
276      False
286      False
330      False
         ...  
67926    False
67935    False
67942    False
67954    False
67963    False
Name: Heart - Left Ventricle_disease_causing, Length: 6564, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
3        False
17       False
18       False
28       False
34       False
         ...  
67929    False
67935    False
67945    False
67949    False
67966    False
Name: Artery - Aorta_disease_causing, Length: 6736, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}
261      False
275      False
276      False
286      False
292      False
         ...  
67923    False
67935    False
67947    False
67954    False
67963    False
Name: Lung_disease_causing, Length: 6710, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_sample

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9913895486935868, 'recall': 1.0, 'f1-score': 0.9956761592366185, 'support': 6678}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 58}, 'accuracy': 0.9913895486935868, 'macro avg': {'precision': 0.4956947743467934, 'recall': 0.5, 'f1-score': 0.4978380796183092, 'support': 6736}, 'weighted avg': {'precision': 0.9828532372588736, 'recall': 0.9913895486935868, 'f1-score': 0.9871029381505549, 'support': 6736}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000119699', 'ENSG00000123066', 'ENSG00000197594', 'ENSG00000163513']
train_genes ['ENSG00000107796', 'ENSG00000130283', 'ENSG00000092969', 'ENSG00000105974', 'ENSG00000065534', 'ENSG00000166949', 'ENSG00000106991', 'ENSG00000106799', 'ENSG00000008196', 'ENSG00000113083', 'ENSG00000197614', 'ENSG00000171303', 'ENSG00000185532', 'ENSG00000120693', 'ENSG00000139567', 'ENSG00000115267', 'ENSG00000204217', 'ENSG00000107201']
benign_genes 67383
test_fraction:  0.07692307692307693
train_fraction:  0.9230769230769231
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_

12       False
28       False
30       False
54       False
67       False
         ...  
67942    False
67946    False
67949    False
67957    False
67959    False
Name: Artery - Aorta_disease_causing, Length: 5210, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927884615384616, 'recall': 1.0, 'f1-score': 0.9963811821471652, 'support': 6608}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927884615384616, 'macro avg': {'precision': 0.4963942307692308, 'recall': 0.5, 'f1-score': 0.4981905910735826, 'support': 6656}, 'weighted avg': {'precision': 0.9856289293639053, 'recall': 0.9927884615384616, 'f1-score': 0.9891957409297577, 'support': 6656}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000196998', 'ENSG00000158828', 'ENSG00000103043']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG00000116675', 'ENSG0000021302

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9727286424589423, 'recall': 1.0, 'f1-score': 0.9861758191399985, 'support': 6456}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9727286424589423, 'macro avg': {'precision': 0.48636432122947115, 'recall': 0.5, 'f1-score': 0.49308790956999926, 'support': 6637}, 'weighted avg': {'precision': 0.9462010118600168, 'recall': 0.9727286424589423, 'f1-score': 0.9592814657778862, 'support': 6637}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000118873', 'ENSG00000112425', 'ENSG00000100014']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000034693', 'ENSG00000007168', 'ENSG00000103197', 'ENSG00000072121', 'ENSG00000114956', 'ENSG00000131238', 'ENSG00000090487', 'ENSG00000137285', 'ENSG00000131462', 'ENSG00000107147', 'ENSG00000188021', 'ENSG00000162928', 'ENSG00000168280', 'ENSG00000165078', 'ENSG00000099246', 'ENSG00000197912', 'ENSG00000163541', 'ENSG00000196998', 'ENSG00000101276', 'ENSG00000112357', 'ENSG00000169372', 'ENSG00000013503', 'ENSG0000026023

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

10       False
12       False
28       False
30       False
54       False
         ...  
67920    False
67923    False
67942    False
67955    False
67958    False
Name: brain-2_disease_causing, Length: 6642, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}
268      False
270      False
312      False
321      False
325      False
         ...  
67931    False
67939    False
67942    False
67958    False
67966    False
Name: brain-0_disease_causing, Length: 6664, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9694849657636201, 'recall': 1.0, 'f1-score': 0.9845060841962058, 'support': 6513}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 205}, 'accuracy': 0.9694849657636201, 'macro avg': {'precision': 0.48474248288181004, 'recall': 0.5, 'f1-score': 0.4922530420981029, 'support': 6718}, 'weighted avg': {'precision': 0.9399010988416877, 'recall': 0.9694849657636201, 'f1-score': 0.9544638473310343, 'support': 6718}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000165699', 'ENSG00000107485', 'ENSG00000103449', 'ENSG00000174775', 'ENSG00000108950', 'ENSG00000066468', 'ENSG00000070193', 'ENSG00000102900', 'ENSG00000105976', 'ENSG00000115085', 'ENSG00000147383', 'ENSG00000123607', 'ENSG00000054282']
train_genes ['ENSG00000204103', 'ENSG00000143368', 'ENSG00000131652', 'ENSG00000165704', 'ENSG00000163818', 'ENSG00000135100', 'ENSG00000169344', 'ENSG00000164708', 'ENSG00000115919', 'ENSG00000173372', 'ENSG00000159189', 'ENSG00000070915', 'ENSG00000134569', 'ENSG00000213853', 'ENSG00000198931', 'ENSG00000138002', 'ENSG00000081052', 'ENSG00000118972', 'ENSG00000130203', 'ENSG00000134371', 'ENSG00000198793', 'ENSG00000177045', 'ENSG00000171862', 'ENSG00000040531', 'ENSG00000157483', 'ENSG00000133059', 'ENSG00000173369', 'ENSG00000137693', 'ENSG00000165195', 'ENSG00000160801', 'ENSG00000164953', 'ENSG00000170927', 'ENSG00000164754', 'ENSG00000126895', 'ENSG00000091483', 'ENSG00000198087', 'ENSG00000162399', 'ENSG0000011876

268      False
270      False
312      False
321      False
325      False
         ...  
67898    False
67909    False
67916    False
67942    False
67949    False
Name: kidney_disease_causing, Length: 6680, dtype: bool
{'random_state': 1234, 'n_estimators': 150, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 40, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9959958475456029, 'recall': 1.0, 'f1-score': 0.9979939074225426, 'support': 6716}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 27}, 'accuracy': 0.9959958475456029, 'macro avg': {'precision': 0.49799792377280144, 'recall': 0.5, 'f1-score': 0.4989969537112713, 'support': 6743}, 'weighted avg': {'precision': 0.9920077283280837, 'recall': 0.9959958475456029, 'f1-score': 0.9939977876686632, 'support': 6743}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000064835', 'ENSG00000131808', 'ENSG00000104826', 'ENSG00000158815', 'ENSG00000214413', 'ENSG00000125848', 'ENSG00000166863']
train_genes ['ENSG00000163666', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000187678', 'ENSG00000013503', 'ENSG00000171316', 'ENSG00000011201', 'ENSG00000109163', 'ENSG00000133895', 'ENSG00000179455', 'ENSG00000120008', 'ENSG00000169836', 'ENSG00000121454', 'ENSG00000111276', 'ENSG00000163421', 'ENSG00000165731', 'ENSG00000107187', 'ENSG00000165588', 'ENSG00000139318']
benign_genes 67696
test_fraction:  0.09926470588235294
train_fraction:  0.9007352941176471
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', '

12       False
28       False
30       False
54       False
67       False
         ...  
67949    False
67951    False
67957    False
67958    False
67964    False
Name: Pituitary_disease_causing, Length: 6736, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.991362763915547, 'recall': 1.0, 'f1-score': 0.9956626506024097, 'support': 5165}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 45}, 'accuracy': 0.991362763915547, 'macro avg': {'precision': 0.4956813819577735, 'recall': 0.5, 'f1-score': 0.49783132530120483, 'support': 5210}, 'weighted avg': {'precision': 0.9828001296782727, 'recall': 0.991362763915547, 'f1-score': 0.9870628772286845, 'support': 5210}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 585
TN_t 0
FP_t 67383
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.008606991525423728
threshold:  1
TP_t 584
TN_t 10520
FP_t 56863
FN_t 1
FPR_t 0.8438775358770016
TPR_t 0.9982905982905983
precision 0.010165892039619127
threshold:  2
TP_t 584
TN_t 14466
FP_t 52917
FN_t 1
FPR_t 0.7853167712924625
TPR_t 0.9982905982905983
p

TP_t 0
TN_t 67358
FP_t 25
FN_t 585
FPR_t 0.00037101346036834214
TPR_t 0.0
precision 0.0
threshold:  58
TP_t 0
TN_t 67361
FP_t 22
FN_t 585
FPR_t 0.0003264918451241411
TPR_t 0.0
precision 0.0
threshold:  59
TP_t 0
TN_t 67366
FP_t 17
FN_t 585
FPR_t 0.0002522891530504727
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67367
FP_t 16
FN_t 585
FPR_t 0.00023744861463573897
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67371
FP_t 12
FN_t 585
FPR_t 0.00017808646097680424
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67371
FP_t 12
FN_t 585
FPR_t 0.00017808646097680424
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67372
FP_t 11
FN_t 585
FPR_t 0.00016324592256207056
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67375
FP_t 8
FN_t 585
FPR_t 0.00011872430731786949
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67377
FP_t 6
FN_t 585
FPR_t 8.904323048840212e-05
TPR_t 0.0
precision 0.0
threshold:  66
TP_t 0
TN_t 67380
FP_t 3
FN_t 585
FPR_t 4.452161524420106e-05
TPR_t 0.0
precis

threshold:  27
TP_t 457
TN_t 56555
FP_t 9870
FN_t 79
FPR_t 0.148588633797516
TPR_t 0.8526119402985075
precision 0.044252929214679965
threshold:  28
TP_t 455
TN_t 56625
FP_t 9800
FN_t 81
FPR_t 0.14753481369966126
TPR_t 0.8488805970149254
precision 0.04436860068259386
threshold:  29
TP_t 451
TN_t 56706
FP_t 9719
FN_t 85
FPR_t 0.14631539330071508
TPR_t 0.8414179104477612
precision 0.044346116027531954
threshold:  30
TP_t 451
TN_t 56775
FP_t 9650
FN_t 85
FPR_t 0.14527662777568687
TPR_t 0.8414179104477612
precision 0.04464904464904465
threshold:  31
TP_t 451
TN_t 56839
FP_t 9586
FN_t 85
FPR_t 0.14431313511479113
TPR_t 0.8414179104477612
precision 0.04493374514297101
threshold:  32
TP_t 449
TN_t 56910
FP_t 9515
FN_t 87
FPR_t 0.1432442604441099
TPR_t 0.8376865671641791
precision 0.045062224006423124
threshold:  33
TP_t 449
TN_t 56980
FP_t 9445
FN_t 87
FPR_t 0.14219044034625516
TPR_t 0.8376865671641791
precision 0.04538103901354356
threshold:  34
TP_t 449
TN_t 57047
FP_t 9378
FN_t 87
FPR_t 0.1

threshold:  88
TP_t 283
TN_t 60729
FP_t 5696
FN_t 253
FPR_t 0.08575084681972149
TPR_t 0.5279850746268657
precision 0.047332329821040305
threshold:  89
TP_t 275
TN_t 60868
FP_t 5557
FN_t 261
FPR_t 0.08365826119683854
TPR_t 0.5130597014925373
precision 0.04715363511659808
threshold:  90
TP_t 267
TN_t 60985
FP_t 5440
FN_t 269
FPR_t 0.0818968761761385
TPR_t 0.498134328358209
precision 0.04678465042929735
threshold:  91
TP_t 256
TN_t 61136
FP_t 5289
FN_t 280
FPR_t 0.0796236356793376
TPR_t 0.47761194029850745
precision 0.04616771866546438
threshold:  92
TP_t 244
TN_t 61316
FP_t 5109
FN_t 292
FPR_t 0.07691381257056831
TPR_t 0.4552238805970149
precision 0.04558191668223426
threshold:  93
TP_t 237
TN_t 61515
FP_t 4910
FN_t 299
FPR_t 0.0739179525780956
TPR_t 0.44216417910447764
precision 0.046046240528463185
threshold:  94
TP_t 221
TN_t 61788
FP_t 4637
FN_t 315
FPR_t 0.06980805419646217
TPR_t 0.4123134328358209
precision 0.045491972004940304
threshold:  95
TP_t 202
TN_t 62064
FP_t 4361
FN_t 334


TPR_t 1.0
precision 0.012221269296740995
threshold:  43
TP_t 171
TN_t 3188
FP_t 13896
FN_t 0
FPR_t 0.8133926480917818
TPR_t 1.0
precision 0.01215611004478567
threshold:  44
TP_t 171
TN_t 3115
FP_t 13969
FN_t 0
FPR_t 0.8176656520721143
TPR_t 1.0
precision 0.012093352192362093
threshold:  45
TP_t 171
TN_t 3049
FP_t 14035
FN_t 0
FPR_t 0.8215289159447436
TPR_t 1.0
precision 0.012037167394058848
threshold:  46
TP_t 171
TN_t 2970
FP_t 14114
FN_t 0
FPR_t 0.8261531257316788
TPR_t 1.0
precision 0.011970598529926496
threshold:  47
TP_t 171
TN_t 2906
FP_t 14178
FN_t 0
FPR_t 0.8298993210021073
TPR_t 1.0
precision 0.01191720677399122
threshold:  48
TP_t 171
TN_t 2853
FP_t 14231
FN_t 0
FPR_t 0.8330016389604308
TPR_t 1.0
precision 0.011873350923482849
threshold:  49
TP_t 171
TN_t 2788
FP_t 14296
FN_t 0
FPR_t 0.8368063685319598
TPR_t 1.0
precision 0.011820004147369877
threshold:  50
TP_t 171
TN_t 2714
FP_t 14370
FN_t 0
FPR_t 0.8411379068133926
TPR_t 1.0
precision 0.011759851454507943
threshold:  51
TP

 [1.         1.        ]]
AUC
0.8181518975382731
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 171
TN_t 0
FP_t 17146
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.009874689611364555
threshold:  1
TP_t 171
TN_t 4878
FP_t 12268
FN_t 0
FPR_t 0.7155021579377114
TPR_t 1.0
precision 0.013747085778599566
threshold:  2
TP_t 171
TN_t 5802
FP_t 11344
FN_t 0
FPR_t 0.66161203779

TN_t 10503
FP_t 6643
FN_t 9
FPR_t 0.38743730316108715
TPR_t 0.9473684210526315
precision 0.023806024981631153
threshold:  58
TP_t 162
TN_t 10546
FP_t 6600
FN_t 9
FPR_t 0.3849294296045725
TPR_t 0.9473684210526315
precision 0.023957409050576754
threshold:  59
TP_t 162
TN_t 10569
FP_t 6577
FN_t 9
FPR_t 0.3835880088650414
TPR_t 0.9473684210526315
precision 0.02403917495177326
threshold:  60
TP_t 162
TN_t 10604
FP_t 6542
FN_t 9
FPR_t 0.3815467164353202
TPR_t 0.9473684210526315
precision 0.02416467780429594
threshold:  61
TP_t 162
TN_t 10637
FP_t 6509
FN_t 9
FPR_t 0.3796220692872973
TPR_t 0.9473684210526315
precision 0.024284215260080948
threshold:  62
TP_t 162
TN_t 10680
FP_t 6466
FN_t 9
FPR_t 0.3771141957307827
TPR_t 0.9473684210526315
precision 0.02444176222088111
threshold:  63
TP_t 162
TN_t 10730
FP_t 6416
FN_t 9
FPR_t 0.3741980636883238
TPR_t 0.9473684210526315
precision 0.0246275463666768
threshold:  64
TP_t 162
TN_t 10776
FP_t 6370
FN_t 9
FPR_t 0.37151522220926164
TPR_t 0.94736842105

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9728391356542617, 'recall': 1.0, 'f1-score': 0.9862326005932913, 'support': 6483}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9728391356542617, 'macro avg': {'precision': 0.48641956782713086, 'recall': 0.5, 'f1-score': 0.49311630029664566, 'support': 6664}, 'weighted avg': {'precision': 0.946415983860531, 'recall': 0.9728391356542617, 'f1-score': 0.9594456707152322, 'support': 6664}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000007168', 'ENSG00000072121', 'ENSG00000114956', 'ENSG00000131238', 'ENSG00000090487', 'ENSG00000137285', 'ENSG00000131462', 'ENSG00000107147', 'ENSG00000188021', 'ENSG00000162928', 'ENSG00000168280', 'ENSG00000165078', 'ENSG00000099246', 'ENSG00000197912']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000103197', 'ENSG00000118873', 'ENSG00000112425', 'ENSG00000100014', 'ENSG00000163541', 'ENSG00000196998', 'ENSG00000101276', 'ENSG00000112357', 'ENSG00000169372', 'ENSG00000013503', 'ENSG0000026023

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.992773261065944, 'recall': 1.0, 'f1-score': 0.9963735267452402, 'support': 6594}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.992773261065944, 'macro avg': {'precision': 0.496386630532972, 'recall': 0.5, 'f1-score': 0.4981867633726201, 'support': 6642}, 'weighted avg': {'precision': 0.985598747887509, 'recall': 0.992773261065944, 'f1-score': 0.9891729953866477, 'support': 6642}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000112234', 'ENSG00000197102', 'ENSG00000125454', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000145335']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000116288', 'ENSG00000103043', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG00000116675', 'ENSG0000021302

2        False
31       False
50       False
63       False
74       False
         ...  
67934    False
67935    False
67936    False
67942    False
67954    False
Name: brain-2_disease_causing, Length: 6655, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884540410856201, 'recall': 1.0, 'f1-score': 0.9941934997360682, 'support': 6592}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884540410856201, 'macro avg': {'precision': 0.49422702054281004, 'recall': 0.5, 'f1-score': 0.4970967498680341, 'support': 6669}, 'weighted avg': {'precision': 0.9770413913384928, 'recall': 0.9884540410856201, 'f1-score': 0.982714582435172, 'support': 6669}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000109163', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000111834', 'ENSG00000131808']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000180340', 'ENSG00000179295', 'ENSG00000175294', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000166863', 'ENSG00000112210', 'ENSG00000256061', 'ENSG00000086288', 'ENSG00000104450', 'ENSG00000187726', 'ENSG00000163161', 'ENSG00000105479', 'ENSG00000132155', 'ENSG00000013503', 'ENSG00000159079', 'ENSG00000091651', 'ENSG00000214413', 'ENSG00000197265', 'ENSG00000039139', 'ENSG00000167131', 'ENSG00000112312', 'ENSG00000080572', 'ENSG00000169032', 'ENSG00000157423', 'ENSG00000169126', 'ENSG00000169071', 'ENSG0000017369

10       False
12       False
28       False
30       False
54       False
         ...  
67903    False
67914    False
67921    False
67942    False
67949    False
Name: Testis_disease_causing, Length: 6665, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.969311377245509, 'recall': 1.0, 'f1-score': 0.9844165716457621, 'support': 6475}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 205}, 'accuracy': 0.969311377245509, 'macro avg': {'precision': 0.4846556886227545, 'recall': 0.5, 'f1-score': 0.49220828582288106, 'support': 6680}, 'weighted avg': {'precision': 0.9395645460575854, 'recall': 0.969311377245509, 'f1-score': 0.954206182845256, 'support': 6680}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000170927', 'ENSG00000126895', 'ENSG00000091483', 'ENSG00000198087', 'ENSG00000162399', 'ENSG00000118762', 'ENSG00000138079', 'ENSG00000203485', 'ENSG00000136872', 'ENSG00000068078', 'ENSG00000163687', 'ENSG00000100360', 'ENSG00000147224', 'ENSG00000166341', 'ENSG00000089597']
train_genes ['ENSG00000204103', 'ENSG00000143368', 'ENSG00000131652', 'ENSG00000165704', 'ENSG00000163818', 'ENSG00000135100', 'ENSG00000169344', 'ENSG00000164708', 'ENSG00000115919', 'ENSG00000173372', 'ENSG00000159189', 'ENSG00000070915', 'ENSG00000134569', 'ENSG00000213853', 'ENSG00000198931', 'ENSG00000138002', 'ENSG00000081052', 'ENSG00000118972', 'ENSG00000130203', 'ENSG00000134371', 'ENSG00000198793', 'ENSG00000177045', 'ENSG00000171862', 'ENSG00000165699', 'ENSG00000040531', 'ENSG00000157483', 'ENSG00000133059', 'ENSG00000173369', 'ENSG00000137693', 'ENSG00000165195', 'ENSG00000160801', 'ENSG00000107485', 'ENSG00000103449', 'ENSG00000164953', 'ENSG00000174775', 'ENSG0000010895

260      False
289      False
308      False
321      False
335      False
         ...  
67923    False
67928    False
67937    False
67958    False
67967    False
Name: kidney_disease_causing, Length: 6715, dtype: bool
{'random_state': 1234, 'n_estimators': 150, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 40, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9729523227383863, 'recall': 1.0, 'f1-score': 0.9862907598172101, 'support': 6367}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.9729523227383863, 'macro avg': {'precision': 0.48647616136919314, 'recall': 0.5, 'f1-score': 0.4931453799086051, 'support': 6544}, 'weighted avg': {'precision': 0.9466362223220209, 'recall': 0.9729523227383863, 'f1-score': 0.9596138856595624, 'support': 6544}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000183091', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000119401', 'ENSG00000120729']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG00000079805', 'ENSG00000138435', 'ENSG00000197563', 'ENSG00000048392', 'ENSG00000167552', 'ENSG00000124155', 'ENSG00000070748', 'ENSG00000171100', 'ENSG00000170876', 'ENSG0000017226

10       False
12       False
54       False
63       False
67       False
         ...  
67898    False
67928    False
67941    False
67942    False
67949    False
Name: Muscle - Skeletal_disease_causing, Length: 6541, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9959916864608076, 'recall': 1.0, 'f1-score': 0.9979918185198958, 'support': 6709}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 27}, 'accuracy': 0.9959916864608076, 'macro avg': {'precision': 0.4979958432304038, 'recall': 0.5, 'f1-score': 0.4989959092599479, 'support': 6736}, 'weighted avg': {'precision': 0.9919994394990437, 'recall': 0.9959916864608076, 'f1-score': 0.9939915544017193, 'support': 6736}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000171316', 'ENSG00000011201', 'ENSG00000165588']
train_genes ['ENSG00000163666', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000187678', 'ENSG00000064835', 'ENSG00000131808', 'ENSG00000013503', 'ENSG00000104826', 'ENSG00000109163', 'ENSG00000133895', 'ENSG00000179455', 'ENSG00000120008', 'ENSG00000158815', 'ENSG00000169836', 'ENSG00000121454', 'ENSG00000111276', 'ENSG00000163421', 'ENSG00000214413', 'ENSG00000165731', 'ENSG00000125848', 'ENSG00000166863', 'ENSG00000107187', 'ENSG00000139318']
benign_genes 67696
test_fraction:  0.09926470588235294
train_fraction:  0.9007352941176471
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', '

2        False
31       False
50       False
63       False
74       False
         ...  
67876    False
67900    False
67904    False
67934    False
67963    False
Name: Pituitary_disease_causing, Length: 6742, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9727696705280577, 'recall': 1.0, 'f1-score': 0.986196903835888, 'support': 6466}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9727696705280577, 'macro avg': {'precision': 0.48638483526402887, 'recall': 0.5, 'f1-score': 0.493098451917944, 'support': 6647}, 'weighted avg': {'precision': 0.946280831899266, 'recall': 0.9727696705280577, 'f1-score': 0.9593424372202274, 'support': 6647}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000163541', 'ENSG00000196998', 'ENSG00000101276', 'ENSG00000112357', 'ENSG00000169372', 'ENSG00000013503', 'ENSG00000260230', 'ENSG00000073464', 'ENSG00000174227', 'ENSG00000213689', 'ENSG00000187566', 'ENSG00000127980', 'ENSG00000123560', 'ENSG00000125875', 'ENSG00000158290', 'ENSG00000135069', 'ENSG00000060642', 'ENSG00000173085', 'ENSG00000161011', 'ENSG00000204843', 'ENSG00000183785', 'ENSG00000134899']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000007168', 'ENSG00000103197', 'ENSG0000011887

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927873779113449, 'recall': 1.0, 'f1-score': 0.9963806364047656, 'support': 6607}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927873779113449, 'macro avg': {'precision': 0.49639368895567243, 'recall': 0.5, 'f1-score': 0.4981903182023828, 'support': 6655}, 'weighted avg': {'precision': 0.9856267777400834, 'recall': 0.9927873779113449, 'f1-score': 0.9891941194179242, 'support': 6655}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000101347', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000164494']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000145335', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG00000116675', 'ENSG0000021302

23       False
41       False
42       False
49       False
57       False
         ...  
67900    False
67926    False
67928    False
67935    False
67944    False
Name: brain-2_disease_causing, Length: 6663, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9694713328369322, 'recall': 1.0, 'f1-score': 0.9844990548204159, 'support': 6510}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 205}, 'accuracy': 0.9694713328369322, 'macro avg': {'precision': 0.4847356664184661, 'recall': 0.5, 'f1-score': 0.49224952741020794, 'support': 6715}, 'weighted avg': {'precision': 0.9398746651926179, 'recall': 0.9694713328369322, 'f1-score': 0.9544436108534485, 'support': 6715}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000184908', 'ENSG00000019186', 'ENSG00000103494', 'ENSG00000021488', 'ENSG00000036828', 'ENSG00000086848', 'ENSG00000166086', 'ENSG00000213281', 'ENSG00000131183', 'ENSG00000133703', 'ENSG00000143839', 'ENSG00000214413', 'ENSG00000172071', 'ENSG00000169031', 'ENSG00000162599', 'ENSG00000197891', 'ENSG00000137106', 'ENSG00000121879', 'ENSG00000115339']
train_genes ['ENSG00000204103', 'ENSG00000143368', 'ENSG00000131652', 'ENSG00000165704', 'ENSG00000163818', 'ENSG00000135100', 'ENSG00000169344', 'ENSG00000164708', 'ENSG00000115919', 'ENSG00000173372', 'ENSG00000159189', 'ENSG00000070915', 'ENSG00000134569', 'ENSG00000213853', 'ENSG00000198931', 'ENSG00000138002', 'ENSG00000081052', 'ENSG00000118972', 'ENSG00000130203', 'ENSG00000134371', 'ENSG00000198793', 'ENSG00000177045', 'ENSG00000171862', 'ENSG00000165699', 'ENSG00000040531', 'ENSG00000157483', 'ENSG00000133059', 'ENSG00000173369', 'ENSG00000137693', 'ENSG00000165195', 'ENSG00000160801', 'ENSG0000010748

281      False
299      False
300      False
307      False
315      False
         ...  
67924    False
67931    False
67954    False
67957    False
67959    False
Name: kidney_disease_causing, Length: 6683, dtype: bool
{'random_state': 1234, 'n_estimators': 150, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 40, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.992820819623093, 'recall': 1.0, 'f1-score': 0.9963974782347643, 'support': 6638}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.992820819623093, 'macro avg': {'precision': 0.4964104098115465, 'recall': 0.5, 'f1-score': 0.49819873911738216, 'support': 6686}, 'weighted avg': {'precision': 0.9856931798770703, 'recall': 0.992820819623093, 'f1-score': 0.9892441610114218, 'support': 6686}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000163541', 'ENSG00000138449', 'ENSG00000114956', 'ENSG00000122787', 'ENSG00000117020', 'ENSG00000103494']
train_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000025708', 'ENSG00000198650', 'ENSG00000117594', 'ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'ENSG00000151779', 'ENSG00000205084', 'ENSG00000011143', 'ENSG00000151729', 'ENSG00000198707', 'ENSG00000140521', 'ENSG00000158104', 'ENSG00000008710', 'ENSG00000103876', 'ENSG00000023839', 'ENSG00000151445', 'ENSG00000198677', 'ENSG00000099377', 'ENSG00000112234', 'ENSG00000025796', 'ENSG00000130175', 'ENSG00000168509', 'ENSG00000168306', 'ENSG00000134538', 'ENSG00000123810', 'ENSG00000166548', 'ENSG00000118762', 'ENSG00000107815', 'ENSG00000118971', 'ENSG00000105647', 'ENSG0000016233

10       False
12       False
28       False
30       False
54       False
         ...  
67918    False
67941    False
67942    False
67957    False
67958    False
Name: Liver_disease_causing, Length: 6670, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9899595384384834, 'recall': 1.0, 'f1-score': 0.9949544393403117, 'support': 6606}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 67}, 'accuracy': 0.9899595384384834, 'macro avg': {'precision': 0.4949797692192417, 'recall': 0.5, 'f1-score': 0.49747721967015585, 'support': 6673}, 'weighted avg': {'precision': 0.9800198877453351, 'recall': 0.9899595384384834, 'f1-score': 0.9849646375366551, 'support': 6673}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000165280', 'ENSG00000136854', 'ENSG00000032444', 'ENSG00000108518', 'ENSG00000170113', 'ENSG00000125744', 'ENSG00000112367', 'ENSG00000187049', 'ENSG00000013503', 'ENSG00000147475', 'ENSG00000008086']
train_genes ['ENSG00000127824', 'ENSG00000178568', 'ENSG00000072121', 'ENSG00000113448', 'ENSG00000066427', 'ENSG00000196998', 'ENSG00000213380', 'ENSG00000100749', 'ENSG00000107290', 'ENSG00000135486', 'ENSG00000089280', 'ENSG00000068615', 'ENSG00000197102', 'ENSG00000101276', 'ENSG00000104133', 'ENSG00000123240', 'ENSG00000261609', 'ENSG00000136104', 'ENSG00000148606', 'ENSG00000214274', 'ENSG00000015479', 'ENSG00000142168', 'ENSG00000161011', 'ENSG00000092621', 'ENSG00000021574', 'ENSG00000141385', 'ENSG00000204843', 'ENSG00000083937', 'ENSG00000103671', 'ENSG00000059573', 'ENSG00000126012', 'ENSG00000185803', 'ENSG00000183735', 'ENSG00000172817', 'ENSG00000130294', 'ENSG00000124164', 'ENSG00000169359', 'ENSG00000120948', 'ENSG00000188021', 'ENSG0000010308

10       False
12       False
28       False
30       False
54       False
         ...  
67906    False
67909    False
67915    False
67939    False
67942    False
Name: brain-3_disease_causing, Length: 6691, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9959952536339365, 'recall': 1.0, 'f1-score': 0.9979936092739837, 'support': 6715}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 27}, 'accuracy': 0.9959952536339365, 'macro avg': {'precision': 0.49799762681696824, 'recall': 0.5, 'f1-score': 0.49899680463699186, 'support': 6742}, 'weighted avg': {'precision': 0.9920065452613295, 'recall': 0.9959952536339365, 'f1-score': 0.9939968979938892, 'support': 6742}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000109163', 'ENSG00000179455', 'ENSG00000169836', 'ENSG00000139318']
train_genes ['ENSG00000163666', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000187678', 'ENSG00000064835', 'ENSG00000131808', 'ENSG00000013503', 'ENSG00000171316', 'ENSG00000104826', 'ENSG00000011201', 'ENSG00000133895', 'ENSG00000120008', 'ENSG00000158815', 'ENSG00000121454', 'ENSG00000111276', 'ENSG00000163421', 'ENSG00000214413', 'ENSG00000165731', 'ENSG00000125848', 'ENSG00000166863', 'ENSG00000107187', 'ENSG00000165588']
benign_genes 67696
test_fraction:  0.09926470588235294
train_fraction:  0.9007352941176471
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', '

23       False
41       False
42       False
49       False
57       False
         ...  
67918    False
67921    False
67930    False
67948    False
67957    False
Name: Pituitary_disease_causing, Length: 6737, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9727121965927936, 'recall': 1.0, 'f1-score': 0.9861673672143676, 'support': 6452}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9727121965927936, 'macro avg': {'precision': 0.4863560982963968, 'recall': 0.5, 'f1-score': 0.4930836836071838, 'support': 6633}, 'weighted avg': {'precision': 0.9461690174003776, 'recall': 0.9727121965927936, 'f1-score': 0.9592570259712196, 'support': 6633}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000025708', 'ENSG00000166340', 'ENSG00000108231', 'ENSG00000176884', 'ENSG00000091136', 'ENSG00000197694', 'ENSG00000115839', 'ENSG00000140650', 'ENSG00000127824', 'ENSG00000114388', 'ENSG00000012061']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000007168', 'ENSG00000103197', 'ENSG00000118873', 'ENSG00000072121', 'ENSG00000114956', 'ENSG00000131238', 'ENSG00000112425', 'ENSG00000090487', 'ENSG00000137285', 'ENSG00000131462', 'ENSG00000100014', 'ENSG00000107147', 'ENSG00000188021', 'ENSG0000016292

277      False
278      False
282      False
284      False
290      False
         ...  
67944    False
67946    False
67957    False
67962    False
67967    False
Name: brain-0_disease_causing, Length: 6652, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927960378208014, 'recall': 1.0, 'f1-score': 0.9963849977406236, 'support': 6615}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927960378208014, 'macro avg': {'precision': 0.4963980189104007, 'recall': 0.5, 'f1-score': 0.4981924988703118, 'support': 6663}, 'weighted avg': {'precision': 0.9856439727126822, 'recall': 0.9927960378208014, 'f1-score': 0.9892070779009793, 'support': 6663}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000125779', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000159363']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000164494', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG00000116675', 'ENSG0000021302

20       False
24       False
26       False
27       False
32       False
         ...  
67932    False
67945    False
67949    False
67962    False
67966    False
Name: brain-2_disease_causing, Length: 6645, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884471117779445, 'recall': 1.0, 'f1-score': 0.994189994718177, 'support': 6588}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884471117779445, 'macro avg': {'precision': 0.49422355588897227, 'recall': 0.5, 'f1-score': 0.4970949973590885, 'support': 6665}, 'weighted avg': {'precision': 0.9770276927821603, 'recall': 0.9884471117779445, 'f1-score': 0.9827042288377119, 'support': 6665}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000179295', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000256061']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000109163', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000180340', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000111834', 'ENSG00000175294', 'ENSG00000131808', 'ENSG00000166863', 'ENSG00000112210', 'ENSG00000086288', 'ENSG00000104450', 'ENSG00000187726', 'ENSG00000163161', 'ENSG00000105479', 'ENSG00000132155', 'ENSG00000013503', 'ENSG00000159079', 'ENSG00000091651', 'ENSG00000214413', 'ENSG00000197265', 'ENSG00000039139', 'ENSG00000167131', 'ENSG00000112312', 'ENSG00000080572', 'ENSG00000169032', 'ENSG00000157423', 'ENSG00000169126', 'ENSG00000169071', 'ENSG0000017369

2        False
31       False
50       False
63       False
74       False
         ...  
67923    False
67934    False
67954    False
67965    False
67967    False
Name: Testis_disease_causing, Length: 6661, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}
{'False': {'precision': 0.9305983460353494, 'recall': 0.9994775339602926, 'f1-score': 0.9638088840372827, 'support': 5742}, 'True': {'precision': 0.9817073170731707, 'recall': 0.2733446519524618, 'f1-score': 0.4276228419654715, 'support': 589}, 'accuracy': 0.9319222871584268, 'macro avg': {'precision': 0.95615283155426, 'recall': 0.6364110929563772, 'f1-score': 0.6957158630013771, 'support': 6331}, 'weighted avg': {'precision': 0.9353532321420114, 'recall': 0.9319222871584268, 'f1-score': 0.9139252039266688, 'support': 6331}}
@@@  precision: 0.9817073170731707 recall:  0.2733446519524618 f1_score:  0.4276228419654715


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000115267', 'ENSG00000128973', 'ENSG00000105647', 'ENSG00000147044', 'ENSG00000179295', 'ENSG00000130638', 'ENSG00000198198', 'ENSG00000155755', 'ENSG00000215193', 'ENSG00000167716', 'ENSG00000087470', 'ENSG00000104833', 'ENSG00000173898', 'ENSG00000129003', 'ENSG00000165280', 'ENSG00000107960', 'ENSG00000130294', 'ENSG00000158321', 'ENSG00000171385', 'ENSG00000139190', 'ENSG00000162695', 'ENSG00000165282', 'ENSG00000103051', 'ENSG00000014216', 'ENSG00000070610', 'ENSG00000038382', 'ENSG00000138083', 'ENSG00000100749', 'ENSG00000213638', 'ENSG00000158352', 'ENSG00000188603', 'ENSG00000113851', 'ENSG00000164073', 'ENSG00000145348', 'ENSG00000118058', 'ENSG00000103043', 'ENSG00000197121', 'ENSG00000121680', 'ENSG00000166340', 'ENSG00000173409', 'ENSG00000114388', 'ENSG00000147099', 'ENSG00000122550', 'ENSG00000000419', 'ENSG00000138760', 'ENSG00000182173', 'ENSG00000109618', 'ENSG00000147162', 'ENSG00000152217', 'ENSG00000170113', 'ENSG00000075624', 'ENSG0000

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9693251533742331, 'recall': 1.0, 'f1-score': 0.9844236760124611, 'support': 6478}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 205}, 'accuracy': 0.9693251533742331, 'macro avg': {'precision': 0.48466257668711654, 'recall': 0.5, 'f1-score': 0.49221183800623053, 'support': 6683}, 'weighted avg': {'precision': 0.9395912529639805, 'recall': 0.9693251533742331, 'f1-score': 0.9542266307360051, 'support': 6683}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000008710', 'ENSG00000111642', 'ENSG00000143373', 'ENSG00000101076', 'ENSG00000112234', 'ENSG00000173599', 'ENSG00000154803', 'ENSG00000134086', 'ENSG00000198569', 'ENSG00000141510']
train_genes ['ENSG00000204103', 'ENSG00000143368', 'ENSG00000131652', 'ENSG00000165704', 'ENSG00000163818', 'ENSG00000135100', 'ENSG00000169344', 'ENSG00000164708', 'ENSG00000115919', 'ENSG00000173372', 'ENSG00000159189', 'ENSG00000070915', 'ENSG00000134569', 'ENSG00000213853', 'ENSG00000198931', 'ENSG00000138002', 'ENSG00000081052', 'ENSG00000118972', 'ENSG00000130203', 'ENSG00000134371', 'ENSG00000198793', 'ENSG00000177045', 'ENSG00000171862', 'ENSG00000165699', 'ENSG00000040531', 'ENSG00000157483', 'ENSG00000133059', 'ENSG00000173369', 'ENSG00000137693', 'ENSG00000165195', 'ENSG00000160801', 'ENSG00000107485', 'ENSG00000103449', 'ENSG00000164953', 'ENSG00000174775', 'ENSG00000108950', 'ENSG00000066468', 'ENSG00000070193', 'ENSG00000102900', 'ENSG00000105976', 'ENSG0000011508

277      False
278      False
282      False
285      False
311      False
         ...  
67933    False
67941    False
67942    False
67959    False
67967    False
Name: kidney_disease_causing, Length: 7943, dtype: bool
{'random_state': 1234, 'n_estimators': 150, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_depth': 40, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9729399174438159, 'recall': 1.0, 'f1-score': 0.9862843858969392, 'support': 6364}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.9729399174438159, 'macro avg': {'precision': 0.48646995872190796, 'recall': 0.5, 'f1-score': 0.4931421929484696, 'support': 6541}, 'weighted avg': {'precision': 0.9466120829555793, 'recall': 0.9729399174438159, 'f1-score': 0.9595954489906927, 'support': 6541}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000130714', 'ENSG00000128591', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000170876', 'ENSG00000173991', 'ENSG00000152795']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG00000079805', 'ENSG00000138435', 'ENSG00000197563', 'ENSG00000048392', 'ENSG00000167552', 'ENSG00000124155', 'ENSG0000007074

2        False
31       False
50       False
63       False
77       False
         ...  
67904    False
67928    False
67954    False
67964    False
67967    False
Name: Muscle - Skeletal_disease_causing, Length: 6555, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9727901383042694, 'recall': 1.0, 'f1-score': 0.9862074220833651, 'support': 6471}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9727901383042694, 'macro avg': {'precision': 0.4863950691521347, 'recall': 0.5, 'f1-score': 0.49310371104168255, 'support': 6652}, 'weighted avg': {'precision': 0.9463206531820396, 'recall': 0.9727901383042694, 'f1-score': 0.9593728545251736, 'support': 6652}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000142168', 'ENSG00000089280', 'ENSG00000116337', 'ENSG00000104450', 'ENSG00000188603', 'ENSG00000165280', 'ENSG00000081189', 'ENSG00000164751', 'ENSG00000103148', 'ENSG00000116641', 'ENSG00000129255', 'ENSG00000140854', 'ENSG00000081307', 'ENSG00000159363', 'ENSG00000167552', 'ENSG00000077279', 'ENSG00000011143']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000007168', 'ENSG00000103197', 'ENSG00000118873', 'ENSG00000072121', 'ENSG00000114956', 'ENSG00000131238', 'ENSG00000112425', 'ENSG0000009048

259      False
270      False
271      False
282      False
307      False
         ...  
67913    False
67920    False
67932    False
67942    False
67967    False
Name: brain-0_disease_causing, Length: 6653, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927765237020316, 'recall': 1.0, 'f1-score': 0.9963751699139103, 'support': 6597}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927765237020316, 'macro avg': {'precision': 0.4963882618510158, 'recall': 0.5, 'f1-score': 0.49818758495695514, 'support': 6645}, 'weighted avg': {'precision': 0.9856052260138906, 'recall': 0.9927765237020316, 'f1-score': 0.989177877490153, 'support': 6645}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000115267', 'ENSG00000163541', 'ENSG00000103089', 'ENSG00000102466']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000004848', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG00000116675', 'ENSG0000021302

1        False
12       False
13       False
24       False
49       False
         ...  
67916    False
67920    False
67921    False
67929    False
67938    False
Name: brain-2_disease_causing, Length: 6649, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.995992281430904, 'recall': 1.0, 'f1-score': 0.9979921172008626, 'support': 6710}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 27}, 'accuracy': 0.995992281430904, 'macro avg': {'precision': 0.497996140715452, 'recall': 0.5, 'f1-score': 0.4989960586004313, 'support': 6737}, 'weighted avg': {'precision': 0.9920006246699371, 'recall': 0.995992281430904, 'f1-score': 0.9939924456609452, 'support': 6737}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000121454', 'ENSG00000111276', 'ENSG00000163421', 'ENSG00000165731', 'ENSG00000107187']
train_genes ['ENSG00000163666', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000187678', 'ENSG00000064835', 'ENSG00000131808', 'ENSG00000013503', 'ENSG00000171316', 'ENSG00000104826', 'ENSG00000011201', 'ENSG00000109163', 'ENSG00000133895', 'ENSG00000179455', 'ENSG00000120008', 'ENSG00000158815', 'ENSG00000169836', 'ENSG00000214413', 'ENSG00000125848', 'ENSG00000166863', 'ENSG00000165588', 'ENSG00000139318']
benign_genes 67696
test_fraction:  0.16544117647058823
train_fraction:  0.8345588235294118
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', '

19       False
20       False
24       False
26       False
27       False
         ...  
67920    False
67932    False
67941    False
67956    False
67960    False
Name: Pituitary_disease_causing, Length: 11226, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958677685950413, 'recall': 1.0, 'f1-score': 0.9979296066252588, 'support': 6748}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958677685950413, 'macro avg': {'precision': 0.49793388429752067, 'recall': 0.5, 'f1-score': 0.4989648033126294, 'support': 6776}, 'weighted avg': {'precision': 0.9917526125264668, 'recall': 0.9958677685950413, 'f1-score': 0.9938059305648238, 'support': 6776}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000184895', 'ENSG00000214413']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.1
train_fraction:  0.9
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max'

12       False
28       False
30       False
54       False
67       False
         ...  
67931    False
67942    False
67946    False
67949    False
67957    False
Name: Ovary_disease_causing, Length: 6762, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9694070250535062, 'recall': 1.0, 'f1-score': 0.9844658952886274, 'support': 7700}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 243}, 'accuracy': 0.9694070250535062, 'macro avg': {'precision': 0.4847035125267531, 'recall': 0.5, 'f1-score': 0.4922329476443137, 'support': 7943}, 'weighted avg': {'precision': 0.9397499802230892, 'recall': 0.9694070250535062, 'f1-score': 0.9543481548183849, 'support': 7943}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                     True
1                   54.000                     True
2                   33.000                     True
3                   33.000                     True
4                   20.500                     True
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 2051
TN_t 0
FP_t 65917
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.03017596516007533
threshold:  1
TP_t 2046
TN_t 10516
FP_t 55401
FN_t 5
FPR_t 0.8404660406268489
TPR_t 0.9975621647976597
precision 0.03561543683743276
threshold:  2
TP_t 2046
TN_t 14462
FP_t 51455
FN_t 5
FPR_t 0.7806028793786125
TPR_t 0.9975621647976597


threshold:  56
TP_t 2
TN_t 65891
FP_t 26
FN_t 2049
FPR_t 0.0003944354263695253
TPR_t 0.0009751340809361287
precision 0.07142857142857142
threshold:  57
TP_t 1
TN_t 65893
FP_t 24
FN_t 2050
FPR_t 0.00036409423972571565
TPR_t 0.00048756704046806434
precision 0.04
threshold:  58
TP_t 0
TN_t 65895
FP_t 22
FN_t 2051
FPR_t 0.00033375305308190606
TPR_t 0.0
precision 0.0
threshold:  59
TP_t 0
TN_t 65900
FP_t 17
FN_t 2051
FPR_t 0.0002579000864723819
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 65901
FP_t 16
FN_t 2051
FPR_t 0.00024272949315047712
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 65905
FP_t 12
FN_t 2051
FPR_t 0.00018204711986285783
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 65905
FP_t 12
FN_t 2051
FPR_t 0.00018204711986285783
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 65906
FP_t 11
FN_t 2051
FPR_t 0.00016687652654095303
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 65909
FP_t 8
FN_t 2051
FPR_t 0.00012136474657523856
TPR_t 0.0
precision 0.0
threshold:  6

TN_t 56135
FP_t 8896
FN_t 358
FPR_t 0.13679629715058972
TPR_t 0.8145077720207254
precision 0.15017195261750096
threshold:  26
TP_t 1565
TN_t 56205
FP_t 8826
FN_t 365
FPR_t 0.13571988743829866
TPR_t 0.810880829015544
precision 0.150611105764604
threshold:  27
TP_t 1562
TN_t 56266
FP_t 8765
FN_t 368
FPR_t 0.13478187326044502
TPR_t 0.8093264248704664
precision 0.1512539943836545
threshold:  28
TP_t 1556
TN_t 56332
FP_t 8699
FN_t 374
FPR_t 0.13376697267457058
TPR_t 0.8062176165803109
precision 0.15173086299366162
threshold:  29
TP_t 1545
TN_t 56406
FP_t 8625
FN_t 385
FPR_t 0.1326290538358629
TPR_t 0.8005181347150259
precision 0.15191740412979352
threshold:  30
TP_t 1540
TN_t 56470
FP_t 8561
FN_t 390
FPR_t 0.1316449078131968
TPR_t 0.7979274611398963
precision 0.15246015246015246
threshold:  31
TP_t 1536
TN_t 56530
FP_t 8501
FN_t 394
FPR_t 0.1307222709169473
TPR_t 0.7958549222797927
precision 0.1530337750323802
threshold:  32
TP_t 1527
TN_t 56594
FP_t 8437
FN_t 403
FPR_t 0.1297381248942812
T

precision 0.17082018927444795
threshold:  86
TP_t 1071
TN_t 59866
FP_t 5165
FN_t 859
FPR_t 0.07942365948547615
TPR_t 0.5549222797927461
precision 0.1717447081462476
threshold:  87
TP_t 1061
TN_t 59968
FP_t 5063
FN_t 869
FPR_t 0.07785517676185204
TPR_t 0.549740932642487
precision 0.1732527759634226
threshold:  88
TP_t 1034
TN_t 60086
FP_t 4945
FN_t 896
FPR_t 0.0760406575325614
TPR_t 0.5357512953367876
precision 0.1729386184980766
threshold:  89
TP_t 999
TN_t 60198
FP_t 4833
FN_t 931
FPR_t 0.0743184019928957
TPR_t 0.5176165803108809
precision 0.1712962962962963
threshold:  90
TP_t 978
TN_t 60302
FP_t 4729
FN_t 952
FPR_t 0.07271916470606327
TPR_t 0.5067357512953368
precision 0.1713684948309094
threshold:  91
TP_t 948
TN_t 60434
FP_t 4597
FN_t 982
FPR_t 0.0706893635343144
TPR_t 0.49119170984455957
precision 0.17096483318304778
threshold:  92
TP_t 909
TN_t 60587
FP_t 4444
FN_t 1021
FPR_t 0.06833663944887823
TPR_t 0.4709844559585492
precision 0.16981132075471697
threshold:  93
TP_t 871
TN_t 

TP_t 690
TN_t 3416
FP_t 13141
FN_t 8
FPR_t 0.7936824303919793
TPR_t 0.9885386819484241
precision 0.04988793290434531
threshold:  41
TP_t 690
TN_t 3329
FP_t 13228
FN_t 8
FPR_t 0.7989370054961648
TPR_t 0.9885386819484241
precision 0.049576088518465296
threshold:  42
TP_t 690
TN_t 3255
FP_t 13302
FN_t 8
FPR_t 0.803406414205472
TPR_t 0.9885386819484241
precision 0.049313893653516294
threshold:  43
TP_t 690
TN_t 3180
FP_t 13377
FN_t 8
FPR_t 0.8079362203297699
TPR_t 0.9885386819484241
precision 0.0490509703561527
threshold:  44
TP_t 690
TN_t 3107
FP_t 13450
FN_t 8
FPR_t 0.8123452316240865
TPR_t 0.9885386819484241
precision 0.048797736916548796
threshold:  45
TP_t 690
TN_t 3041
FP_t 13516
FN_t 8
FPR_t 0.8163314610134687
TPR_t 0.9885386819484241
precision 0.048571026326904125
threshold:  46
TP_t 690
TN_t 2962
FP_t 13595
FN_t 8
FPR_t 0.8211028567977291
TPR_t 0.9885386819484241
precision 0.04830241512075604
threshold:  47
TP_t 690
TN_t 2898
FP_t 13659
FN_t 8
FPR_t 0.8249682913571299
TPR_t 0.9885

 [1.         1.        ]]
AUC
0.7945939294887001
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                     True
29                    98.6                     True
30                    99.8                     True
32                    99.9                     True
33                    94.5                     True
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 698
TN_t 0
FP_t 16619
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.040307212565686895
threshold:  1
TP_t 692
TN_t 4872
FP_t 11747
FN_t 6
FPR_t 0.7068415668812804
TPR_t 0.9914040114613181
precision 0.05563148163035614
threshold:  2
TP_t 690
TN_t 5794
FP_t 10825
FN_t 8
FPR_t

threshold:  57
TP_t 624
TN_t 10438
FP_t 6181
FN_t 74
FPR_t 0.37192370178711115
TPR_t 0.8939828080229226
precision 0.0916972814107274
threshold:  58
TP_t 623
TN_t 10480
FP_t 6139
FN_t 75
FPR_t 0.36939647391539804
TPR_t 0.8925501432664756
precision 0.09213250517598344
threshold:  59
TP_t 620
TN_t 10500
FP_t 6119
FN_t 78
FPR_t 0.36819303207172516
TPR_t 0.8882521489971347
precision 0.09200178067962606
threshold:  60
TP_t 618
TN_t 10533
FP_t 6086
FN_t 80
FPR_t 0.36620735302966484
TPR_t 0.8853868194842407
precision 0.09218377088305489
threshold:  61
TP_t 618
TN_t 10566
FP_t 6053
FN_t 80
FPR_t 0.3642216739876045
TPR_t 0.8853868194842407
precision 0.0926397841403088
threshold:  62
TP_t 616
TN_t 10607
FP_t 6012
FN_t 82
FPR_t 0.3617546182080751
TPR_t 0.8825214899713467
precision 0.09293904646952324
threshold:  63
TP_t 615
TN_t 10656
FP_t 5963
FN_t 83
FPR_t 0.3588061856910765
TPR_t 0.8810888252148997
precision 0.09349346305868045
threshold:  64
TP_t 615
TN_t 10702
FP_t 5917
FN_t 83
FPR_t 0.356038

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9727942281677439, 'recall': 1.0, 'f1-score': 0.9862095238095239, 'support': 6472}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9727942281677439, 'macro avg': {'precision': 0.48639711408387193, 'recall': 0.5, 'f1-score': 0.49310476190476193, 'support': 6653}, 'weighted avg': {'precision': 0.9463286103564765, 'recall': 0.9727942281677439, 'f1-score': 0.959378932525964, 'support': 6653}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000092621', 'ENSG00000184381', 'ENSG00000147044', 'ENSG00000004848', 'ENSG00000075624', 'ENSG00000137267', 'ENSG00000247626', 'ENSG00000107290', 'ENSG00000108518', 'ENSG00000215301', 'ENSG00000120903', 'ENSG00000273079', 'ENSG00000143811', 'ENSG00000159082', 'ENSG00000165282', 'ENSG00000128039', 'ENSG00000215193', 'ENSG00000185344', 'ENSG00000101901']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000007168', 'ENSG00000103197', 'ENSG00000118873', 'ENSG00000072121', 'ENSG00000114956', 'ENSG0000013123

276      False
282      False
285      False
287      False
298      False
         ...  
67946    False
67950    False
67955    False
67956    False
67964    False
Name: brain-0_disease_causing, Length: 6632, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9800689345122134, 'recall': 1.0, 'f1-score': 0.9899341557556951, 'support': 6540}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133}, 'accuracy': 0.9800689345122134, 'macro avg': {'precision': 0.4900344672561067, 'recall': 0.5, 'f1-score': 0.49496707787784755, 'support': 6673}, 'weighted avg': {'precision': 0.9605351163959053, 'recall': 0.9800689345122134, 'f1-score': 0.9702037132687316, 'support': 6673}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000187098', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000172922', 'ENSG00000049167']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000119650', 'ENSG00000154227', 'ENSG00000148655', 'ENSG00000145912', 'ENSG00000168214', 'ENSG00000104044', 'ENSG00000197859', 'ENSG00000163913', 'ENSG00000106327', 'ENSG00000126934', 'ENSG00000141527', 'ENSG00000179295', 'ENSG00000092295', 'ENSG00000138449', 'ENSG00000205155', 'ENSG00000140694', 'ENSG00000074181', 'ENSG00000258366', 'ENSG00000121552', 'ENSG0000008081

268      False
270      False
286      False
288      False
312      False
         ...  
67919    False
67942    False
67946    False
67955    False
67958    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 6671, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927808693036547, 'recall': 1.0, 'f1-score': 0.9963773584905661, 'support': 6601}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927808693036547, 'macro avg': {'precision': 0.49639043465182736, 'recall': 0.5, 'f1-score': 0.49818867924528304, 'support': 6649}, 'weighted avg': {'precision': 0.9856138544553202, 'recall': 0.9927808693036547, 'f1-score': 0.9891843801167435, 'support': 6649}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000004848', 'ENSG00000087086', 'ENSG00000113721', 'ENSG00000143324', 'ENSG00000213024']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000103089', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000100225', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG0000011667

18       False
27       False
35       False
43       False
45       False
         ...  
67912    False
67915    False
67931    False
67932    False
67936    False
Name: brain-2_disease_causing, Length: 6652, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884401741480259, 'recall': 1.0, 'f1-score': 0.9941864854662137, 'support': 6584}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884401741480259, 'macro avg': {'precision': 0.49422008707401294, 'recall': 0.5, 'f1-score': 0.49709324273310684, 'support': 6661}, 'weighted avg': {'precision': 0.9770139778697796, 'recall': 0.9884401741480259, 'f1-score': 0.9826938628298381, 'support': 6661}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000112210', 'ENSG00000086288', 'ENSG00000104450', 'ENSG00000187726', 'ENSG00000163161', 'ENSG00000105479', 'ENSG00000132155', 'ENSG00000013503', 'ENSG00000159079', 'ENSG00000091651', 'ENSG00000214413', 'ENSG00000197265', 'ENSG00000039139', 'ENSG00000167131', 'ENSG00000112312', 'ENSG00000080572', 'ENSG00000169032', 'ENSG00000157423', 'ENSG00000169071']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000109163', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000180340', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000179295', 'ENSG00000111834', 'ENSG00000175294', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000131808', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000166863', 'ENSG00000256061', 'ENSG00000169126', 'ENSG0000017369

41       False
42       False
46       False
49       False
57       False
         ...  
67892    False
67918    False
67926    False
67927    False
67949    False
Name: Testis_disease_causing, Length: 6671, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}
{'False': {'precision': 0.9748928352725046, 'recall': 0.9996860282574568, 'f1-score': 0.9871337777088822, 'support': 6370}, 'True': {'precision': 0.9375, 'recall': 0.15463917525773196, 'f1-score': 0.26548672566371684, 'support': 194}, 'accuracy': 0.9747105423522242, 'macro avg': {'precision': 0.9561964176362523, 'recall': 0.5771626017575944, 'f1-score': 0.6263102516862995, 'support': 6564}, 'weighted avg': {'precision': 0.9737876844433051, 'recall': 0.9747105423522242, 'f1-score': 0.9658053913443542, 'support': 6564}}
@@@  precision: 0.9375 recall:  0.15463917525773196 f1_score:  0.26548672566371684


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000165280', 'ENSG00000123700', 'ENSG00000022267', 'ENSG00000183873', 'ENSG00000101997', 'ENSG00000198947', 'ENSG00000175084', 'ENSG00000111245', 'ENSG00000179295', 'ENSG00000180340', 'ENSG00000165995', 'ENSG00000148400', 'ENSG00000197859', 'ENSG00000162614', 'ENSG00000182533']
train_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG00000134755', 'ENSG00000170876', 'ENSG00000198523', 'ENSG00000136574', 'ENSG00000114251', 'ENSG00000164754', 'ENSG00000157764', 'ENSG00000077522', 'ENSG00000112769', 'ENSG00000155657', 'ENSG00000198626', 'ENSG00000118058', 'ENSG00000104936', 'ENSG00000103197', 'ENSG0000010632

268      False
270      False
312      False
321      False
325      False
         ...  
67928    False
67931    False
67939    False
67942    False
67957    False
Name: Heart - Left Ventricle_disease_causing, Length: 6521, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9915865384615384, 'recall': 1.0, 'f1-score': 0.995775497887749, 'support': 6600}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9915865384615384, 'macro avg': {'precision': 0.4957932692307692, 'recall': 0.5, 'f1-score': 0.4978877489438745, 'support': 6656}, 'weighted avg': {'precision': 0.9832438632581361, 'recall': 0.9915865384615384, 'f1-score': 0.987397579035328, 'support': 6656}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000122877', 'ENSG00000111199', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000134259', 'ENSG00000130816', 'ENSG00000100241']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000099940', 'ENSG00000109654', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000133422', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000100285', 'ENSG00000143811', 'ENSG00000075785', 'ENSG00000198835', 'ENSG00000133812', 'ENSG00000135924', 'ENSG00000100596', 'ENSG00000169247', 'ENSG0000015888

10       False
12       False
28       False
30       False
54       False
         ...  
67916    False
67918    False
67928    False
67931    False
67942    False
Name: Nerve - Tibial_disease_causing, Length: 6683, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.995991448423303, 'recall': 1.0, 'f1-score': 0.9979916990226269, 'support': 11181}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 45}, 'accuracy': 0.995991448423303, 'macro avg': {'precision': 0.4979957242116515, 'recall': 0.5, 'f1-score': 0.4989958495113134, 'support': 11226}, 'weighted avg': {'precision': 0.9919989653323491, 'recall': 0.995991448423303, 'f1-score': 0.9939911978239793, 'support': 11226}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 272
TN_t 0
FP_t 67696
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.004001883239171375
threshold:  1
TP_t 271
TN_t 10520
FP_t 57176
FN_t 1
FPR_t 0.8445993854880642
TPR_t 0.9963235294117647
precision 0.004717391682768465
threshold:  2
TP_t 271
TN_t 14466
FP_t 53230
FN_t 1
FPR_t 0.7863093831245569
TPR_t 0.9963235294117647
p

TP_t 0
TN_t 67679
FP_t 17
FN_t 272
FPR_t 0.000251122666036398
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67680
FP_t 16
FN_t 272
FPR_t 0.0002363507445048452
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67684
FP_t 12
FN_t 272
FPR_t 0.0001772630583786339
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67684
FP_t 12
FN_t 272
FPR_t 0.0001772630583786339
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67685
FP_t 11
FN_t 272
FPR_t 0.00016249113684708107
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67688
FP_t 8
FN_t 272
FPR_t 0.0001181753722524226
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67690
FP_t 6
FN_t 272
FPR_t 8.863152918931695e-05
TPR_t 0.0
precision 0.0
threshold:  66
TP_t 0
TN_t 67693
FP_t 3
FN_t 272
FPR_t 4.431576459465847e-05
TPR_t 0.0
precision 0.0
threshold:  67
TP_t 0
TN_t 67694
FP_t 2
FN_t 272
FPR_t 2.954384306310565e-05
TPR_t 0.0
precision 0.0
threshold:  68
TP_t 0
TN_t 67694
FP_t 2
FN_t 272
FPR_t 2.954384306310565e-05
TPR_t 0.0
precision 0.0


FPR_t 0.15098045094746942
TPR_t 0.7159533073929961
precision 0.01794246708922477
threshold:  29
TP_t 184
TN_t 56718
FP_t 9986
FN_t 73
FPR_t 0.1497061645478532
TPR_t 0.7159533073929961
precision 0.01809242871189774
threshold:  30
TP_t 183
TN_t 56786
FP_t 9918
FN_t 74
FPR_t 0.14868673542816024
TPR_t 0.7120622568093385
precision 0.018117018117018116
threshold:  31
TP_t 183
TN_t 56850
FP_t 9854
FN_t 74
FPR_t 0.14772727272727273
TPR_t 0.7120622568093385
precision 0.01823253960346717
threshold:  32
TP_t 182
TN_t 56922
FP_t 9782
FN_t 75
FPR_t 0.14664787718877428
TPR_t 0.708171206225681
precision 0.018265756724207147
threshold:  33
TP_t 182
TN_t 56992
FP_t 9712
FN_t 75
FPR_t 0.1455984648596786
TPR_t 0.708171206225681
precision 0.01839498686072367
threshold:  34
TP_t 181
TN_t 57058
FP_t 9646
FN_t 76
FPR_t 0.14460901894938835
TPR_t 0.7042801556420234
precision 0.01841864251551847
threshold:  35
TP_t 180
TN_t 57126
FP_t 9578
FN_t 77
FPR_t 0.14358958982969536
TPR_t 0.7003891050583657
precision 0.0

FN_t 134
FPR_t 0.08558707124010555
TPR_t 0.4785992217898833
precision 0.02109053497942387
threshold:  90
TP_t 123
TN_t 61120
FP_t 5584
FN_t 134
FPR_t 0.08371312065243464
TPR_t 0.4785992217898833
precision 0.021552479411249343
threshold:  91
TP_t 119
TN_t 61278
FP_t 5426
FN_t 138
FPR_t 0.08134444710961862
TPR_t 0.46303501945525294
precision 0.02146077547339946
threshold:  92
TP_t 116
TN_t 61467
FP_t 5237
FN_t 141
FPR_t 0.0785110338210602
TPR_t 0.45136186770428016
precision 0.02167009153745563
threshold:  93
TP_t 112
TN_t 61669
FP_t 5035
FN_t 145
FPR_t 0.07548272967138403
TPR_t 0.4357976653696498
precision 0.02176024868855644
threshold:  94
TP_t 109
TN_t 61955
FP_t 4749
FN_t 148
FPR_t 0.071195130726793
TPR_t 0.42412451361867703
precision 0.02243721696171264
threshold:  95
TP_t 103
TN_t 62244
FP_t 4460
FN_t 154
FPR_t 0.06686255696809787
TPR_t 0.40077821011673154
precision 0.022572868726714882
threshold:  96
TP_t 97
TN_t 62675
FP_t 4029
FN_t 160
FPR_t 0.06040117534180859
TPR_t 0.3774319066

threshold:  44
TP_t 87
TN_t 3109
FP_t 14053
FN_t 6
FPR_t 0.8188439575807015
TPR_t 0.9354838709677419
precision 0.0061527581329561525
threshold:  45
TP_t 87
TN_t 3043
FP_t 14119
FN_t 6
FPR_t 0.8226896632094162
TPR_t 0.9354838709677419
precision 0.006124172884696607
threshold:  46
TP_t 87
TN_t 2964
FP_t 14198
FN_t 6
FPR_t 0.8272928563104534
TPR_t 0.9354838709677419
precision 0.006090304515225761
threshold:  47
TP_t 87
TN_t 2900
FP_t 14262
FN_t 6
FPR_t 0.8310220254049645
TPR_t 0.9354838709677419
precision 0.006063140288521849
threshold:  48
TP_t 87
TN_t 2847
FP_t 14315
FN_t 6
FPR_t 0.8341102435613564
TPR_t 0.9354838709677419
precision 0.006040827662824608
threshold:  49
TP_t 87
TN_t 2782
FP_t 14380
FN_t 6
FPR_t 0.8378976809229693
TPR_t 0.9354838709677419
precision 0.006013686320591691
threshold:  50
TP_t 87
TN_t 2708
FP_t 14454
FN_t 6
FPR_t 0.8422095326884979
TPR_t 0.9354838709677419
precision 0.005983082318960182
threshold:  51
TP_t 87
TN_t 2644
FP_t 14518
FN_t 6
FPR_t 0.845938701783009


 [1.         1.        ]]
AUC
0.7442837576892183
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 94
TN_t 0
FP_t 17223
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.0054281919501068315
threshold:  1
TP_t 85
TN_t 4869
FP_t 12354
FN_t 9
FPR_t 0.7172966382163386
TPR_t 0.9042553191489362
precision 0.006833346732052416
threshold:  2
TP_t 85
TN_t 5793
FP_t 11430
FN_t 9
FPR_t 

threshold:  57
TP_t 70
TN_t 10488
FP_t 6735
FN_t 24
FPR_t 0.391046855948441
TPR_t 0.7446808510638298
precision 0.010286554004408524
threshold:  58
TP_t 70
TN_t 10531
FP_t 6692
FN_t 24
FPR_t 0.38855019450734485
TPR_t 0.7446808510638298
precision 0.010351966873706004
threshold:  59
TP_t 70
TN_t 10554
FP_t 6669
FN_t 24
FPR_t 0.38721477094582823
TPR_t 0.7446808510638298
precision 0.010387297818667459
threshold:  60
TP_t 70
TN_t 10589
FP_t 6634
FN_t 24
FPR_t 0.3851826046565639
TPR_t 0.7446808510638298
precision 0.010441527446300716
threshold:  61
TP_t 70
TN_t 10622
FP_t 6601
FN_t 24
FPR_t 0.3832665621552575
TPR_t 0.7446808510638298
precision 0.01049317943336831
threshold:  62
TP_t 70
TN_t 10665
FP_t 6558
FN_t 24
FPR_t 0.3807699007141613
TPR_t 0.7446808510638298
precision 0.01056125528062764
threshold:  63
TP_t 70
TN_t 10715
FP_t 6508
FN_t 24
FPR_t 0.3778668060152122
TPR_t 0.7446808510638298
precision 0.010641532380662816
threshold:  64
TP_t 69
TN_t 10760
FP_t 6463
FN_t 25
FPR_t 0.3752540207

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9928035982008996, 'recall': 1.0, 'f1-score': 0.9963888052964189, 'support': 6622}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9928035982008996, 'macro avg': {'precision': 0.4964017991004498, 'recall': 0.5, 'f1-score': 0.49819440264820947, 'support': 6670}, 'weighted avg': {'precision': 0.9856589846006533, 'recall': 0.9928035982008996, 'f1-score': 0.9892183911053802, 'support': 6670}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'ENSG00000008710']
train_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000163541', 'ENSG00000138449', 'ENSG00000025708', 'ENSG00000114956', 'ENSG00000198650', 'ENSG00000122787', 'ENSG00000117594', 'ENSG00000117020', 'ENSG00000103494', 'ENSG00000151779', 'ENSG00000205084', 'ENSG00000011143', 'ENSG00000151729', 'ENSG00000198707', 'ENSG00000140521', 'ENSG00000158104', 'ENSG00000103876', 'ENSG00000023839', 'ENSG00000151445', 'ENSG00000198677', 'ENSG00000099377', 'ENSG00000112234', 'ENSG00000025796', 'ENSG00000130175', 'ENSG00000168509', 'ENSG00000168306', 'ENSG00000134538', 'ENSG00000123810', 'ENSG00000166548', 'ENSG00000118762', 'ENSG00000107815', 'ENSG00000118971', 'ENSG00000105647', 'ENSG0000016233

2        False
31       False
50       False
63       False
74       False
         ...  
67923    False
67924    False
67933    False
67954    False
67958    False
Name: Liver_disease_causing, Length: 6689, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.972708082026538, 'recall': 1.0, 'f1-score': 0.9861652526179011, 'support': 6451}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.972708082026538, 'macro avg': {'precision': 0.486354041013269, 'recall': 0.5, 'f1-score': 0.49308262630895056, 'support': 6632}, 'weighted avg': {'precision': 0.9461610128397462, 'recall': 0.972708082026538, 'f1-score': 0.9592509114351749, 'support': 6632}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000140521', 'ENSG00000197102', 'ENSG00000115275', 'ENSG00000183735', 'ENSG00000100427', 'ENSG00000118971', 'ENSG00000083937', 'ENSG00000101204', 'ENSG00000197121', 'ENSG00000147852', 'ENSG00000124155', 'ENSG00000110436', 'ENSG00000160716', 'ENSG00000117020']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000007168', 'ENSG00000103197', 'ENSG00000118873', 'ENSG00000072121', 'ENSG00000114956', 'ENSG00000131238', 'ENSG00000112425', 'ENSG00000090487', 'ENSG00000137285', 'ENSG00000131462', 'ENSG0000010001

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9729977116704805, 'recall': 1.0, 'f1-score': 0.9863140802598005, 'support': 6378}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.9729977116704805, 'macro avg': {'precision': 0.4864988558352403, 'recall': 0.5, 'f1-score': 0.49315704012990025, 'support': 6555}, 'weighted avg': {'precision': 0.9467245469159915, 'recall': 0.9729977116704805, 'f1-score': 0.9596813430811605, 'support': 6555}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG00000079805', 'ENSG00000138435', 'ENSG00000197563', 'ENSG00000048392', 'ENSG00000124155', 'ENSG00000070748', 'ENSG00000172269', 'ENSG00000171714', 'ENSG00000106692', 'ENSG00000197467', 'ENSG00000173540', 'ENSG00000234438', 'ENSG00000178209', 'ENSG00000107815', 'ENSG00000064419']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG0000017719

test_fraction:  0.09994353472614342
train_fraction:  0.9000564652738566
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'B

260      False
275      False
294      False
304      False
315      False
         ...  
67894    False
67904    False
67912    False
67920    False
67959    False
Name: brain-0_disease_causing, Length: 5343, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927841250751653, 'recall': 1.0, 'f1-score': 0.9963789981894992, 'support': 6604}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927841250751653, 'macro avg': {'precision': 0.49639206253758267, 'recall': 0.5, 'f1-score': 0.4981894990947496, 'support': 6652}, 'weighted avg': {'precision': 0.9856203190012616, 'recall': 0.9927841250751653, 'f1-score': 0.9891892519608317, 'support': 6652}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000260230', 'ENSG00000100225', 'ENSG00000184381', 'ENSG00000136143']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000143324', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG00000116675', 'ENSG0000021302

2        False
17       False
36       False
46       False
57       False
         ...  
67903    False
67908    False
67939    False
67958    False
67960    False
Name: brain-2_disease_causing, Length: 6513, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9742072523137612, 'recall': 1.0, 'f1-score': 0.9869351367968029, 'support': 6421}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9742072523137612, 'macro avg': {'precision': 0.4871036261568806, 'recall': 0.5, 'f1-score': 0.49346756839840145, 'support': 6591}, 'weighted avg': {'precision': 0.9490797704607283, 'recall': 0.9742072523137612, 'f1-score': 0.9614793678307193, 'support': 6591}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000147044', 'ENSG00000198707', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000100014', 'ENSG00000122591', 'ENSG00000184381']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000124788', 'ENSG00000101997', 'ENSG00000164494', 'ENSG00000140650', 'ENSG00000113971', 'ENSG00000173085', 'ENSG00000046651', 'ENSG00000197694', 'ENSG00000185344', 'ENSG00000073584', 'ENSG00000178538', 'ENSG00000131398', 'ENSG00000162065', 'ENSG00000103089', 'ENSG00000086062', 'ENSG00000167632', 'ENSG0000014218

10       False
12       False
54       False
63       False
67       False
         ...  
67921    False
67942    False
67949    False
67958    False
67959    False
Name: brain-1_disease_causing, Length: 6566, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9899865490958003, 'recall': 1.0, 'f1-score': 0.9949680811115282, 'support': 6624}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 67}, 'accuracy': 0.9899865490958003, 'macro avg': {'precision': 0.49499327454790015, 'recall': 0.5, 'f1-score': 0.4974840405557641, 'support': 6691}, 'weighted avg': {'precision': 0.9800733673906115, 'recall': 0.9899865490958003, 'f1-score': 0.9850050170800723, 'support': 6691}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000089280', 'ENSG00000068615', 'ENSG00000197102', 'ENSG00000101276', 'ENSG00000123240', 'ENSG00000261609', 'ENSG00000015479', 'ENSG00000141385', 'ENSG00000204843']
train_genes ['ENSG00000127824', 'ENSG00000178568', 'ENSG00000072121', 'ENSG00000113448', 'ENSG00000066427', 'ENSG00000196998', 'ENSG00000213380', 'ENSG00000100749', 'ENSG00000107290', 'ENSG00000135486', 'ENSG00000165280', 'ENSG00000136854', 'ENSG00000032444', 'ENSG00000108518', 'ENSG00000170113', 'ENSG00000125744', 'ENSG00000112367', 'ENSG00000187049', 'ENSG00000104133', 'ENSG00000136104', 'ENSG00000013503', 'ENSG00000148606', 'ENSG00000214274', 'ENSG00000142168', 'ENSG00000161011', 'ENSG00000147475', 'ENSG00000008086', 'ENSG00000092621', 'ENSG00000021574', 'ENSG00000083937', 'ENSG00000103671', 'ENSG00000059573', 'ENSG00000126012', 'ENSG00000185803', 'ENSG00000183735', 'ENSG00000172817', 'ENSG00000130294', 'ENSG00000124164', 'ENSG00000169359', 'ENSG00000120948', 'ENSG00000188021', 'ENSG0000010308

2        False
31       False
50       False
63       False
74       False
         ...  
67901    False
67903    False
67904    False
67919    False
67921    False
Name: brain-3_disease_causing, Length: 6681, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.992783663442346, 'recall': 1.0, 'f1-score': 0.9963787656984359, 'support': 6466}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 47}, 'accuracy': 0.992783663442346, 'macro avg': {'precision': 0.496391831721173, 'recall': 0.5, 'f1-score': 0.49818938284921793, 'support': 6513}, 'weighted avg': {'precision': 0.9856194023980055, 'recall': 0.992783663442346, 'f1-score': 0.9891885611862561, 'support': 6513}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000185345', 'ENSG00000145348', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000116675']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000136143', 'ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575', 'ENSG0000021302

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9726745274190529, 'recall': 1.0, 'f1-score': 0.9861480075901328, 'support': 5197}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 146}, 'accuracy': 0.9726745274190529, 'macro avg': {'precision': 0.48633726370952646, 'recall': 0.5, 'f1-score': 0.4930740037950664, 'support': 5343}, 'weighted avg': {'precision': 0.9460957362898781, 'recall': 0.9726745274190529, 'f1-score': 0.9592010472479731, 'support': 5343}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000104133']
train_genes ['ENSG00000120948', 'ENSG00000129003', 'ENSG00000162735', 'ENSG00000148606', 'ENSG00000107815', 'ENSG00000214160', 'ENSG00000225830', 'ENSG00000165699', 'ENSG00000100150', 'ENSG00000168778', 'ENSG00000133103', 'ENSG00000189056', 'ENSG00000181830', 'ENSG00000068796', 'ENSG00000034693', 'ENSG00000182173', 'ENSG00000165195', 'ENSG00000126705', 'ENSG00000105647', 'ENSG00000187323', 'ENSG00000160213', 'ENSG00000102100', 'ENSG00000157911', 'ENSG00000166341', 'ENSG00000130741', 'ENSG00000145348', 'ENSG00000007168', 'ENSG00000103197', 'ENSG00000118873', 'ENSG00000072121', 'ENSG00000114956', 'ENSG00000131238', 'ENSG00000112425', 'ENSG00000090487', 'ENSG00000137285', 'ENSG00000131462', 'ENSG00000100014', 'ENSG00000107147', 'ENSG00000188021', 'ENSG00000162928', 'ENSG00000168280', 'ENSG00000165078', 'ENSG00000099246', 'ENSG00000197912', 'ENSG00000163541', 'ENSG00000196998', 'ENSG00000101276', 'ENSG00000112357', 'ENSG00000169372', 'ENSG0000001350

relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigra_diff_net_max', 'Brain_-_Spinal_cord_(cervical_c-1)_diff_net_max', 'Brain_-_Putamen_(basal

2        False
23       False
45       False
54       False
56       False
         ...  
67927    False
67943    False
67957    False
67962    False
67967    False
Name: brain-2_disease_causing, Length: 6368, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}
281      False
303      False
312      False
314      False
318      False
         ...  
67883    False
67913    False
67925    False
67927    False
67962    False
Name: brain-0_disease_causing, Length: 5112, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884575026232949, 'recall': 1.0, 'f1-score': 0.9941952506596307, 'support': 6594}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884575026232949, 'macro avg': {'precision': 0.49422875131164745, 'recall': 0.5, 'f1-score': 0.49709762532981533, 'support': 6671}, 'weighted avg': {'precision': 0.977048234492281, 'recall': 0.9884575026232949, 'f1-score': 0.9827197545869591, 'support': 6671}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000169126', 'ENSG00000173698', 'ENSG00000104826', 'ENSG00000101292', 'ENSG00000188613', 'ENSG00000169946', 'ENSG00000165506', 'ENSG00000177990', 'ENSG00000141519', 'ENSG00000104884', 'ENSG00000143622', 'ENSG00000120008', 'ENSG00000139318', 'ENSG00000115947', 'ENSG00000108061']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000109163', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000180340', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000179295', 'ENSG00000111834', 'ENSG00000175294', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000131808', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000166863', 'ENSG00000112210', 'ENSG00000256061', 'ENSG00000086288', 'ENSG00000104450', 'ENSG00000187726', 'ENSG00000163161', 'ENSG0000010547

20       False
24       False
26       False
27       False
32       False
         ...  
67912    False
67932    False
67953    False
67960    False
67962    False
Name: Testis_disease_causing, Length: 6673, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}
{'False': {'precision': 0.9263483786866548, 'recall': 0.9994725738396625, 'f1-score': 0.961522198731501, 'support': 5688}, 'True': {'precision': 0.9785714285714285, 'recall': 0.23259762308998302, 'f1-score': 0.3758573388203018, 'support': 589}, 'accuracy': 0.9275131432212841, 'macro avg': {'precision': 0.9524599036290416, 'recall': 0.6160350984648227, 'f1-score': 0.6686897687759015, 'support': 6277}, 'weighted avg': {'precision': 0.9312487094787739, 'recall': 0.9275131432212841, 'f1-score': 0.906566550732824, 'support': 6277}}
@@@  precision: 0.9785714285714285 recall:  0.23259762308998302 f1_score:  0.3758573388203018


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000103197', 'ENSG00000171453', 'ENSG00000101152', 'ENSG00000136531', 'ENSG00000179456', 'ENSG00000037474', 'ENSG00000173085', 'ENSG00000156709', 'ENSG00000103723', 'ENSG00000124486', 'ENSG00000185803', 'ENSG00000168137', 'ENSG00000147180', 'ENSG00000125675', 'ENSG00000100722', 'ENSG00000130005', 'ENSG00000164494', 'ENSG00000124164', 'ENSG00000102452', 'ENSG00000151729', 'ENSG00000101361', 'ENSG00000101997', 'ENSG00000196159', 'ENSG00000120903', 'ENSG00000101901', 'ENSG00000213689', 'ENSG00000177565', 'ENSG00000051180', 'ENSG00000107815', 'ENSG00000166948', 'ENSG00000077279', 'ENSG00000163933', 'ENSG00000197912', 'ENSG00000064309', 'ENSG00000147475', 'ENSG00000064313', 'ENSG00000128881', 'ENSG00000168282', 'ENSG00000143324', 'ENSG00000214160', 'ENSG00000213923', 'ENSG00000172817', 'ENSG00000148985', 'ENSG00000172943', 'ENSG00000128039', 'ENSG00000198824']
train_genes ['ENSG00000186487', 'ENSG00000143442', 'ENSG00000162928', 'ENSG00000198689', 'ENSG0000008606

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

0         True
1         True
2         True
3         True
4         True
         ...  
67887    False
67909    False
67929    False
67938    False
67950    False
Name: brain_disease_causing, Length: 6275, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927763819095478, 'recall': 1.0, 'f1-score': 0.9963750985027582, 'support': 6322}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 46}, 'accuracy': 0.9927763819095478, 'macro avg': {'precision': 0.4963881909547739, 'recall': 0.5, 'f1-score': 0.4981875492513791, 'support': 6368}, 'weighted avg': {'precision': 0.9856049444774122, 'recall': 0.9927763819095478, 'f1-score': 0.9891776653163374, 'support': 6368}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  9
test_genes ['ENSG00000135917', 'ENSG00000104833', 'ENSG00000168575']
train_genes ['ENSG00000168958', 'ENSG00000137267', 'ENSG00000213689', 'ENSG00000181038', 'ENSG00000100311', 'ENSG00000196998', 'ENSG00000112234', 'ENSG00000101347', 'ENSG00000167716', 'ENSG00000158828', 'ENSG00000088682', 'ENSG00000125779', 'ENSG00000186153', 'ENSG00000197102', 'ENSG00000116288', 'ENSG00000125454', 'ENSG00000103043', 'ENSG00000100749', 'ENSG00000112541', 'ENSG00000115267', 'ENSG00000129003', 'ENSG00000136104', 'ENSG00000145335', 'ENSG00000066427', 'ENSG00000162065', 'ENSG00000213923', 'ENSG00000068120', 'ENSG00000160710', 'ENSG00000164494', 'ENSG00000159363', 'ENSG00000163541', 'ENSG00000004848', 'ENSG00000103089', 'ENSG00000087086', 'ENSG00000260230', 'ENSG00000102466', 'ENSG00000113721', 'ENSG00000100225', 'ENSG00000143324', 'ENSG00000184381', 'ENSG00000185345', 'ENSG00000145348', 'ENSG00000136143', 'ENSG00000182287', 'ENSG00000131943', 'ENSG00000113231', 'ENSG00000116675', 'ENSG0000021302

4        False
18       False
24       False
28       False
29       False
         ...  
67931    False
67937    False
67939    False
67952    False
67956    False
Name: brain-2_disease_causing, Length: 8174, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 10, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9726134585289515, 'recall': 1.0, 'f1-score': 0.9861166203887346, 'support': 4972}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 140}, 'accuracy': 0.9726134585289515, 'macro avg': {'precision': 0.48630672926447577, 'recall': 0.5, 'f1-score': 0.4930583101943673, 'support': 5112}, 'weighted avg': {'precision': 0.9459769397116485, 'recall': 0.9726134585289515, 'f1-score': 0.9591102966691684, 'support': 5112}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                     True
1                   54.000                     True
2                   33.000                     True
3                   33.000                     True
4                   20.500                     True
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 1811
TN_t 0
FP_t 66157
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.026644891713747645
threshold:  1
TP_t 1810
TN_t 10520
FP_t 55637
FN_t 1
FPR_t 0.8409843251658933
TPR_t 0.9994478188845941
precision 0.03150730238306613
threshold:  2
TP_t 1810
TN_t 14466
FP_t 51691
FN_t 1
FPR_t 0.7813383315446589
TPR_t 0.9994478188845941

precision 0.027777777777777776
threshold:  56
TP_t 0
TN_t 66129
FP_t 28
FN_t 1811
FPR_t 0.0004232356364405883
TPR_t 0.0
precision 0.0
threshold:  57
TP_t 0
TN_t 66132
FP_t 25
FN_t 1811
FPR_t 0.0003778889611076681
TPR_t 0.0
precision 0.0
threshold:  58
TP_t 0
TN_t 66135
FP_t 22
FN_t 1811
FPR_t 0.00033254228577474796
TPR_t 0.0
precision 0.0
threshold:  59
TP_t 0
TN_t 66140
FP_t 17
FN_t 1811
FPR_t 0.00025696449355321434
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 66141
FP_t 16
FN_t 1811
FPR_t 0.0002418489351089076
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 66145
FP_t 12
FN_t 1811
FPR_t 0.0001813867013316807
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 66145
FP_t 12
FN_t 1811
FPR_t 0.0001813867013316807
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 66146
FP_t 11
FN_t 1811
FPR_t 0.00016627114288737398
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 66149
FP_t 8
FN_t 1811
FPR_t 0.0001209244675544538
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 66151
FP_t 6


FN_t 337
FPR_t 0.13881732302784935
TPR_t 0.8077581289218483
precision 0.13526939243408484
threshold:  26
TP_t 1407
TN_t 56224
FP_t 8984
FN_t 346
FPR_t 0.13777450619555884
TPR_t 0.802624073017684
precision 0.13540563949571746
threshold:  27
TP_t 1405
TN_t 56286
FP_t 8922
FN_t 348
FPR_t 0.1368237026131763
TPR_t 0.8014831717056474
precision 0.13605112811077757
threshold:  28
TP_t 1398
TN_t 56351
FP_t 8857
FN_t 355
FPR_t 0.13582689240583978
TPR_t 0.7974900171135196
precision 0.13632374451487078
threshold:  29
TP_t 1389
TN_t 56427
FP_t 8781
FN_t 364
FPR_t 0.1346613912403386
TPR_t 0.7923559612093554
precision 0.13657817109144543
threshold:  30
TP_t 1385
TN_t 56492
FP_t 8716
FN_t 368
FPR_t 0.13366458103300208
TPR_t 0.7900741585852824
precision 0.13711513711513712
threshold:  31
TP_t 1380
TN_t 56551
FP_t 8657
FN_t 373
FPR_t 0.13275978407557354
TPR_t 0.7872219053051911
precision 0.13749128225565407
threshold:  32
TP_t 1371
TN_t 56615
FP_t 8593
FN_t 382
FPR_t 0.13177830940988836
TPR_t 0.78208784

threshold:  86
TP_t 930
TN_t 59902
FP_t 5306
FN_t 823
FPR_t 0.08137038400196295
TPR_t 0.5305191100969766
precision 0.14913406029506093
threshold:  87
TP_t 915
TN_t 59999
FP_t 5209
FN_t 838
FPR_t 0.07988283646178383
TPR_t 0.5219623502567028
precision 0.14941214892227303
threshold:  88
TP_t 892
TN_t 60121
FP_t 5087
FN_t 861
FPR_t 0.07801190038032144
TPR_t 0.5088419851682829
precision 0.14918882756313764
threshold:  89
TP_t 866
TN_t 60242
FP_t 4966
FN_t 887
FPR_t 0.07615629984051037
TPR_t 0.4940102681118083
precision 0.14849108367626887
threshold:  90
TP_t 847
TN_t 60348
FP_t 4860
FN_t 906
FPR_t 0.07453073242546927
TPR_t 0.4831717056474615
precision 0.14841422814087962
threshold:  91
TP_t 824
TN_t 60487
FP_t 4721
FN_t 929
FPR_t 0.07239909213593423
TPR_t 0.47005134055904163
precision 0.1486023444544635
threshold:  92
TP_t 796
TN_t 60651
FP_t 4557
FN_t 957
FPR_t 0.06988406330511594
TPR_t 0.4540787221905305
precision 0.1487016626190921
threshold:  93
TP_t 760
TN_t 60821
FP_t 4387
FN_t 993
FP

FP_t 13304
FN_t 9
FPR_t 0.7957413720916323
TPR_t 0.9832089552238806
precision 0.03810281252259417
threshold:  41
TP_t 527
TN_t 3328
FP_t 13391
FN_t 9
FPR_t 0.8009450325976434
TPR_t 0.9832089552238806
precision 0.03786463572352349
threshold:  42
TP_t 527
TN_t 3254
FP_t 13465
FN_t 9
FPR_t 0.8053711346372391
TPR_t 0.9832089552238806
precision 0.03766437964551172
threshold:  43
TP_t 527
TN_t 3179
FP_t 13540
FN_t 9
FPR_t 0.809857048866559
TPR_t 0.9832089552238806
precision 0.03746356721404706
threshold:  44
TP_t 527
TN_t 3106
FP_t 13613
FN_t 9
FPR_t 0.8142233387164304
TPR_t 0.9832089552238806
precision 0.03727015558698727
threshold:  45
TP_t 527
TN_t 3040
FP_t 13679
FN_t 9
FPR_t 0.8181709432382319
TPR_t 0.9832089552238806
precision 0.03709700126707025
threshold:  46
TP_t 527
TN_t 2961
FP_t 13758
FN_t 9
FPR_t 0.8228961062264489
TPR_t 0.9832089552238806
precision 0.03689184459222961
threshold:  47
TP_t 527
TN_t 2897
FP_t 13822
FN_t 9
FPR_t 0.826724086368802
TPR_t 0.9832089552238806
precision 

 [1.         1.        ]]
AUC
0.76597130532516
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                     True
29                    98.6                     True
30                    99.8                     True
32                    99.9                     True
33                    94.5                     True
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 536
TN_t 0
FP_t 16781
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.030952243460183635
threshold:  1
TP_t 527
TN_t 4869
FP_t 11912
FN_t 9
FPR_t 0.709850426077111
TPR_t 0.9832089552238806
precision 0.04236674973872498
threshold:  2
TP_t 521
TN_t 5787
FP_t 10994
FN_t 15
FPR_t 0

threshold:  57
TP_t 439
TN_t 10415
FP_t 6366
FN_t 97
FPR_t 0.3793576068172338
TPR_t 0.8190298507462687
precision 0.0645113886847906
threshold:  58
TP_t 437
TN_t 10456
FP_t 6325
FN_t 99
FPR_t 0.37691436743936596
TPR_t 0.8152985074626866
precision 0.06462585034013606
threshold:  59
TP_t 436
TN_t 10478
FP_t 6303
FN_t 100
FPR_t 0.37560336094392466
TPR_t 0.8134328358208955
precision 0.06469802641341445
threshold:  60
TP_t 435
TN_t 10512
FP_t 6269
FN_t 101
FPR_t 0.37357725999642455
TPR_t 0.8115671641791045
precision 0.06488663484486873
threshold:  61
TP_t 434
TN_t 10544
FP_t 6237
FN_t 102
FPR_t 0.37167034145760086
TPR_t 0.8097014925373134
precision 0.06505771248688352
threshold:  62
TP_t 434
TN_t 10587
FP_t 6194
FN_t 102
FPR_t 0.36910791967105655
TPR_t 0.8097014925373134
precision 0.06547978273989137
threshold:  63
TP_t 433
TN_t 10636
FP_t 6145
FN_t 103
FPR_t 0.3661879506584828
TPR_t 0.8078358208955224
precision 0.06582547886895714
threshold:  64
TP_t 430
TN_t 10679
FP_t 6102
FN_t 106
FPR_t 

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9730224051211706, 'recall': 1.0, 'f1-score': 0.9863267670915411, 'support': 6384}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.9730224051211706, 'macro avg': {'precision': 0.4865112025605853, 'recall': 0.5, 'f1-score': 0.49316338354577055, 'support': 6561}, 'weighted avg': {'precision': 0.9467726008677873, 'recall': 0.9730224051211706, 'f1-score': 0.9597180431507999, 'support': 6561}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000171100', 'ENSG00000151929', 'ENSG00000133703', 'ENSG00000143549', 'ENSG00000129152', 'ENSG00000102452', 'ENSG00000115665']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG00000079805', 'ENSG00000138435', 'ENSG0000019756

19       False
20       False
24       False
26       False
27       False
         ...  
67931    False
67933    False
67941    False
67946    False
67967    False
Name: Muscle - Skeletal_disease_causing, Length: 6551, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927819916809396, 'recall': 1.0, 'f1-score': 0.9963779237522256, 'support': 8115}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 59}, 'accuracy': 0.9927819916809396, 'macro avg': {'precision': 0.4963909958404698, 'recall': 0.5, 'f1-score': 0.4981889618761128, 'support': 8174}, 'weighted avg': {'precision': 0.9856160830059733, 'recall': 0.9927819916809396, 'f1-score': 0.9891860596096538, 'support': 8174}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 488
TN_t 0
FP_t 67480
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.00717984934086629
threshold:  1
TP_t 487
TN_t 10520
FP_t 56960
FN_t 1
FPR_t 0.8441019561351512
TPR_t 0.9979508196721312
precision 0.008477379149476908
threshold:  2
TP_t 487
TN_t 14466
FP_t 53014
FN_t 1
FPR_t 0.7856253704801422
TPR_t 0.9979508196721312
pr

FP_t 26
FN_t 486
FPR_t 0.0003852993479549496
TPR_t 0.004098360655737705
precision 0.07142857142857142
threshold:  57
TP_t 2
TN_t 67457
FP_t 23
FN_t 486
FPR_t 0.00034084173088322464
TPR_t 0.004098360655737705
precision 0.08
threshold:  58
TP_t 2
TN_t 67460
FP_t 20
FN_t 486
FPR_t 0.0002963841138114997
TPR_t 0.004098360655737705
precision 0.09090909090909091
threshold:  59
TP_t 0
TN_t 67463
FP_t 17
FN_t 488
FPR_t 0.0002519264967397748
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67464
FP_t 16
FN_t 488
FPR_t 0.00023710729104919976
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67468
FP_t 12
FN_t 488
FPR_t 0.00017783046828689981
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67468
FP_t 12
FN_t 488
FPR_t 0.00017783046828689981
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67469
FP_t 11
FN_t 488
FPR_t 0.00016301126259632483
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67472
FP_t 8
FN_t 488
FPR_t 0.00011855364552459988
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t

TPR_t 0.7112526539278131
precision 0.03200229270156668
threshold:  26
TP_t 332
TN_t 56431
FP_t 10059
FN_t 139
FPR_t 0.15128590765528652
TPR_t 0.7048832271762208
precision 0.03195072659031854
threshold:  27
TP_t 331
TN_t 56494
FP_t 9996
FN_t 140
FPR_t 0.15033839675139118
TPR_t 0.70276008492569
precision 0.03205190277912269
threshold:  28
TP_t 326
TN_t 56561
FP_t 9929
FN_t 145
FPR_t 0.14933072642502632
TPR_t 0.692144373673036
precision 0.0317893710385178
threshold:  29
TP_t 325
TN_t 56645
FP_t 9845
FN_t 146
FPR_t 0.1480673785531659
TPR_t 0.6900212314225053
precision 0.0319567354965585
threshold:  30
TP_t 325
TN_t 56714
FP_t 9776
FN_t 146
FPR_t 0.14702962851556625
TPR_t 0.6900212314225053
precision 0.032175032175032175
threshold:  31
TP_t 322
TN_t 56775
FP_t 9715
FN_t 149
FPR_t 0.1461121973229057
TPR_t 0.6836518046709129
precision 0.032081299192985954
threshold:  32
TP_t 321
TN_t 56847
FP_t 9643
FN_t 150
FPR_t 0.1450293277184539
TPR_t 0.6815286624203821
precision 0.03221597751906865
thres

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


TP_t 319
TN_t 56915
FP_t 9575
FN_t 152
FPR_t 0.14400661753647165
TPR_t 0.6772823779193206
{'False': {'precision': 0.992824039467783, 'recall': 1.0, 'f1-score': 0.9963990997749438, 'support': 6641}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.992824039467783, 'macro avg': {'precision': 0.4964120197338915, 'recall': 0.5, 'f1-score': 0.4981995498874719, 'support': 6689}, 'weighted avg': {'precision': 0.9856995733451258, 'recall': 0.992824039467783, 'f1-score': 0.9892489791606222, 'support': 6689}}
precision 0.03224176268445523
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0
threshold:  34


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000151779', 'ENSG00000205084', 'ENSG00000011143', 'ENSG00000151729', 'ENSG00000198707', 'ENSG00000158104', 'ENSG00000099377', 'ENSG00000168306']
train_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000163541', 'ENSG00000138449', 'ENSG00000025708', 'ENSG00000114956', 'ENSG00000198650', 'ENSG00000122787', 'ENSG00000117594', 'ENSG00000117020', 'ENSG00000103494', 'ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'ENSG00000140521', 'ENSG00000008710', 'ENSG00000103876', 'ENSG00000023839', 'ENSG00000151445', 'ENSG00000198677', 'ENSG00000112234', 'ENSG00000025796', 'ENSG00000130175', 'ENSG00000168509', 'ENSG00000134538', 'ENSG00000123810', 'ENSG00000166548', 'ENSG00000118762', 'ENSG00000107815', 'ENSG00000118971', 'ENSG00000105647', 'ENSG0000016233

FPR_t 0.09088584749586404
TPR_t 0.40976645435244163
precision 0.030949326491340603
threshold:  87
TP_t 188
TN_t 60554
FP_t 5936
FN_t 283
FPR_t 0.08927658294480373
TPR_t 0.3991507430997877
precision 0.03069888961463096
threshold:  88
TP_t 183
TN_t 60694
FP_t 5796
FN_t 288
FPR_t 0.08717100315836968
TPR_t 0.3885350318471338
precision 0.030607124937280482
threshold:  89
TP_t 176
TN_t 60834
FP_t 5656
FN_t 295
FPR_t 0.08506542337193564
TPR_t 0.37367303609341823
precision 0.03017832647462277
threshold:  90
TP_t 172
TN_t 60955
FP_t 5535
FN_t 299
FPR_t 0.08324560084223191
TPR_t 0.3651804670912951
precision 0.03013842649377957
threshold:  91
TP_t 165
TN_t 61110
FP_t 5380
FN_t 306
FPR_t 0.08091442322153708
TPR_t 0.3503184713375796
precision 0.029756537421100092
threshold:  92
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff

TP_t 160
TN_t 61297
FP_t 5193
FN_t 311
FPR_t 0.07810197022108588
TPR_t 0.33970276008492567
precision 0.029889781430973286
threshold:  93
TP_t 153
TN_t 61496
FP_t 4994
FN_t 318
FPR_t 0.07510903895322604
TPR_t 0.3248407643312102
precision 0.029726054012045852
threshold:  94
TP_t 146
TN_t 61778
FP_t 4712
FN_t 325
FPR_t 0.07086779966912318
TPR_t 0.3099787685774947
precision 0.030053519967064634
threshold:  95
TP_t 132
TN_t 62059
FP_t 4431
FN_t 339
FPR_t 0.06664160024063769
TPR_t 0.2802547770700637
precision 0.028928336620644313
threshold:  96
TP_t 117
TN_t 62481
FP_t 4009
FN_t 354
FPR_t 0.060294781170100765
TPR_t 0.2484076433121019
precision 0.028356761997091615
threshold:  97
TP_t 99
TN_t 63005
FP_t 3485
FN_t 372
FPR_t 0.052413896826590464
TPR_t 0.21019108280254778
precision 0.027622767857142856
threshold:  98
TP_t 66
TN_t 63733
FP_t 2757
FN_t 405
FPR_t 0.0414648819371334
TPR_t 0.14012738853503184
precision 0.023379383634431455
threshold:  99
TP_t 26
TN_t 64856
FP_t 1634
FN_t 445
FPR_t 0.

TPR_t 0.9736842105263158
precision 0.01295064753237662
threshold:  47
TP_t 185
TN_t 2901
FP_t 14164
FN_t 5
FPR_t 0.8300029299736302
TPR_t 0.9736842105263158
precision 0.012892884521569448
threshold:  48
TP_t 185
TN_t 2848
FP_t 14217
FN_t 5
FPR_t 0.8331087020216819
TPR_t 0.9736842105263158
precision 0.012845438133592556
threshold:  49
TP_t 185
TN_t 2783
FP_t 14282
FN_t 5
FPR_t 0.8369176677409903
TPR_t 0.9736842105263158
precision 0.01278772378516624
threshold:  50
TP_t 185
TN_t 2709
FP_t 14356
FN_t 5
FPR_t 0.8412540287137416
TPR_t 0.9736842105263158
precision 0.01272264631043257
threshold:  51
TP_t 185
TN_t 2645
FP_t 14420
FN_t 5
FPR_t 0.8450043949604453
TPR_t 0.9736842105263158
precision 0.01266689489900719
threshold:  52
TP_t 185
TN_t 2585
FP_t 14480
FN_t 5
FPR_t 0.8485203633167302
TPR_t 0.9736842105263158
precision 0.01261506989430617
threshold:  53
TP_t 185
TN_t 2509
FP_t 14556
FN_t 5
FPR_t 0.8529739232346909
23       False
41       False
42       False
46       False
49       False

 [1.         1.        ]]
AUC
0.7541355806745109
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 190
TN_t 0
FP_t 17127
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.010971877345960617
threshold:  1
TP_t 183
TN_t 4871
FP_t 12256
FN_t 7
FPR_t 0.7155952589478601
TPR_t 0.9631578947368421
precision 0.014711793552536377
threshold:  2
TP_t 182
TN_t 5794
FP_t 11333
FN_t 8
FPR_

precision 0.023112931538911644
threshold:  57
TP_t 158
TN_t 10480
FP_t 6647
FN_t 32
FPR_t 0.38810065977696034
TPR_t 0.8315789473684211
precision 0.023218221895664953
threshold:  58
TP_t 158
TN_t 10523
FP_t 6604
FN_t 32
FPR_t 0.3855900040871139
TPR_t 0.8315789473684211
precision 0.02336586808636498
threshold:  59
TP_t 157
TN_t 10545
FP_t 6582
FN_t 33
FPR_t 0.38430548257137853
TPR_t 0.8263157894736842
precision 0.02329722510758273
threshold:  60
TP_t 156
TN_t 10579
FP_t 6548
FN_t 34
FPR_t 0.3823203129561511
TPR_t 0.8210526315789474
precision 0.023269689737470168
threshold:  61
TP_t 156
TN_t 10612
FP_t 6515
FN_t 34
FPR_t 0.380393530682548
TPR_t 0.8210526315789474
precision 0.02338479988007795
threshold:  62
TP_t 156
TN_t 10655
FP_t 6472
FN_t 34
FPR_t 0.37788287499270157
TPR_t 0.8210526315789474
precision 0.023536511768255886
threshold:  63
TP_t 156
TN_t 10705
FP_t 6422
FN_t 34
FPR_t 0.3749635079114848
TPR_t 0.8210526315789474
precision 0.023715415019762844
threshold:  64
TP_t 156
TN_t 107

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884609620860183, 'recall': 1.0, 'f1-score': 0.9941970005275454, 'support': 6596}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884609620860183, 'macro avg': {'precision': 0.49423048104300915, 'recall': 0.5, 'f1-score': 0.4970985002637727, 'support': 6673}, 'weighted avg': {'precision': 0.9770550735680169, 'recall': 0.9884609620860183, 'f1-score': 0.9827249236444912, 'support': 6673}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000077782', 'ENSG00000085840', 'ENSG00000100014', 'ENSG00000120498', 'ENSG00000165474', 'ENSG00000095015', 'ENSG00000106327', 'ENSG00000122735', 'ENSG00000141013', 'ENSG00000160188', 'ENSG00000099246', 'ENSG00000167513', 'ENSG00000154099', 'ENSG00000093009', 'ENSG00000171595', 'ENSG00000272047']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000109163', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000180340', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000179295', 'ENSG00000111834', 'ENSG00000175294', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000131808', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000166863', 'ENSG00000112210', 'ENSG00000256061', 'ENSG00000086288', 'ENSG00000104450', 'ENSG00000187726', 'ENSG0000016316

1        False
12       False
13       False
24       False
49       False
         ...  
67906    False
67915    False
67920    False
67939    False
67945    False
Name: Testis_disease_causing, Length: 6659, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958592132505176, 'recall': 1.0, 'f1-score': 0.9979253112033196, 'support': 6734}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958592132505176, 'macro avg': {'precision': 0.4979296066252588, 'recall': 0.5, 'f1-score': 0.4989626556016598, 'support': 6762}, 'weighted avg': {'precision': 0.9917355726159399, 'recall': 0.9958592132505176, 'f1-score': 0.9937931152977157, 'support': 6762}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000106327', 'ENSG00000169297', 'ENSG00000203908', 'ENSG00000013503', 'ENSG00000166863']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.1
train_fraction:  0.9
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max'

2        False
31       False
50       False
63       False
74       False
         ...  
67921    False
67960    False
67963    False
67965    False
67967    False
Name: Ovary_disease_causing, Length: 6769, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9899715611435413, 'recall': 1.0, 'f1-score': 0.9949605114704776, 'support': 6614}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 67}, 'accuracy': 0.9899715611435413, 'macro avg': {'precision': 0.4949857805717707, 'recall': 0.5, 'f1-score': 0.4974802557352388, 'support': 6681}, 'weighted avg': {'precision': 0.9800436918729806, 'recall': 0.9899715611435413, 'f1-score': 0.9849826108166051, 'support': 6681}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000136104', 'ENSG00000148606', 'ENSG00000214274', 'ENSG00000161011', 'ENSG00000092621', 'ENSG00000083937', 'ENSG00000103671', 'ENSG00000059573', 'ENSG00000126012']
train_genes ['ENSG00000127824', 'ENSG00000178568', 'ENSG00000072121', 'ENSG00000113448', 'ENSG00000066427', 'ENSG00000196998', 'ENSG00000213380', 'ENSG00000100749', 'ENSG00000107290', 'ENSG00000135486', 'ENSG00000165280', 'ENSG00000136854', 'ENSG00000032444', 'ENSG00000108518', 'ENSG00000170113', 'ENSG00000125744', 'ENSG00000112367', 'ENSG00000187049', 'ENSG00000089280', 'ENSG00000068615', 'ENSG00000197102', 'ENSG00000101276', 'ENSG00000104133', 'ENSG00000123240', 'ENSG00000261609', 'ENSG00000013503', 'ENSG00000015479', 'ENSG00000142168', 'ENSG00000147475', 'ENSG00000008086', 'ENSG00000021574', 'ENSG00000141385', 'ENSG00000204843', 'ENSG00000185803', 'ENSG00000183735', 'ENSG00000172817', 'ENSG00000130294', 'ENSG00000124164', 'ENSG00000169359', 'ENSG00000120948', 'ENSG00000188021', 'ENSG0000010308

41       False
42       False
46       False
49       False
57       False
         ...  
67906    False
67911    False
67924    False
67926    False
67928    False
Name: brain-3_disease_causing, Length: 6668, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.972981224240574, 'recall': 1.0, 'f1-score': 0.9863056092843328, 'support': 6374}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.972981224240574, 'macro avg': {'precision': 0.486490612120287, 'recall': 0.5, 'f1-score': 0.4931528046421664, 'support': 6551}, 'weighted avg': {'precision': 0.9466924627246861, 'recall': 0.972981224240574, 'f1-score': 0.9596568391968152, 'support': 6551}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000104936']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG00000079805', 'ENSG00000138435', 'ENSG00000197563', 'ENSG00000048392', 'ENSG00000167552', 'ENSG00000124155', 'ENSG00000070748', 'ENSG00000171100', 'ENSG0000017087

1        False
12       False
13       False
24       False
49       False
         ...  
67916    False
67919    False
67920    False
67921    False
67956    False
Name: Muscle - Skeletal_disease_causing, Length: 6516, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9906110283159464, 'recall': 1.0, 'f1-score': 0.995283372014674, 'support': 6647}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 63}, 'accuracy': 0.9906110283159464, 'macro avg': {'precision': 0.4953055141579732, 'recall': 0.5, 'f1-score': 0.497641686007337, 'support': 6710}, 'weighted avg': {'precision': 0.9813102094211766, 'recall': 0.9906110283159464, 'f1-score': 0.9859386846172188, 'support': 6710}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000105877', 'ENSG00000141013', 'ENSG00000140694', 'ENSG00000141519', 'ENSG00000004838', 'ENSG00000256061', 'ENSG00000159079', 'ENSG00000167972', 'ENSG00000157423', 'ENSG00000167646', 'ENSG00000111319', 'ENSG00000100368']
train_genes ['ENSG00000104450', 'ENSG00000258366', 'ENSG00000119661', 'ENSG00000187726', 'ENSG00000164818', 'ENSG00000198223', 'ENSG00000086288', 'ENSG00000165699', 'ENSG00000114841', 'ENSG00000135069', 'ENSG00000168484', 'ENSG00000198003', 'ENSG00000130363', 'ENSG00000167131', 'ENSG00000001626', 'ENSG00000103197', 'ENSG00000080572', 'ENSG00000168878', 'ENSG00000039139', 'ENSG00000168447', 'ENSG00000234602', 'ENSG00000105479', 'ENSG00000129295', 'ENSG00000092621', 'ENSG00000160188', 'ENSG00000172426', 'ENSG00000171595', 'ENSG00000165506', 'ENSG00000157856', 'ENSG00000152669', 'ENSG00000122735', 'ENSG00000154099', 'ENSG00000111834', 'ENSG00000169126', 'ENSG00000139537']
benign_genes 67335
test_fraction:  0.0995260663507109
train_fraction:  0

268      False
270      False
286      False
288      False
312      False
         ...  
67909    False
67921    False
67942    False
67946    False
67966    False
Name: Lung_disease_causing, Length: 6716, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9800629590766002, 'recall': 1.0, 'f1-score': 0.9899311075781665, 'support': 6538}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133}, 'accuracy': 0.9800629590766002, 'macro avg': {'precision': 0.4900314795383001, 'recall': 0.5, 'f1-score': 0.49496555378908325, 'support': 6671}, 'weighted avg': {'precision': 0.9605234037539817, 'recall': 0.9800629590766002, 'f1-score': 0.9701948105750342, 'support': 6671}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000154227', 'ENSG00000148655', 'ENSG00000145912', 'ENSG00000168214', 'ENSG00000104044', 'ENSG00000197859', 'ENSG00000163913', 'ENSG00000106327', 'ENSG00000126934', 'ENSG00000141527', 'ENSG00000179295', 'ENSG00000092295', 'ENSG00000074181', 'ENSG00000204020']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000187098', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000119650', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000172922', 'ENSG00000049167', 'ENSG00000138449', 'ENSG00000205155', 'ENSG00000140694', 'ENSG00000258366', 'ENSG0000012155

260      False
289      False
308      False
321      False
332      False
         ...  
67906    False
67923    False
67934    False
67946    False
67961    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 6666, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884367022075387, 'recall': 1.0, 'f1-score': 0.9941847292500566, 'support': 6582}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884367022075387, 'macro avg': {'precision': 0.4942183511037693, 'recall': 0.5, 'f1-score': 0.4970923646250283, 'support': 6659}, 'weighted avg': {'precision': 0.9770071142709145, 'recall': 0.9884367022075387, 'f1-score': 0.9826886751650207, 'support': 6659}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000101871', 'ENSG00000010704', 'ENSG00000011201', 'ENSG00000163421', 'ENSG00000092621', 'ENSG00000157764', 'ENSG00000234602', 'ENSG00000112053', 'ENSG00000114251', 'ENSG00000213281', 'ENSG00000161594', 'ENSG00000167098', 'ENSG00000158815']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000109163', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000180340', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000179295', 'ENSG00000111834', 'ENSG00000175294', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000131808', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000166863', 'ENSG00000112210', 'ENSG00000256061', 'ENSG00000086288', 'ENSG00000104450', 'ENSG00000187726', 'ENSG00000163161', 'ENSG00000105479', 'ENSG00000132155', 'ENSG0000001350

18       False
27       False
35       False
43       False
45       False
         ...  
67912    False
67917    False
67925    False
67936    False
67943    False
Name: Testis_disease_causing, Length: 6666, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}
{'False': {'precision': 0.9765395894428153, 'recall': 1.0, 'f1-score': 0.9881305637982196, 'support': 6327}, 'True': {'precision': 1.0, 'recall': 0.21649484536082475, 'f1-score': 0.3559322033898305, 'support': 194}, 'accuracy': 0.976690691611716, 'macro avg': {'precision': 0.9882697947214076, 'recall': 0.6082474226804124, 'f1-score': 0.672031383594025, 'support': 6521}, 'weighted avg': {'precision': 0.9772375375563093, 'recall': 0.976690691611716, 'f1-score': 0.9693226383390526, 'support': 6521}}
@@@  precision: 1.0 recall:  0.21649484536082475 f1_score:  0.3559322033898305


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000104936', 'ENSG00000106327', 'ENSG00000160014', 'ENSG00000138347']
train_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG00000134755', 'ENSG00000170876', 'ENSG00000198523', 'ENSG00000136574', 'ENSG00000114251', 'ENSG00000164754', 'ENSG00000157764', 'ENSG00000077522', 'ENSG00000112769', 'ENSG00000155657', 'ENSG00000198626', 'ENSG00000165280', 'ENSG00000123700', 'ENSG00000022267', 'ENSG00000118058', 'ENSG00000183873', 'ENSG00000101997', 'ENSG00000198947', 'ENSG00000175084', 'ENSG00000111245', 'ENSG00000179295', 'ENSG00000180340', 'ENSG00000165995', 'ENSG00000148400', 'ENSG00000103197', 'ENSG0000019785

260      False
289      False
308      False
321      False
335      False
         ...  
67934    False
67948    False
67963    False
67964    False
67965    False
Name: Heart - Left Ventricle_disease_causing, Length: 6455, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9916205297022296, 'recall': 1.0, 'f1-score': 0.9957926371149511, 'support': 6627}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9916205297022296, 'macro avg': {'precision': 0.4958102648511148, 'recall': 0.5, 'f1-score': 0.49789631855747557, 'support': 6683}, 'weighted avg': {'precision': 0.9833112749269303, 'recall': 0.9916205297022296, 'f1-score': 0.9874484222895079, 'support': 6683}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000198400', 'ENSG00000169432', 'ENSG00000133422']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000100285', 'ENSG00000143811', 'ENSG00000075785', 'ENSG00000198835', 'ENSG00000133812', 'ENSG00000135924', 'ENSG00000100596', 'ENSG00000169247', 'ENSG0000015888

2        False
31       False
50       False
63       False
74       False
         ...  
67878    False
67884    False
67904    False
67947    False
67955    False
Name: Nerve - Tibial_disease_causing, Length: 6634, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.992822966507177, 'recall': 1.0, 'f1-score': 0.9963985594237695, 'support': 6640}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.992822966507177, 'macro avg': {'precision': 0.4964114832535885, 'recall': 0.5, 'f1-score': 0.49819927971188477, 'support': 6688}, 'weighted avg': {'precision': 0.9856974428241111, 'recall': 0.992822966507177, 'f1-score': 0.9892473735905847, 'support': 6688}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000103876', 'ENSG00000023839', 'ENSG00000151445', 'ENSG00000025796', 'ENSG00000134538', 'ENSG00000123810']
train_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000163541', 'ENSG00000138449', 'ENSG00000025708', 'ENSG00000114956', 'ENSG00000198650', 'ENSG00000122787', 'ENSG00000117594', 'ENSG00000117020', 'ENSG00000103494', 'ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'ENSG00000151779', 'ENSG00000205084', 'ENSG00000011143', 'ENSG00000151729', 'ENSG00000198707', 'ENSG00000140521', 'ENSG00000158104', 'ENSG00000008710', 'ENSG00000198677', 'ENSG00000099377', 'ENSG00000112234', 'ENSG00000130175', 'ENSG00000168509', 'ENSG00000168306', 'ENSG00000166548', 'ENSG00000118762', 'ENSG00000107815', 'ENSG00000118971', 'ENSG00000105647', 'ENSG0000016233

20       False
24       False
26       False
27       False
32       False
         ...  
67933    False
67949    False
67953    False
67956    False
67958    False
Name: Liver_disease_causing, Length: 6666, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
{'False': {'precision': 0.9443429141053331, 'recall': 0.9996482588814632, 'f1-score': 0.9712088850918411, 'support': 5686}, 'True': {'precision': 0.9921875, 'recall': 0.4312393887945671, 'f1-score': 0.6011834319526628, 'support': 589}, 'accuracy': 0.9462948207171314, 'macro avg': {'precision': 0.9682652070526665, 'recall': 0.7154438238380152, 'f1-score': 0.7861961585222519, 'support': 6275}, 'weighted avg': {'precision': 0.9488338242395098, 'recall': 0.9462948207171314, 'f1-score': 0.9364766154665063, 'support': 6275}}
@@@  precision: 0.9921875 recall:  0.4312393887945671 f1_score:  0.6011834319526628


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000134323', 'ENSG00000221838', 'ENSG00000102974', 'ENSG00000198513', 'ENSG00000087258', 'ENSG00000260230', 'ENSG00000164022', 'ENSG00000112357', 'ENSG00000162065', 'ENSG00000185345', 'ENSG00000164877', 'ENSG00000168958', 'ENSG00000198707', 'ENSG00000126583', 'ENSG00000129696', 'ENSG00000178568', 'ENSG00000148384', 'ENSG00000068615', 'ENSG00000203879', 'ENSG00000079432', 'ENSG00000197780', 'ENSG00000172534', 'ENSG00000030582', 'ENSG00000120948', 'ENSG00000164099', 'ENSG00000116198', 'ENSG00000115204', 'ENSG00000076685', 'ENSG00000177542', 'ENSG00000161011', 'ENSG00000108433', 'ENSG00000114956', 'ENSG00000116641', 'ENSG00000108641', 'ENSG00000127616', 'ENSG00000106477', 'ENSG00000134899', 'ENSG00000141485', 'ENSG00000106153', 'ENSG00000118971', 'ENSG00000125676', 'ENSG00000146701', 'ENSG00000007168', 'ENSG00000181830', 'ENSG00000129255', 'ENSG00000119977', 'ENSG00000141551', 'ENSG00000034693', 'ENSG00000180902', 'ENSG00000169379', 'ENSG00000129250', 'ENSG0000

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

258       True
259       True
260       True
261       True
262       True
         ...  
67901    False
67903    False
67936    False
67953    False
67955    False
Name: brain_disease_causing, Length: 6295, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9728360957642725, 'recall': 1.0, 'f1-score': 0.9862310385064177, 'support': 6339}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.9728360957642725, 'macro avg': {'precision': 0.4864180478821363, 'recall': 0.5, 'f1-score': 0.49311551925320884, 'support': 6516}, 'weighted avg': {'precision': 0.9464100692218729, 'recall': 0.9728360957642725, 'f1-score': 0.9594411530221273, 'support': 6516}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000135069', 'ENSG00000175084', 'ENSG00000166548', 'ENSG00000092529', 'ENSG00000113721', 'ENSG00000145348', 'ENSG00000163359', 'ENSG00000092621', 'ENSG00000169032']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000142156', 'ENSG00000030304', 'ENSG00000163380', 'ENSG0000007980

18       False
27       False
29       False
40       False
43       False
         ...  
67922    False
67926    False
67943    False
67946    False
67964    False
Name: Muscle - Skeletal_disease_causing, Length: 6549, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9741090466037161, 'recall': 1.0, 'f1-score': 0.9868847400092579, 'support': 6396}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9741090466037161, 'macro avg': {'precision': 0.48705452330185806, 'recall': 0.5, 'f1-score': 0.4934423700046289, 'support': 6566}, 'weighted avg': {'precision': 0.9488884346752007, 'recall': 0.9741090466037161, 'f1-score': 0.9613333531981745, 'support': 6566}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000101997', 'ENSG00000164494', 'ENSG00000140650', 'ENSG00000113971', 'ENSG00000173085', 'ENSG00000046651', 'ENSG00000197694', 'ENSG00000185344', 'ENSG00000073584', 'ENSG00000178538', 'ENSG00000131398', 'ENSG00000162065', 'ENSG00000103089', 'ENSG00000086062', 'ENSG00000167632', 'ENSG00000142186', 'ENSG00000141385', 'ENSG00000106477', 'ENSG00000158290', 'ENSG00000166813', 'ENSG00000103494', 'ENSG00000141252', 'ENSG00000147852', 'ENSG00000175115', 'ENSG00000132773', 'ENSG00000126583', 'ENSG00000164877']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG0000001350

2        False
31       False
50       False
63       False
77       False
         ...  
67904    False
67928    False
67953    False
67960    False
67963    False
Name: brain-1_disease_causing, Length: 6607, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884488448844885, 'recall': 1.0, 'f1-score': 0.9941908713692945, 'support': 6589}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 77}, 'accuracy': 0.9884488448844885, 'macro avg': {'precision': 0.49422442244224424, 'recall': 0.5, 'f1-score': 0.49709543568464726, 'support': 6666}, 'weighted avg': {'precision': 0.9770311189534795, 'recall': 0.9884488448844885, 'f1-score': 0.9827068183996822, 'support': 6666}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000130363', 'ENSG00000171316', 'ENSG00000139537', 'ENSG00000187678', 'ENSG00000184895', 'ENSG00000119661', 'ENSG00000107831', 'ENSG00000105697', 'ENSG00000105146', 'ENSG00000129295', 'ENSG00000099949', 'ENSG00000105429', 'ENSG00000164818', 'ENSG00000133703', 'ENSG00000114841', 'ENSG00000167646', 'ENSG00000125848']
train_genes ['ENSG00000172426', 'ENSG00000161202', 'ENSG00000168303', 'ENSG00000152669', 'ENSG00000151632', 'ENSG00000138449', 'ENSG00000115839', 'ENSG00000109163', 'ENSG00000198003', 'ENSG00000157856', 'ENSG00000115904', 'ENSG00000118873', 'ENSG00000180340', 'ENSG00000105877', 'ENSG00000169297', 'ENSG00000169836', 'ENSG00000168509', 'ENSG00000179295', 'ENSG00000111834', 'ENSG00000175294', 'ENSG00000125875', 'ENSG00000135069', 'ENSG00000131808', 'ENSG00000100485', 'ENSG00000139351', 'ENSG00000004838', 'ENSG00000139549', 'ENSG00000107404', 'ENSG00000166863', 'ENSG00000112210', 'ENSG00000256061', 'ENSG00000086288', 'ENSG00000104450', 'ENSG0000018772

1        False
2        False
12       False
17       False
28       False
         ...  
67949    False
67960    False
67961    False
67964    False
67966    False
Name: Testis_disease_causing, Length: 10040, dtype: bool
{'random_state': 1234, 'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': 50, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9899520095980804, 'recall': 1.0, 'f1-score': 0.9949506368226695, 'support': 6601}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 67}, 'accuracy': 0.9899520095980804, 'macro avg': {'precision': 0.4949760047990402, 'recall': 0.5, 'f1-score': 0.49747531841133474, 'support': 6668}, 'weighted avg': {'precision': 0.9800049813072778, 'recall': 0.9899520095980804, 'f1-score': 0.9849533823734915, 'support': 6668}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000142168', 'ENSG00000185803', 'ENSG00000183735', 'ENSG00000124164']
train_genes ['ENSG00000127824', 'ENSG00000178568', 'ENSG00000072121', 'ENSG00000113448', 'ENSG00000066427', 'ENSG00000196998', 'ENSG00000213380', 'ENSG00000100749', 'ENSG00000107290', 'ENSG00000135486', 'ENSG00000165280', 'ENSG00000136854', 'ENSG00000032444', 'ENSG00000108518', 'ENSG00000170113', 'ENSG00000125744', 'ENSG00000112367', 'ENSG00000187049', 'ENSG00000089280', 'ENSG00000068615', 'ENSG00000197102', 'ENSG00000101276', 'ENSG00000104133', 'ENSG00000123240', 'ENSG00000261609', 'ENSG00000136104', 'ENSG00000013503', 'ENSG00000148606', 'ENSG00000214274', 'ENSG00000015479', 'ENSG00000161011', 'ENSG00000147475', 'ENSG00000008086', 'ENSG00000092621', 'ENSG00000021574', 'ENSG00000141385', 'ENSG00000204843', 'ENSG00000083937', 'ENSG00000103671', 'ENSG00000059573', 'ENSG00000126012', 'ENSG00000172817', 'ENSG00000130294', 'ENSG00000169359', 'ENSG00000120948', 'ENSG00000188021', 'ENSG0000010308

20       False
24       False
26       False
27       False
32       False
         ...  
67929    False
67932    False
67939    False
67943    False
67945    False
Name: brain-3_disease_causing, Length: 6671, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958634953464323, 'recall': 1.0, 'f1-score': 0.9979274611398964, 'support': 6741}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958634953464323, 'macro avg': {'precision': 0.49793174767321613, 'recall': 0.5, 'f1-score': 0.4989637305699482, 'support': 6769}, 'weighted avg': {'precision': 0.9917441013636135, 'recall': 0.9958634953464323, 'f1-score': 0.9937995295529681, 'support': 6769}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000011201', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000169836', 'ENSG00000187678']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000214413', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.1
train_fraction:  0.9
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max'

23       False
41       False
42       False
49       False
57       False
         ...  
67911    False
67921    False
67927    False
67928    False
67930    False
Name: Ovary_disease_causing, Length: 6766, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9884462151394422, 'recall': 1.0, 'f1-score': 0.9941895411741134, 'support': 9924}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 116}, 'accuracy': 0.9884462151394422, 'macro avg': {'precision': 0.4942231075697211, 'recall': 0.5, 'f1-score': 0.4970947705870567, 'support': 10040}, 'weighted avg': {'precision': 0.9770259202234884, 'recall': 0.9884462151394422, 'f1-score': 0.982702889104771, 'support': 10040}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 775
TN_t 0
FP_t 67193
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.011402424670433144
threshold:  1
TP_t 773
TN_t 10519
FP_t 56674
FN_t 2
FPR_t 0.8434509547125445
TPR_t 0.9974193548387097
precision 0.013455881072989015
threshold:  2
TP_t 772
TN_t 14464
FP_t 52729
FN_t 3
FPR_t 0.784739481791258
TPR_t 0.9961290322580645
pr

scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


threshold:  19
 Fold:  7
test_genes ['ENSG00000135636', 'ENSG00000155657', 'ENSG00000088682', 'ENSG00000105048', 'ENSG00000022267', 'ENSG00000174611', 'ENSG00000164494', 'ENSG00000007314', 'ENSG00000085998', 'ENSG00000157764', 'ENSG00000214960', 'ENSG00000119723', 'ENSG00000143858']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 

FP_t 10
FN_t 774
FPR_t 0.0001488250264164422
TPR_t 0.0012903225806451613
precision 0.09090909090909091
threshold:  64
TP_t 0
TN_t 67185
FP_t 8
FN_t 775
FPR_t 0.00011906002113315375
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67187
FP_t 6
FN_t 775
FPR_t 8.929501584986532e-05
TPR_t 0.0
precision 0.0
threshold:  66
TP_t 0
TN_t 67190
FP_t 3
FN_t 775
FPR_t 4.464750792493266e-05
TPR_t 0.0
precision 0.0
threshold:  67
TP_t 0
TN_t 67191
FP_t 2
FN_t 775
FPR_t 2.976500528328844e-05
TPR_t 0.0
precision 0.0
threshold:  68
TP_t 0
TN_t 67191
FP_t 2
FN_t 775
FPR_t 2.976500528328844e-05
TPR_t 0.0
precision 0.0
threshold:  69
TP_t 0
TN_t 67191
FP_t 2
FN_t 775
FPR_t 2.976500528328844e-05
TPR_t 0.0
precision 0.0
threshold:  70
TP_t 0
TN_t 67192
FP_t 1
FN_t 775
FPR_t 1.488250264164422e-05
TPR_t 0.0
precision 0.0
threshold:  71
TP_t 0
TN_t 67192
FP_t 1
FN_t 775
FPR_t 1.488250264164422e-05
TPR_t 0.0
precision 0.0
threshold:  72
TP_t 0
TN_t 67193
FP_t 0
FN_t 775
FPR_t 0.0
TPR_t 0.0
precision 0
thresho

TN_t 67193
FP_t 0
FN_t 775
FPR_t 0.0
TPR_t 0.0
precision 0
threshold:  101
TP_t 0
TN_t 67193
FP_t 0
FN_t 775
FPR_t 0.0
TPR_t 0.0
precision 0
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                99.649990                    False
1                99.915028                    False
2                24.470943                    False
3                94.967872                    False
4                99.703240                    False
...                    ...                      ...
67963             0.089181                    False
67964             0.001610                    False
67965             0.054819                    False
67966             0.013161                    False
67967             0.079646                    False

[66961 rows x 2 columns]
threshold:  0
TP_t 752
TN_t 0
FP_t 66209
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.011230417705828766
threshold:  1
TP_t 679
TN_t 49634
FP_t 16575
FN_t 73
FPR_t 0.25034360887492635
TPR_t 0.90292553

FP_t 8127
FN_t 311
FPR_t 0.12274766270446616
TPR_t 0.586436170212766
precision 0.051470588235294115
threshold:  56
TP_t 437
TN_t 58135
FP_t 8074
FN_t 315
FPR_t 0.1219471673035388
TPR_t 0.5811170212765957
precision 0.05134531782399248
threshold:  57
TP_t 434
TN_t 58203
FP_t 8006
FN_t 318
FPR_t 0.12092011660046217
TPR_t 0.5771276595744681
precision 0.051421800947867295
threshold:  58
TP_t 432
TN_t 58248
FP_t 7961
FN_t 320
FPR_t 0.12024045069401441
TPR_t 0.574468085106383
precision 0.051471464315501016
threshold:  59
TP_t 432
TN_t 58312
FP_t 7897
FN_t 320
FPR_t 0.1192738147381776
TPR_t 0.574468085106383
precision 0.05186697082482891
threshold:  60
TP_t 430
TN_t 58369
FP_t 7840
FN_t 322
FPR_t 0.11841290459001041
TPR_t 0.5718085106382979
precision 0.05199516324062878
threshold:  61
TP_t 428
TN_t 58429
FP_t 7780
FN_t 324
FPR_t 0.1175066833814134
TPR_t 0.5691489361702128
precision 0.05214424951267056
threshold:  62
TP_t 427
TN_t 58477
FP_t 7732
FN_t 325
FPR_t 0.1167817064145358
TPR_t 0.567819

FP_t 8251
FN_t 40
FPR_t 0.4869857758366287
TPR_t 0.8717948717948718
precision 0.031913645430012905
threshold:  7
TP_t 272
TN_t 8692
FP_t 8251
FN_t 40
FPR_t 0.4869857758366287
TPR_t 0.8717948717948718
precision 0.031913645430012905
threshold:  8
TP_t 283
TN_t 8082
FP_t 8861
FN_t 29
FPR_t 0.5229888449507171
TPR_t 0.907051282051282
precision 0.03094925634295713
threshold:  9
TP_t 285
TN_t 7808
FP_t 9135
FN_t 27
FPR_t 0.5391607153396683
TPR_t 0.9134615384615384
precision 0.030254777070063694
threshold:  10
TP_t 287
TN_t 7561
FP_t 9382
FN_t 25
FPR_t 0.5537390072596352
TPR_t 0.9198717948717948
precision 0.029682490433343676
threshold:  11
TP_t 289
TN_t 7324
FP_t 9619
FN_t 23
FPR_t 0.5677270849318302
TPR_t 0.9262820512820513
precision 0.029168348809043197
threshold:  12
TP_t 289
TN_t 7089
FP_t 9854
FN_t 23
FPR_t 0.5815971197544709
TPR_t 0.9262820512820513
precision 0.028492556442867002
threshold:  13
TP_t 291
TN_t 6886
FP_t 10057
FN_t 21
FPR_t 0.5935784689842413
TPR_t 0.9326923076923077
preci

threshold:  68
TP_t 305
TN_t 1779
FP_t 15164
FN_t 7
FPR_t 0.8950008853213717
TPR_t 0.9775641025641025
precision 0.019716853060960632
threshold:  69
TP_t 305
TN_t 1727
FP_t 15216
FN_t 7
FPR_t 0.8980699994097857
TPR_t 0.9775641025641025
precision 0.0196507956961536
threshold:  70
TP_t 305
TN_t 1694
FP_t 15249
FN_t 7
FPR_t 0.9000177064274332
TPR_t 0.9775641025641025
precision 0.01960910376751961
threshold:  71
TP_t 305
TN_t 1665
FP_t 15278
FN_t 7
FPR_t 0.9017293277459718
TPR_t 0.9775641025641025
precision 0.019572611178848745
threshold:  72
TP_t 305
TN_t 1628
FP_t 15315
FN_t 7
FPR_t 0.903913120462728
TPR_t 0.9775641025641025
precision 0.019526248399487835
threshold:  73
TP_t 305
TN_t 1595
FP_t 15348
FN_t 7
FPR_t 0.9058608274803753
TPR_t 0.9775641025641025
precision 0.019485082731744714
threshold:  74
TP_t 305
TN_t 1561
FP_t 15382
FN_t 7
FPR_t 0.9078675559228
TPR_t 0.9775641025641025
precision 0.019442850768151974
threshold:  75
TP_t 305
TN_t 1527
FP_t 15416
FN_t 7
FPR_t 0.9098742843652246

threshold:  5
TP_t 290
TN_t 6960
FP_t 10044
FN_t 23
FPR_t 0.5906845448129852
TPR_t 0.9265175718849841
precision 0.028062705631894717
threshold:  6
TP_t 290
TN_t 7224
FP_t 9780
FN_t 23
FPR_t 0.5751587861679605
TPR_t 0.9265175718849841
precision 0.028798411122144985
threshold:  7
TP_t 289
TN_t 7413
FP_t 9591
FN_t 24
FPR_t 0.5640437544107268
TPR_t 0.9233226837060703
precision 0.02925101214574899
threshold:  8
TP_t 288
TN_t 7570
FP_t 9434
FN_t 25
FPR_t 0.5548106327922842
TPR_t 0.9201277955271565
precision 0.02962353425221148
threshold:  9
TP_t 286
TN_t 7731
FP_t 9273
FN_t 27
FPR_t 0.5453422724064926
TPR_t 0.9137380191693291
precision 0.02991944764096663
threshold:  10
TP_t 285
TN_t 7865
FP_t 9139
FN_t 28
FPR_t 0.5374617737003058
TPR_t 0.9105431309904153
precision 0.03024193548387097
threshold:  11
TP_t 285
TN_t 7996
FP_t 9008
FN_t 28
FPR_t 0.5297577040696306
TPR_t 0.9105431309904153
precision 0.030668244915527818
threshold:  12
TP_t 283
TN_t 8112
FP_t 8892
FN_t 30
FPR_t 0.5229357798165137


threshold:  67
TP_t 233
TN_t 10822
FP_t 6182
FN_t 80
FPR_t 0.3635615149376617
TPR_t 0.744408945686901
precision 0.03632112236944661
threshold:  68
TP_t 233
TN_t 10875
FP_t 6129
FN_t 80
FPR_t 0.3604446012702893
TPR_t 0.744408945686901
precision 0.03662370323797548
threshold:  69
TP_t 233
TN_t 10910
FP_t 6094
FN_t 80
FPR_t 0.35838626205598684
TPR_t 0.744408945686901
precision 0.03682629998419472
threshold:  70
TP_t 232
TN_t 10944
FP_t 6060
FN_t 81
FPR_t 0.35638673253352154
TPR_t 0.7412140575079872
precision 0.036872218690400506
threshold:  71
TP_t 231
TN_t 10987
FP_t 6017
FN_t 82
FPR_t 0.3538579157845213
TPR_t 0.7380191693290735
precision 0.03697183098591549
threshold:  72
TP_t 231
TN_t 11021
FP_t 5983
FN_t 82
FPR_t 0.351858386262056
TPR_t 0.7380191693290735
precision 0.03717412294818152
threshold:  73
TP_t 231
TN_t 11070
FP_t 5934
FN_t 82
FPR_t 0.34897671136203245
TPR_t 0.7380191693290735
precision 0.037469586374695864
threshold:  74
TP_t 228
TN_t 11124
FP_t 5880
FN_t 85
FPR_t 0.3458009

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927992799279928, 'recall': 1.0, 'f1-score': 0.9963866305329719, 'support': 6618}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.9927992799279928, 'macro avg': {'precision': 0.4963996399639964, 'recall': 0.5, 'f1-score': 0.49819331526648597, 'support': 6666}, 'weighted avg': {'precision': 0.9856504102255409, 'recall': 0.9927992799279928, 'f1-score': 0.9892119293230135, 'support': 6666}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000198677', 'ENSG00000112234', 'ENSG00000130175', 'ENSG00000168509']
train_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000163541', 'ENSG00000138449', 'ENSG00000025708', 'ENSG00000114956', 'ENSG00000198650', 'ENSG00000122787', 'ENSG00000117594', 'ENSG00000117020', 'ENSG00000103494', 'ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'ENSG00000151779', 'ENSG00000205084', 'ENSG00000011143', 'ENSG00000151729', 'ENSG00000198707', 'ENSG00000140521', 'ENSG00000158104', 'ENSG00000008710', 'ENSG00000103876', 'ENSG00000023839', 'ENSG00000151445', 'ENSG00000099377', 'ENSG00000025796', 'ENSG00000168306', 'ENSG00000134538', 'ENSG00000123810', 'ENSG00000166548', 'ENSG00000118762', 'ENSG00000107815', 'ENSG00000118971', 'ENSG00000105647', 'ENSG0000016233

1        False
12       False
13       False
24       False
49       False
         ...  
67919    False
67920    False
67924    False
67933    False
67955    False
Name: Liver_disease_causing, Length: 6686, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
{'False': {'precision': 0.9369043706868222, 'recall': 0.9992989835261129, 'f1-score': 0.9670963364993216, 'support': 5706}, 'True': {'precision': 0.9808612440191388, 'recall': 0.34804753820033957, 'f1-score': 0.5137844611528822, 'support': 589}, 'accuracy': 0.9383637807783956, 'macro avg': {'precision': 0.9588828073529805, 'recall': 0.6736732608632262, 'f1-score': 0.7404403988261019, 'support': 6295}, 'weighted avg': {'precision': 0.9410172536721654, 'recall': 0.9383637807783956, 'f1-score': 0.9246816113874785, 'support': 6295}}
@@@  precision: 0.9808612440191388 recall:  0.34804753820033957 f1_score:  0.5137844611528822


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000021574', 'ENSG00000112640', 'ENSG00000123240', 'ENSG00000125351', 'ENSG00000155961', 'ENSG00000153187', 'ENSG00000167371', 'ENSG00000167632', 'ENSG00000117984', 'ENSG00000117713', 'ENSG00000147133', 'ENSG00000136854', 'ENSG00000176887', 'ENSG00000185129', 'ENSG00000161395', 'ENSG00000127824', 'ENSG00000164414', 'ENSG00000147852', 'ENSG00000137267', 'ENSG00000130226', 'ENSG00000187323', 'ENSG00000118873', 'ENSG00000089280', 'ENSG00000158828', 'ENSG00000006283', 'ENSG00000158290', 'ENSG00000083937', 'ENSG00000101204', 'ENSG00000164690', 'ENSG00000164418', 'ENSG00000105464', 'ENSG00000196511', 'ENSG00000136425', 'ENSG00000186153', 'ENSG00000124313', 'ENSG00000102172', 'ENSG00000117020', 'ENSG00000081307', 'ENSG00000107290', 'ENSG00000133731', 'ENSG00000100150', 'ENSG00000169359', 'ENSG00000073464', 'ENSG00000050030', 'ENSG00000174080', 'ENSG00000125875', 'ENSG00000167113', 'ENSG00000166206', 'ENSG00000142168', 'ENSG00000130921', 'ENSG00000168434', 'ENSG0000

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

485      False
492      False
493      False
498      False
629      False
         ...  
67942    False
67943    False
67944    False
67958    False
67962    False
Name: brain_disease_causing, Length: 6312, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.98004800480048, 'recall': 1.0, 'f1-score': 0.9899234790514433, 'support': 6533}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133}, 'accuracy': 0.98004800480048, 'macro avg': {'precision': 0.49002400240024, 'recall': 0.5, 'f1-score': 0.49496173952572164, 'support': 6666}, 'weighted avg': {'precision': 0.9604940917134017, 'recall': 0.98004800480048, 'f1-score': 0.9701725305495168, 'support': 6666}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000138449', 'ENSG00000205155', 'ENSG00000140694', 'ENSG00000258366', 'ENSG00000121552', 'ENSG00000080815', 'ENSG00000169032', 'ENSG00000160710', 'ENSG00000171954', 'ENSG00000170426', 'ENSG00000163378', 'ENSG00000104055', 'ENSG00000164509']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000187098', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000119650', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000154227', 'ENSG00000172922', 'ENSG00000148655', 'ENSG00000145912', 'ENSG00000168214', 'ENSG00000104044', 'ENSG00000197859', 'ENSG0000016391

299      False
300      False
307      False
315      False
320      False
         ...  
67912    False
67926    False
67929    False
67930    False
67962    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 6646, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9899565282566332, 'recall': 1.0, 'f1-score': 0.9949529190207156, 'support': 6604}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 67}, 'accuracy': 0.9899565282566332, 'macro avg': {'precision': 0.4949782641283166, 'recall': 0.5, 'f1-score': 0.4974764595103578, 'support': 6671}, 'weighted avg': {'precision': 0.9800139278379262, 'recall': 0.9899565282566332, 'f1-score': 0.9849601374925507, 'support': 6671}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000172817', 'ENSG00000130294', 'ENSG00000169359', 'ENSG00000120948', 'ENSG00000188021', 'ENSG00000103089', 'ENSG00000121680', 'ENSG00000198707', 'ENSG00000099940', 'ENSG00000198835', 'ENSG00000159363', 'ENSG00000107371', 'ENSG00000185963']
train_genes ['ENSG00000127824', 'ENSG00000178568', 'ENSG00000072121', 'ENSG00000113448', 'ENSG00000066427', 'ENSG00000196998', 'ENSG00000213380', 'ENSG00000100749', 'ENSG00000107290', 'ENSG00000135486', 'ENSG00000165280', 'ENSG00000136854', 'ENSG00000032444', 'ENSG00000108518', 'ENSG00000170113', 'ENSG00000125744', 'ENSG00000112367', 'ENSG00000187049', 'ENSG00000089280', 'ENSG00000068615', 'ENSG00000197102', 'ENSG00000101276', 'ENSG00000104133', 'ENSG00000123240', 'ENSG00000261609', 'ENSG00000136104', 'ENSG00000013503', 'ENSG00000148606', 'ENSG00000214274', 'ENSG00000015479', 'ENSG00000142168', 'ENSG00000161011', 'ENSG00000147475', 'ENSG00000008086', 'ENSG00000092621', 'ENSG00000021574', 'ENSG00000141385', 'ENSG0000020484

1        False
12       False
13       False
24       False
31       False
         ...  
67921    False
67924    False
67943    False
67956    False
67960    False
Name: brain-3_disease_causing, Length: 10358, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9730144127568231, 'recall': 1.0, 'f1-score': 0.9863226608641592, 'support': 6346}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 176}, 'accuracy': 0.9730144127568231, 'macro avg': {'precision': 0.48650720637841155, 'recall': 0.5, 'f1-score': 0.4931613304320796, 'support': 6522}, 'weighted avg': {'precision': 0.9467570474325053, 'recall': 0.9730144127568231, 'f1-score': 0.959706164649487, 'support': 6522}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000025708', 'ENSG00000125826', 'ENSG00000198467', 'ENSG00000138347', 'ENSG00000239474', 'ENSG00000163541', 'ENSG00000167113', 'ENSG00000163050', 'ENSG00000173402', 'ENSG00000188157', 'ENSG00000187714', 'ENSG00000131018', 'ENSG00000111799']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG0000010984

7        False
23       False
45       False
54       False
56       False
         ...  
67943    False
67946    False
67958    False
67959    False
67962    False
Name: Muscle - Skeletal_disease_causing, Length: 6506, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9915586373228821, 'recall': 1.0, 'f1-score': 0.9957614290039358, 'support': 6578}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9915586373228821, 'macro avg': {'precision': 0.49577931866144104, 'recall': 0.5, 'f1-score': 0.4978807145019679, 'support': 6634}, 'weighted avg': {'precision': 0.9831885312496109, 'recall': 0.9915586373228821, 'f1-score': 0.9873558456418284, 'support': 6634}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000144381', 'ENSG00000198513', 'ENSG00000100285']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000143811', 'ENSG00000075785', 'ENSG00000198835', 'ENSG00000133812', 'ENSG00000135924', 'ENSG00000100596', 'ENSG00000169247', 'ENSG0000015888

41       False
42       False
46       False
49       False
57       False
         ...  
67884    False
67890    False
67900    False
67927    False
67928    False
Name: Nerve - Tibial_disease_causing, Length: 6677, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9699457784663051, 'recall': 1.0, 'f1-score': 0.9847436300723497, 'support': 6261}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 194}, 'accuracy': 0.9699457784663051, 'macro avg': {'precision': 0.48497288923315257, 'recall': 0.5, 'f1-score': 0.49237181503617483, 'support': 6455}, 'weighted avg': {'precision': 0.9407948131646068, 'recall': 0.9699457784663051, 'f1-score': 0.9551479268602604, 'support': 6455}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000115904', 'ENSG00000159251', 'ENSG00000055118', 'ENSG00000078814', 'ENSG00000169032', 'ENSG00000197616', 'ENSG00000134571', 'ENSG00000134250', 'ENSG00000128591', 'ENSG00000129991', 'ENSG00000172399', 'ENSG00000120457', 'ENSG00000164107']
train_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG00000134755', 'ENSG00000170876', 'ENSG00000198523', 'ENSG00000136574', 'ENSG00000114251', 'ENSG00000164754', 'ENSG00000157764', 'ENSG00000077522', 'ENSG00000112769', 'ENSG00000155657', 'ENSG00000198626', 'ENSG00000165280', 'ENSG00000123700', 'ENSG00000022267', 'ENSG00000118058', 'ENSG00000183873', 'ENSG0000010199

281      False
299      False
300      False
307      False
315      False
         ...  
67888    False
67894    False
67901    False
67931    False
67963    False
Name: Heart - Left Ventricle_disease_causing, Length: 6476, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920765435790103, 'recall': 1.0, 'f1-score': 0.9960225140712946, 'support': 6636}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920765435790103, 'macro avg': {'precision': 0.49603827178950516, 'recall': 0.5, 'f1-score': 0.4980112570356473, 'support': 6689}, 'weighted avg': {'precision': 0.984215868319676, 'recall': 0.9920765435790103, 'f1-score': 0.988130573086726, 'support': 6689}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  1
test_genes ['ENSG00000100345', 'ENSG00000117601', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000174227', 'ENSG00000255072']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000144659', 'ENSG00000151702', 'ENSG00000105610', 'ENSG00000179218', 'ENSG0000016739

10       False
12       False
28       False
30       False
54       False
         ...  
67891    False
67911    False
67916    False
67942    False
67959    False
Name: Whole Blood_disease_causing, Length: 6680, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.992820819623093, 'recall': 1.0, 'f1-score': 0.9963974782347643, 'support': 6638}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 48}, 'accuracy': 0.992820819623093, 'macro avg': {'precision': 0.4964104098115465, 'recall': 0.5, 'f1-score': 0.49819873911738216, 'support': 6686}, 'weighted avg': {'precision': 0.9856931798770703, 'recall': 0.992820819623093, 'f1-score': 0.9892441610114218, 'support': 6686}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000166548', 'ENSG00000118762', 'ENSG00000107815', 'ENSG00000118971', 'ENSG00000105647', 'ENSG00000162337', 'ENSG00000115204', 'ENSG00000187049']
train_genes ['ENSG00000125871', 'ENSG00000164953', 'ENSG00000115486', 'ENSG00000048342', 'ENSG00000134250', 'ENSG00000049239', 'ENSG00000136143', 'ENSG00000242110', 'ENSG00000105697', 'ENSG00000163541', 'ENSG00000138449', 'ENSG00000025708', 'ENSG00000114956', 'ENSG00000198650', 'ENSG00000122787', 'ENSG00000117594', 'ENSG00000117020', 'ENSG00000103494', 'ENSG00000048392', 'ENSG00000106327', 'ENSG00000113971', 'ENSG00000184056', 'ENSG00000172817', 'ENSG00000167397', 'ENSG00000010704', 'ENSG00000151779', 'ENSG00000205084', 'ENSG00000011143', 'ENSG00000151729', 'ENSG00000198707', 'ENSG00000140521', 'ENSG00000158104', 'ENSG00000008710', 'ENSG00000103876', 'ENSG00000023839', 'ENSG00000151445', 'ENSG00000198677', 'ENSG00000099377', 'ENSG00000112234', 'ENSG00000025796', 'ENSG00000130175', 'ENSG00000168509', 'ENSG0000016830

18       False
24       False
27       False
29       False
33       False
         ...  
67931    False
67936    False
67941    False
67943    False
67961    False
Name: Liver_disease_causing, Length: 8886, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.973101752228712, 'recall': 1.0, 'f1-score': 0.9863675313546779, 'support': 6331}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 175}, 'accuracy': 0.973101752228712, 'macro avg': {'precision': 0.486550876114356, 'recall': 0.5, 'f1-score': 0.49318376567733896, 'support': 6506}, 'weighted avg': {'precision': 0.9469270201905895, 'recall': 0.973101752228712, 'f1-score': 0.959835973102746, 'support': 6506}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  9
test_genes ['ENSG00000115204', 'ENSG00000108556', 'ENSG00000136717', 'ENSG00000102119', 'ENSG00000108823', 'ENSG00000165280', 'ENSG00000140521', 'ENSG00000125871', 'ENSG00000168538', 'ENSG00000135902']
train_genes ['ENSG00000157119', 'ENSG00000167323', 'ENSG00000198380', 'ENSG00000151729', 'ENSG00000165410', 'ENSG00000170624', 'ENSG00000072195', 'ENSG00000165917', 'ENSG00000114956', 'ENSG00000112234', 'ENSG00000183091', 'ENSG00000163754', 'ENSG00000144406', 'ENSG00000163069', 'ENSG00000142173', 'ENSG00000139131', 'ENSG00000160789', 'ENSG00000143632', 'ENSG00000181027', 'ENSG00000074181', 'ENSG00000119523', 'ENSG00000130714', 'ENSG00000128591', 'ENSG00000119401', 'ENSG00000120729', 'ENSG00000126934', 'ENSG00000148459', 'ENSG00000206561', 'ENSG00000102683', 'ENSG00000175920', 'ENSG00000136143', 'ENSG00000009830', 'ENSG00000177192', 'ENSG00000170175', 'ENSG00000105993', 'ENSG00000196218', 'ENSG00000173085', 'ENSG00000109846', 'ENSG00000142156', 'ENSG00000030304', 'ENSG0000016338

4        False
18       False
24       False
28       False
52       False
         ...  
67931    False
67936    False
67937    False
67951    False
67958    False
Name: Muscle - Skeletal_disease_causing, Length: 6687, dtype: bool
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9742697139397608, 'recall': 1.0, 'f1-score': 0.9869671879791475, 'support': 6437}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9742697139397608, 'macro avg': {'precision': 0.4871348569698804, 'recall': 0.5, 'f1-score': 0.49348359398957375, 'support': 6607}, 'weighted avg': {'precision': 0.9492014755002635, 'recall': 0.9742697139397608, 'f1-score': 0.9615722399003742, 'support': 6607}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000141837', 'ENSG00000125877', 'ENSG00000107371', 'ENSG00000167552', 'ENSG00000182173', 'ENSG00000148384', 'ENSG00000091136', 'ENSG00000164751', 'ENSG00000030582', 'ENSG00000198860', 'ENSG00000088682', 'ENSG00000198689', 'ENSG00000198835', 'ENSG00000179029', 'ENSG00000130005', 'ENSG00000048342', 'ENSG00000006283']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000124788', 'ENSG00000100014', 'ENSG0000012259

23       False
41       False
42       False
49       False
57       False
         ...  
67910    False
67920    False
67921    False
67931    False
67951    False
Name: brain-1_disease_causing, Length: 6601, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9899594516315892, 'recall': 1.0, 'f1-score': 0.9949543954977682, 'support': 10254}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 104}, 'accuracy': 0.9899594516315892, 'macro avg': {'precision': 0.4949797258157946, 'recall': 0.5, 'f1-score': 0.4974771977488841, 'support': 10358}, 'weighted avg': {'precision': 0.9800197158747167, 'recall': 0.9899594516315892, 'f1-score': 0.9849645077654099, 'support': 10358}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 676
TN_t 0
FP_t 67292
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.009945856873822975
threshold:  1
TP_t 672
TN_t 10517
FP_t 56775
FN_t 4
FPR_t 0.8437109909053082
TPR_t 0.9940828402366864
precision 0.01169773878531516
threshold:  2
TP_t 672
TN_t 14463
FP_t 52829
FN_t 4
FPR_t 0.7850710337038578
TPR_t 0.9940828402366864
pr

TPR_t 0.0
precision 0.0
threshold:  57
TP_t 0
TN_t 67267
FP_t 25
FN_t 676
FPR_t 0.0003715151875408667
TPR_t 0.0
precision 0.0
threshold:  58
TP_t 0
TN_t 67270
FP_t 22
FN_t 676
FPR_t 0.0003269333650359627
TPR_t 0.0
precision 0.0
threshold:  59
TP_t 0
TN_t 67275
FP_t 17
FN_t 676
FPR_t 0.00025263032752778934
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67276
FP_t 16
FN_t 676
FPR_t 0.00023776972002615467
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67280
FP_t 12
FN_t 676
FPR_t 0.000178327290019616
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67280
FP_t 12
FN_t 676
FPR_t 0.000178327290019616
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67281
FP_t 11
FN_t 676
FPR_t 0.00016346668251798134
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67284
FP_t 8
FN_t 676
FPR_t 0.00011888486001307734
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67286
FP_t 6
FN_t 676
FPR_t 8.9163645009808e-05
TPR_t 0.0
precision 0.0
threshold:  66
TP_t 0
TN_t 67289
FP_t 3
FN_t 676
FPR_t 4.458

TPR_t 0.7249240121580547
precision 0.04590511019151189
threshold:  27
TP_t 476
TN_t 56452
FP_t 9851
FN_t 182
FPR_t 0.1485754792392501
TPR_t 0.723404255319149
precision 0.04609276653432749
threshold:  28
TP_t 471
TN_t 56519
FP_t 9784
FN_t 187
FPR_t 0.147564966894409
TPR_t 0.71580547112462
precision 0.04592881521209166
threshold:  29
TP_t 471
TN_t 56604
FP_t 9699
FN_t 187
FPR_t 0.14628297362110312
TPR_t 0.71580547112462
precision 0.04631268436578171
threshold:  30
TP_t 468
TN_t 56670
FP_t 9633
FN_t 190
FPR_t 0.1452875435500656
TPR_t 0.7112462006079028
precision 0.04633204633204633
threshold:  31
TP_t 468
TN_t 56734
FP_t 9569
FN_t 190
FPR_t 0.1443222780266353
TPR_t 0.7112462006079028
precision 0.04662747833017834
threshold:  32
TP_t 465
TN_t 56804
FP_t 9499
FN_t 193
FPR_t 0.14326651886038339
TPR_t 0.7066869300911854
precision 0.046668004817342436
threshold:  33
TP_t 463
TN_t 56872
FP_t 9431
FN_t 195
FPR_t 0.14224092424173868
TPR_t 0.7036474164133738
precision 0.04679603800283
threshold:  

TPR_t 0.4848024316109423
precision 0.05209013716525147
threshold:  88
TP_t 312
TN_t 60636
FP_t 5667
FN_t 346
FPR_t 0.08547124564499343
TPR_t 0.47416413373860183
precision 0.05218263923733066
threshold:  89
TP_t 305
TN_t 60776
FP_t 5527
FN_t 353
FPR_t 0.08335972731248963
TPR_t 0.4635258358662614
precision 0.05229766803840878
threshold:  90
TP_t 297
TN_t 60893
FP_t 5410
FN_t 361
FPR_t 0.08159510127746859
TPR_t 0.4513677811550152
precision 0.05204135272472402
threshold:  91
TP_t 292
TN_t 61050
FP_t 5253
FN_t 366
FPR_t 0.0792271842903036
TPR_t 0.44376899696048633
precision 0.05266005410279531
threshold:  92
TP_t 280
TN_t 61230
FP_t 5073
FN_t 378
FPR_t 0.07651237500565586
TPR_t 0.425531914893617
precision 0.05230711750420325
threshold:  93
TP_t 272
TN_t 61428
FP_t 4875
FN_t 386
FPR_t 0.07352608479254333
TPR_t 0.4133738601823708
precision 0.05284631824363707
threshold:  94
TP_t 249
TN_t 61694
FP_t 4609
FN_t 409
FPR_t 0.06951419996078609
TPR_t 0.378419452887538
precision 0.051255660765747224


threshold:  42
TP_t 247
TN_t 3257
FP_t 13745
FN_t 6
FPR_t 0.8084343018468415
TPR_t 0.9762845849802372
precision 0.01765294453973699
threshold:  43
TP_t 247
TN_t 3182
FP_t 13820
FN_t 6
FPR_t 0.8128455475826374
TPR_t 0.9762845849802372
precision 0.017558825620245966
threshold:  44
TP_t 247
TN_t 3109
FP_t 13893
FN_t 6
FPR_t 0.8171391600988119
TPR_t 0.9762845849802372
precision 0.01746817538896747
threshold:  45
TP_t 247
TN_t 3043
FP_t 13959
FN_t 6
FPR_t 0.8210210563463122
TPR_t 0.9762845849802372
precision 0.017387019569196114
threshold:  46
TP_t 247
TN_t 2964
FP_t 14038
FN_t 6
FPR_t 0.8256675685213505
TPR_t 0.9762845849802372
precision 0.01729086454322716
threshold:  47
TP_t 248
TN_t 2901
FP_t 14101
FN_t 5
FPR_t 0.8293730149394188
TPR_t 0.9802371541501976
precision 0.017283434385671474
threshold:  48
TP_t 248
TN_t 2848
FP_t 14154
FN_t 5
FPR_t 0.8324902952593812
TPR_t 0.9802371541501976
precision 0.01721983057908624
threshold:  49
TP_t 248
TN_t 2783
FP_t 14219
FN_t 5
FPR_t 0.8363133748970

 [1.         1.        ]]
AUC
0.7235189256971859
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 253
TN_t 0
FP_t 17064
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.014609920886989662
threshold:  1
TP_t 239
TN_t 4864
FP_t 12200
FN_t 14
FPR_t 0.7149554617909049
TPR_t 0.9446640316205533
precision 0.019213763164241497
threshold:  2
TP_t 235
TN_t 5784
FP_t 11280
FN_t 18
FP

precision 0.029110590988882388
threshold:  57
TP_t 198
TN_t 10457
FP_t 6607
FN_t 55
FPR_t 0.3871894045944679
TPR_t 0.782608695652174
precision 0.029096252755326967
threshold:  58
TP_t 197
TN_t 10499
FP_t 6565
FN_t 56
FPR_t 0.38472808251289264
TPR_t 0.7786561264822134
precision 0.029133392487429754
threshold:  59
TP_t 197
TN_t 10522
FP_t 6542
FN_t 56
FPR_t 0.3833802156586967
TPR_t 0.7786561264822134
precision 0.02923282386110699
threshold:  60
TP_t 196
TN_t 10556
FP_t 6508
FN_t 57
FPR_t 0.3813877168307548
TPR_t 0.7747035573122529
precision 0.029236276849642005
threshold:  61
TP_t 196
TN_t 10589
FP_t 6475
FN_t 57
FPR_t 0.3794538209095171
TPR_t 0.7747035573122529
precision 0.02938090241343127
threshold:  62
TP_t 196
TN_t 10632
FP_t 6432
FN_t 57
FPR_t 0.3769338959212377
TPR_t 0.7747035573122529
precision 0.029571514785757393
threshold:  63
TP_t 194
TN_t 10680
FP_t 6384
FN_t 59
FPR_t 0.37412095639943743
TPR_t 0.766798418972332
precision 0.029492246883551232
threshold:  64
TP_t 193
TN_t 1072

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958616612474135, 'recall': 1.0, 'f1-score': 0.9979265402843602, 'support': 6738}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958616612474135, 'macro avg': {'precision': 0.49793083062370674, 'recall': 0.5, 'f1-score': 0.4989632701421801, 'support': 6766}, 'weighted avg': {'precision': 0.9917404483424582, 'recall': 0.9958616612474135, 'f1-score': 0.9937967822104669, 'support': 6766}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000115839', 'ENSG00000107831', 'ENSG00000120008']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.1
train_fraction:  0.9
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max'

19       False
20       False
24       False
26       False
27       False
         ...  
67933    False
67943    False
67945    False
67958    False
67966    False
Name: Ovary_disease_causing, Length: 6759, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.9464019851116625, 'recall': 0.9996505329372707, 'f1-score': 0.9722977566281441, 'support': 5723}, 'True': {'precision': 0.9925093632958801, 'recall': 0.44991511035653653, 'f1-score': 0.6191588785046729, 'support': 589}, 'accuracy': 0.9483523447401775, 'macro avg': {'precision': 0.9694556742037713, 'recall': 0.7247828216469037, 'f1-score': 0.7957283175664085, 'support': 6312}, 'weighted avg': {'precision': 0.950704463842731, 'recall': 0.9483523447401775, 'f1-score': 0.9393448416701713, 'support': 6312}}
@@@  precision: 0.9925093632958801 recall:  0.44991511035653653 f1_score:  0.6191588785046729


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000127980', 'ENSG00000085788', 'ENSG00000187566', 'ENSG00000205090', 'ENSG00000060642', 'ENSG00000108733', 'ENSG00000159082', 'ENSG00000196876', 'ENSG00000118162', 'ENSG00000100427', 'ENSG00000168036', 'ENSG00000114279', 'ENSG00000125877', 'ENSG00000145335', 'ENSG00000068120', 'ENSG00000181090', 'ENSG00000025708', 'ENSG00000165478', 'ENSG00000184156', 'ENSG00000131398', 'ENSG00000141736', 'ENSG00000178971', 'ENSG00000162735', 'ENSG00000013503', 'ENSG00000078369', 'ENSG00000101347', 'ENSG00000177239', 'ENSG00000166974', 'ENSG00000108518', 'ENSG00000122133', 'ENSG00000138185', 'ENSG00000135917', 'ENSG00000046651', 'ENSG00000150540', 'ENSG00000182220', 'ENSG00000113971', 'ENSG00000197563', 'ENSG00000112425', 'ENSG00000172269', 'ENSG00000150995', 'ENSG00000073584', 'ENSG00000166813', 'ENSG00000126012', 'ENSG00000105409', 'ENSG00000082458', 'ENSG00000140471', 'ENSG00000141252', 'ENSG00000165194', 'ENSG00000091136', 'ENSG00000010818', 'ENSG00000125779', 'ENSG0000

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

469      False
490      False
515      False
516      False
517      False
         ...  
67919    False
67928    False
67937    False
67946    False
67954    False
Name: brain_disease_causing, Length: 6301, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9906194163192377, 'recall': 1.0, 'f1-score': 0.9952876056548732, 'support': 6653}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 63}, 'accuracy': 0.9906194163192377, 'macro avg': {'precision': 0.49530970815961883, 'recall': 0.5, 'f1-score': 0.4976438028274366, 'support': 6716}, 'weighted avg': {'precision': 0.9813268279886671, 'recall': 0.9906194163192377, 'f1-score': 0.9859512269836019, 'support': 6716}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000001626', 'ENSG00000080572', 'ENSG00000168878', 'ENSG00000039139', 'ENSG00000168447', 'ENSG00000234602', 'ENSG00000105479', 'ENSG00000092621']
train_genes ['ENSG00000104450', 'ENSG00000258366', 'ENSG00000119661', 'ENSG00000187726', 'ENSG00000164818', 'ENSG00000198223', 'ENSG00000086288', 'ENSG00000165699', 'ENSG00000114841', 'ENSG00000135069', 'ENSG00000168484', 'ENSG00000198003', 'ENSG00000130363', 'ENSG00000105877', 'ENSG00000141013', 'ENSG00000140694', 'ENSG00000141519', 'ENSG00000004838', 'ENSG00000167131', 'ENSG00000256061', 'ENSG00000159079', 'ENSG00000167972', 'ENSG00000157423', 'ENSG00000167646', 'ENSG00000111319', 'ENSG00000103197', 'ENSG00000100368', 'ENSG00000129295', 'ENSG00000160188', 'ENSG00000172426', 'ENSG00000171595', 'ENSG00000165506', 'ENSG00000157856', 'ENSG00000152669', 'ENSG00000122735', 'ENSG00000154099', 'ENSG00000111834', 'ENSG00000169126', 'ENSG00000139537']
benign_genes 67335
test_fraction:  0.0995260663507109
train_fraction:  0

260      False
289      False
308      False
321      False
332      False
         ...  
67898    False
67904    False
67919    False
67964    False
67965    False
Name: Lung_disease_causing, Length: 6720, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9729325557050994, 'recall': 1.0, 'f1-score': 0.9862806033502615, 'support': 6506}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 181}, 'accuracy': 0.9729325557050994, 'macro avg': {'precision': 0.4864662778525497, 'recall': 0.5, 'f1-score': 0.49314030167513073, 'support': 6687}, 'weighted avg': {'precision': 0.9465977579508564, 'recall': 0.9729325557050994, 'f1-score': 0.9595845080599373, 'support': 6687}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 1771
TN_t 0
FP_t 66197
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.02605637947269303
threshold:  1
TP_t 1771
TN_t 10521
FP_t 55676
FN_t 0
FPR_t 0.841065305074248
TPR_t 1.0
precision 0.03082841575713266
threshold:  2
TP_t 1771
TN_t 14467
FP_t 51730
FN_t 0
FPR_t 0.7814553529616146
TPR_t 1.0
precision 0.03310218500588774
t

threshold:  56
TP_t 14
TN_t 66183
FP_t 14
FN_t 1757
FPR_t 0.0002114899466743206
TPR_t 0.007905138339920948
precision 0.5
threshold:  57
TP_t 14
TN_t 66186
FP_t 11
FN_t 1757
FPR_t 0.0001661706723869662
TPR_t 0.007905138339920948
precision 0.56
threshold:  58
TP_t 12
TN_t 66187
FP_t 10
FN_t 1759
FPR_t 0.0001510642476245147
TPR_t 0.006775832862789385
precision 0.5454545454545454
threshold:  59
TP_t 10
TN_t 66190
FP_t 7
FN_t 1761
FPR_t 0.0001057449733371603
TPR_t 0.00564652738565782
precision 0.5882352941176471
threshold:  60
TP_t 10
TN_t 66191
FP_t 6
FN_t 1761
FPR_t 9.063854857470882e-05
TPR_t 0.00564652738565782
precision 0.625
threshold:  61
TP_t 9
TN_t 66194
FP_t 3
FN_t 1762
FPR_t 4.531927428735441e-05
TPR_t 0.005081874647092038
precision 0.75
threshold:  62
TP_t 9
TN_t 66194
FP_t 3
FN_t 1762
FPR_t 4.531927428735441e-05
TPR_t 0.005081874647092038
precision 0.75
threshold:  63
TP_t 8
TN_t 66194
FP_t 3
FN_t 1763
FPR_t 4.531927428735441e-05
TPR_t 0.004517221908526256
precision 0.727272727

FP_t 9440
FN_t 341
FPR_t 0.14432485322896282
TPR_t 0.7804249839021249
precision 0.11378144949305295
threshold:  24
TP_t 1204
TN_t 56044
FP_t 9364
FN_t 349
FPR_t 0.14316291585127203
TPR_t 0.7752736638763683
precision 0.11392884178652536
threshold:  25
TP_t 1195
TN_t 56135
FP_t 9273
FN_t 358
FPR_t 0.14177164872798434
TPR_t 0.7694784288473921
precision 0.11415743217424532
threshold:  26
TP_t 1191
TN_t 56208
FP_t 9200
FN_t 362
FPR_t 0.14065557729941291
TPR_t 0.7669027688345138
precision 0.11461841978635358
threshold:  27
TP_t 1180
TN_t 56261
FP_t 9147
FN_t 373
FPR_t 0.13984527886497064
TPR_t 0.7598197037990985
precision 0.11426358090442529
threshold:  28
TP_t 1175
TN_t 56328
FP_t 9080
FN_t 378
FPR_t 0.13882093933463796
TPR_t 0.7566001287830006
precision 0.11457825450999512
threshold:  29
TP_t 1170
TN_t 56408
FP_t 9000
FN_t 383
FPR_t 0.13759784735812133
TPR_t 0.7533805537669027
precision 0.11504424778761062
threshold:  30
TP_t 1161
TN_t 56468
FP_t 8940
FN_t 392
FPR_t 0.13668052837573386
TPR

threshold:  84
TP_t 760
TN_t 59729
FP_t 5679
FN_t 793
FPR_t 0.08682424168297456
TPR_t 0.489375402446877
precision 0.11803075011647772
threshold:  85
TP_t 754
TN_t 59822
FP_t 5586
FN_t 799
FPR_t 0.08540239726027397
TPR_t 0.4855119124275596
precision 0.11892744479495268
threshold:  86
TP_t 741
TN_t 59913
FP_t 5495
FN_t 812
FPR_t 0.0840111301369863
TPR_t 0.4771410173857051
precision 0.11882617062219371
threshold:  87
TP_t 730
TN_t 60014
FP_t 5394
FN_t 823
FPR_t 0.08246697651663405
TPR_t 0.47005795235028974
precision 0.11920313520574788
threshold:  88
TP_t 717
TN_t 60146
FP_t 5262
FN_t 836
FPR_t 0.08044887475538161
TPR_t 0.4616870573084353
precision 0.11991971901655796
threshold:  89
TP_t 701
TN_t 60277
FP_t 5131
FN_t 852
FPR_t 0.07844606164383562
TPR_t 0.4513844172569221
precision 0.12019890260631001
threshold:  90
TP_t 690
TN_t 60391
FP_t 5017
FN_t 863
FPR_t 0.07670315557729941
TPR_t 0.44430135222150674
precision 0.12090415279481338
threshold:  91
TP_t 673
TN_t 60536
FP_t 4872
FN_t 880
F

FN_t 3
FPR_t 0.7852369017782551
TPR_t 0.993963782696177
precision 0.036182523987402036
threshold:  39
TP_t 494
TN_t 3520
FP_t 13238
FN_t 3
FPR_t 0.7899510681465569
TPR_t 0.993963782696177
precision 0.035974366443344015
threshold:  40
TP_t 494
TN_t 3421
FP_t 13337
FN_t 3
FPR_t 0.795858694354935
TPR_t 0.993963782696177
precision 0.035716867905429835
threshold:  41
TP_t 494
TN_t 3334
FP_t 13424
FN_t 3
FPR_t 0.8010502446592672
TPR_t 0.993963782696177
precision 0.035493605403075154
threshold:  42
TP_t 494
TN_t 3260
FP_t 13498
FN_t 3
FPR_t 0.8054660460675498
TPR_t 0.993963782696177
precision 0.03530588907947398
threshold:  43
TP_t 494
TN_t 3185
FP_t 13573
FN_t 3
FPR_t 0.8099415204678363
TPR_t 0.993963782696177
precision 0.03511765124049193
threshold:  44
TP_t 494
TN_t 3112
FP_t 13646
FN_t 3
FPR_t 0.814297648884115
TPR_t 0.993963782696177
precision 0.03493635077793494
threshold:  45
TP_t 494
TN_t 3046
FP_t 13712
FN_t 3
FPR_t 0.8182360663563671
TPR_t 0.993963782696177
precision 0.0347740391383

 [1.         1.        ]]
AUC
0.7652658401777173
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 498
TN_t 0
FP_t 16819
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.02875786799099151
threshold:  1
TP_t 493
TN_t 4873
FP_t 11946
FN_t 5
FPR_t 0.71026814911707
TPR_t 0.9899598393574297
precision 0.03963341104590401
threshold:  2
TP_t 489
TN_t 5793
FP_t 11026
FN_t 9
FPR_t 0.

threshold:  57
TP_t 418
TN_t 10432
FP_t 6387
FN_t 80
FPR_t 0.37974909328735357
TPR_t 0.8393574297188755
precision 0.06142542248346804
threshold:  58
TP_t 416
TN_t 10473
FP_t 6346
FN_t 82
FPR_t 0.37731137404126286
TPR_t 0.8353413654618473
precision 0.06152026027802426
threshold:  59
TP_t 416
TN_t 10496
FP_t 6323
FN_t 82
FPR_t 0.3759438730007729
TPR_t 0.8353413654618473
precision 0.061730227036652326
threshold:  60
TP_t 416
TN_t 10531
FP_t 6288
FN_t 82
FPR_t 0.37386289315654914
TPR_t 0.8353413654618473
precision 0.06205250596658711
threshold:  61
TP_t 414
TN_t 10562
FP_t 6257
FN_t 84
FPR_t 0.37201973958023665
TPR_t 0.8313253012048193
precision 0.06205966122020687
threshold:  62
TP_t 412
TN_t 10603
FP_t 6216
FN_t 86
FPR_t 0.3695820203341459
TPR_t 0.8273092369477911
precision 0.06216053108026554
threshold:  63
TP_t 411
TN_t 10652
FP_t 6167
FN_t 87
FPR_t 0.3666686485522326
TPR_t 0.8253012048192772
precision 0.06248099726360596
threshold:  64
TP_t 408
TN_t 10695
FP_t 6124
FN_t 90
FPR_t 0.364

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9927976592392528, 'recall': 1.0, 'f1-score': 0.9963858143212108, 'support': 8822}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 64}, 'accuracy': 0.9927976592392528, 'macro avg': {'precision': 0.4963988296196264, 'recall': 0.5, 'f1-score': 0.4981929071606054, 'support': 8886}, 'weighted avg': {'precision': 0.9856471921909395, 'recall': 0.9927976592392528, 'f1-score': 0.9892095041572947, 'support': 8886}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 484
TN_t 0
FP_t 67484
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.007120998116760828
threshold:  1
TP_t 484
TN_t 10521
FP_t 56963
FN_t 0
FPR_t 0.8440963784008061
TPR_t 1.0
precision 0.008425157101328181
threshold:  2
TP_t 484
TN_t 14467
FP_t 53017
FN_t 0
FPR_t 0.7856232588465414
TPR_t 1.0
precision 0.009046559877385469


TPR_t 0.002066115702479339
precision 0.03571428571428571
threshold:  57
TP_t 1
TN_t 67460
FP_t 24
FN_t 483
FPR_t 0.0003556398553731255
TPR_t 0.002066115702479339
precision 0.04
threshold:  58
TP_t 1
TN_t 67463
FP_t 21
FN_t 483
FPR_t 0.0003111848734514848
TPR_t 0.002066115702479339
precision 0.045454545454545456
threshold:  59
TP_t 1
TN_t 67468
FP_t 16
FN_t 483
FPR_t 0.000237093236915417
TPR_t 0.002066115702479339
precision 0.058823529411764705
threshold:  60
TP_t 1
TN_t 67469
FP_t 15
FN_t 483
FPR_t 0.00022227490960820344
TPR_t 0.002066115702479339
precision 0.0625
threshold:  61
TP_t 1
TN_t 67473
FP_t 11
FN_t 483
FPR_t 0.00016300160037934918
TPR_t 0.002066115702479339
precision 0.08333333333333333
threshold:  62
TP_t 1
TN_t 67473
FP_t 11
FN_t 483
FPR_t 0.00016300160037934918
TPR_t 0.002066115702479339
precision 0.08333333333333333
threshold:  63
TP_t 1
TN_t 67474
FP_t 10
FN_t 483
FPR_t 0.00014818327307213562
TPR_t 0.002066115702479339
precision 0.09090909090909091
threshold:  64
TP_t 0

precision 0.033402725208175624
threshold:  25
TP_t 348
TN_t 56370
FP_t 10120
FN_t 123
FPR_t 0.15220333884794707
TPR_t 0.7388535031847133
precision 0.03324417271685136
threshold:  26
TP_t 347
TN_t 56446
FP_t 10044
FN_t 124
FPR_t 0.15106030982102572
TPR_t 0.7367303609341825
precision 0.033394283514579924
threshold:  27
TP_t 347
TN_t 56510
FP_t 9980
FN_t 124
FPR_t 0.15009775906151301
TPR_t 0.7367303609341825
precision 0.03360123946935218
threshold:  28
TP_t 346
TN_t 56581
FP_t 9909
FN_t 125
FPR_t 0.1490299293126786
TPR_t 0.7346072186836518
precision 0.033739639200390054
threshold:  29
TP_t 342
TN_t 56662
FP_t 9828
FN_t 129
FPR_t 0.14781170100767033
TPR_t 0.7261146496815286
precision 0.033628318584070796
threshold:  30
TP_t 342
TN_t 56731
FP_t 9759
FN_t 129
FPR_t 0.14677395097007068
TPR_t 0.7261146496815286
precision 0.033858033858033855
threshold:  31
TP_t 337
TN_t 56790
FP_t 9700
FN_t 134
FPR_t 0.1458865994886449
TPR_t 0.7154989384288747
precision 0.03357576965228654
threshold:  32
TP_t 

FPR_t 0.09244999248007219
TPR_t 0.40976645435244163
precision 0.030441640378548896
threshold:  86
TP_t 191
TN_t 60445
FP_t 6045
FN_t 280
FPR_t 0.09091592720709882
TPR_t 0.40552016985138006
precision 0.030628608082103912
threshold:  87
TP_t 190
TN_t 60556
FP_t 5934
FN_t 281
FPR_t 0.08924650323356896
TPR_t 0.4033970276008493
precision 0.0310254735467015
threshold:  88
TP_t 185
TN_t 60696
FP_t 5794
FN_t 286
FPR_t 0.08714092344713491
TPR_t 0.39278131634819535
precision 0.03094162903495568
threshold:  89
TP_t 179
TN_t 60837
FP_t 5653
FN_t 292
FPR_t 0.08502030380508348
TPR_t 0.38004246284501064
precision 0.03069272976680384
threshold:  90
TP_t 176
TN_t 60959
FP_t 5531
FN_t 295
FPR_t 0.08318544141976238
TPR_t 0.37367303609341823
precision 0.03083932013316979
threshold:  91
TP_t 171
TN_t 61116
FP_t 5374
FN_t 300
FPR_t 0.08082418408783276
TPR_t 0.3630573248407643
precision 0.03083859332732191
threshold:  92
TP_t 165
TN_t 61302
FP_t 5188
FN_t 306
FPR_t 0.07802677094299895
TPR_t 0.350318471337579

TPR_t 0.9893617021276596
precision 0.013545004369356248
threshold:  40
TP_t 186
TN_t 3422
FP_t 13645
FN_t 2
FPR_t 0.7994961035917267
TPR_t 0.9893617021276596
precision 0.013448051478562649
threshold:  41
TP_t 186
TN_t 3335
FP_t 13732
FN_t 2
FPR_t 0.8045936602800726
TPR_t 0.9893617021276596
precision 0.013363989078890645
threshold:  42
TP_t 186
TN_t 3261
FP_t 13806
FN_t 2
FPR_t 0.8089295130954474
TPR_t 0.9893617021276596
precision 0.013293310463121783
threshold:  43
TP_t 186
TN_t 3186
FP_t 13881
FN_t 2
FPR_t 0.8133239585164352
TPR_t 0.9893617021276596
precision 0.013222435487310728
threshold:  44
TP_t 186
TN_t 3113
FP_t 13954
FN_t 2
FPR_t 0.8176012187261967
TPR_t 0.9893617021276596
precision 0.013154172560113154
threshold:  45
TP_t 186
TN_t 3047
FP_t 14020
FN_t 2
FPR_t 0.821468330696666
TPR_t 0.9893617021276596
precision 0.013093059270730676
threshold:  46
TP_t 186
TN_t 2968
FP_t 14099
FN_t 2
FPR_t 0.8260971465401067
TPR_t 0.9893617021276596
precision 0.013020651032551627
threshold:  47

 [1.         1.        ]]
AUC
0.7771357004745998
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 188
TN_t 0
FP_t 17129
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.010856383900213663
threshold:  1
TP_t 186
TN_t 4876
FP_t 12253
FN_t 2
FPR_t 0.7153365637223422
TPR_t 0.9893617021276596
precision 0.01495297049602058
threshold:  2
TP_t 184
TN_t 5798
FP_t 11331
FN_t 4
FPR_t

TPR_t 0.898936170212766
precision 0.024722059684025747
threshold:  57
TP_t 169
TN_t 10493
FP_t 6636
FN_t 19
FPR_t 0.38741315897016754
TPR_t 0.898936170212766
precision 0.024834680382072007
threshold:  58
TP_t 167
TN_t 10534
FP_t 6595
FN_t 21
FPR_t 0.3850195574756261
TPR_t 0.8882978723404256
precision 0.024696835255841466
threshold:  59
TP_t 167
TN_t 10557
FP_t 6572
FN_t 21
FPR_t 0.38367680541771265
TPR_t 0.8882978723404256
precision 0.024781124795963793
threshold:  60
TP_t 166
TN_t 10591
FP_t 6538
FN_t 22
FPR_t 0.38169186759297097
TPR_t 0.8829787234042553
precision 0.024761336515513127
threshold:  61
TP_t 165
TN_t 10623
FP_t 6506
FN_t 23
FPR_t 0.37982369081674355
TPR_t 0.8776595744680851
precision 0.024733922950082445
threshold:  62
TP_t 165
TN_t 10666
FP_t 6463
FN_t 23
FPR_t 0.3773133282736879
TPR_t 0.8776595744680851
precision 0.024894387447193723
threshold:  63
TP_t 165
TN_t 10716
FP_t 6413
FN_t 23
FPR_t 0.3743943020608325
TPR_t 0.8776595744680851
precision 0.02508361204013378
thres

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9799879626843214, 'recall': 1.0, 'f1-score': 0.9898928490006839, 'support': 6513}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133}, 'accuracy': 0.9799879626843214, 'macro avg': {'precision': 0.4899939813421607, 'recall': 0.5, 'f1-score': 0.49494642450034193, 'support': 6646}, 'weighted avg': {'precision': 0.9603764070061669, 'recall': 0.9799879626843214, 'f1-score': 0.9700830763679588, 'support': 6646}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000101347', 'ENSG00000179148', 'ENSG00000166147', 'ENSG00000186081', 'ENSG00000131174', 'ENSG00000104884', 'ENSG00000141499', 'ENSG00000128917', 'ENSG00000101310', 'ENSG00000132155', 'ENSG00000113721', 'ENSG00000180316', 'ENSG00000166974', 'ENSG00000100099', 'ENSG00000107521', 'ENSG00000145623', 'ENSG00000167508', 'ENSG00000188467']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000187098', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000119650', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000154227', 'ENSG00000172922', 'ENSG0000014865

277      False
278      False
282      False
284      False
285      False
         ...  
67933    False
67942    False
67958    False
67959    False
67967    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 6661, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9916129998502321, 'recall': 1.0, 'f1-score': 0.9957888404271319, 'support': 6621}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9916129998502321, 'macro avg': {'precision': 0.49580649992511605, 'recall': 0.5, 'f1-score': 0.49789442021356595, 'support': 6677}, 'weighted avg': {'precision': 0.9832963414719764, 'recall': 0.9916129998502321, 'f1-score': 0.9874371592733324, 'support': 6677}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000104381', 'ENSG00000105227', 'ENSG00000060237', 'ENSG00000196549']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000144381', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000100285', 'ENSG00000143811', 'ENSG00000075785', 'ENSG00000198835', 'ENSG00000133812', 'ENSG00000135924', 'ENSG00000100596', 'ENSG00000169247', 'ENSG0000015888

20       False
24       False
26       False
27       False
32       False
         ...  
67932    False
67941    False
67949    False
67956    False
67958    False
Name: Nerve - Tibial_disease_causing, Length: 6655, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.976679104477612, 'recall': 1.0, 'f1-score': 0.9882019820670127, 'support': 6282}, 'True': {'precision': 1.0, 'recall': 0.2268041237113402, 'f1-score': 0.36974789915966383, 'support': 194}, 'accuracy': 0.9768375540457073, 'macro avg': {'precision': 0.988339552238806, 'recall': 0.6134020618556701, 'f1-score': 0.6789749406133383, 'support': 6476}, 'weighted avg': {'precision': 0.9773777230278502, 'recall': 0.9768375540457073, 'f1-score': 0.9696750994104306, 'support': 6476}}
@@@  precision: 1.0 recall:  0.2268041237113402 f1_score:  0.36974789915966383


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000161202', 'ENSG00000057294', 'ENSG00000072501', 'ENSG00000168509', 'ENSG00000169071', 'ENSG00000160789', 'ENSG00000102119', 'ENSG00000197563', 'ENSG00000138622', 'ENSG00000130283', 'ENSG00000141448', 'ENSG00000100485', 'ENSG00000120729', 'ENSG00000108061', 'ENSG00000174611', 'ENSG00000107201']
train_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG00000134755', 'ENSG00000170876', 'ENSG00000198523', 'ENSG00000136574', 'ENSG00000114251', 'ENSG00000164754', 'ENSG00000157764', 'ENSG00000077522', 'ENSG00000112769', 'ENSG00000155657', 'ENSG00000198626', 'ENSG00000165280', 'ENSG00000123700', 'ENSG0000002226

277      False
278      False
282      False
290      False
314      False
         ...  
67922    False
67930    False
67933    False
67941    False
67956    False
Name: Heart - Left Ventricle_disease_causing, Length: 6444, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.932843137254902, 'recall': 0.9994747899159664, 'f1-score': 0.9650101419878296, 'support': 5712}, 'True': {'precision': 0.9834254143646409, 'recall': 0.30220713073005095, 'f1-score': 0.4623376623376623, 'support': 589}, 'accuracy': 0.9342961434692906, 'macro avg': {'precision': 0.9581342758097715, 'recall': 0.6508409603230086, 'f1-score': 0.713673902162746, 'support': 6301}, 'weighted avg': {'precision': 0.9375714281956473, 'recall': 0.9342961434692906, 'f1-score': 0.9180217130854413, 'support': 6301}}
@@@  precision: 0.9834254143646409 recall:  0.30220713073005095 f1_score:  0.4623376623376623


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000198835', 'ENSG00000156531', 'ENSG00000140650', 'ENSG00000143811', 'ENSG00000126091', 'ENSG00000004848', 'ENSG00000115275', 'ENSG00000113643', 'ENSG00000100478', 'ENSG00000144285', 'ENSG00000148826', 'ENSG00000072121', 'ENSG00000177030', 'ENSG00000099797', 'ENSG00000132773', 'ENSG00000105695', 'ENSG00000185963', 'ENSG00000204852', 'ENSG00000015479', 'ENSG00000196998', 'ENSG00000132326', 'ENSG00000115840', 'ENSG00000107147', 'ENSG00000112210', 'ENSG00000151835', 'ENSG00000087086', 'ENSG00000039650', 'ENSG00000189056', 'ENSG00000188021', 'ENSG00000124587', 'ENSG00000142655', 'ENSG00000136143', 'ENSG00000108055', 'ENSG00000003393', 'ENSG00000247626', 'ENSG00000118402', 'ENSG00000182621']
train_genes ['ENSG00000186487', 'ENSG00000143442', 'ENSG00000162928', 'ENSG00000198689', 'ENSG00000086062', 'ENSG00000086848', 'ENSG00000204843', 'ENSG00000104884', 'ENSG00000184009', 'ENSG00000179029', 'ENSG00000164751', 'ENSG00000163541', 'ENSG00000142186', 'ENSG0000016937

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

484      False
490      False
493      False
501      False
506      False
         ...  
67918    False
67926    False
67934    False
67939    False
67952    False
Name: brain_disease_causing, Length: 6287, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958573753513833, 'recall': 1.0, 'f1-score': 0.9979243884358784, 'support': 6731}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958573753513833, 'macro avg': {'precision': 0.49792868767569165, 'recall': 0.5, 'f1-score': 0.4989621942179392, 'support': 6759}, 'weighted avg': {'precision': 0.9917319120417459, 'recall': 0.9958573753513833, 'f1-score': 0.9937903622668882, 'support': 6759}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000010704', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000107831', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.1
train_fraction:  0.9
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max'

1        False
9        False
12       False
13       False
24       False
         ...  
67920    False
67921    False
67938    False
67943    False
67967    False
Name: Ovary_disease_causing, Length: 6769, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9742463263141948, 'recall': 1.0, 'f1-score': 0.9869551872314304, 'support': 6431}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9742463263141948, 'macro avg': {'precision': 0.4871231631570974, 'recall': 0.5, 'f1-score': 0.4934775936157152, 'support': 6601}, 'weighted avg': {'precision': 0.9491559043367046, 'recall': 0.9742463263141948, 'f1-score': 0.9615374653969594, 'support': 6601}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000205084', 'ENSG00000182220', 'ENSG00000173898', 'ENSG00000007168', 'ENSG00000167113', 'ENSG00000125875', 'ENSG00000116337', 'ENSG00000225830', 'ENSG00000128881', 'ENSG00000129250', 'ENSG00000086848', 'ENSG00000102100', 'ENSG00000167716', 'ENSG00000146282', 'ENSG00000125454', 'ENSG00000197121', 'ENSG00000119977', 'ENSG00000168280']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000124788', 'ENSG0000010001

19       False
20       False
24       False
26       False
27       False
         ...  
67921    False
67933    False
67946    False
67958    False
67967    False
Name: brain-1_disease_causing, Length: 6566, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9800330280738628, 'recall': 1.0, 'f1-score': 0.9899158389567063, 'support': 6528}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133}, 'accuracy': 0.9800330280738628, 'macro avg': {'precision': 0.4900165140369314, 'recall': 0.5, 'f1-score': 0.49495791947835316, 'support': 6661}, 'weighted avg': {'precision': 0.9604647361156248, 'recall': 0.9800330280738628, 'f1-score': 0.9701502171910191, 'support': 6661}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000132842', 'ENSG00000162736', 'ENSG00000100146', 'ENSG00000149418', 'ENSG00000092621', 'ENSG00000031081', 'ENSG00000047579', 'ENSG00000179477', 'ENSG00000163389', 'ENSG00000004961', 'ENSG00000092330', 'ENSG00000157764', 'ENSG00000233608', 'ENSG00000118965', 'ENSG00000121879', 'ENSG00000213689', 'ENSG00000272047']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000187098', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000119650', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000154227', 'ENSG00000172922', 'ENSG00000148655', 'ENSG0000014591

259      False
270      False
271      False
282      False
307      False
         ...  
67920    False
67936    False
67942    False
67955    False
67960    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 6630, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.9505843071786311, 'recall': 0.9992979992979993, 'f1-score': 0.9743326488706366, 'support': 5698}, 'True': {'precision': 0.9865319865319865, 'recall': 0.49745331069609505, 'f1-score': 0.6613995485327313, 'support': 589}, 'accuracy': 0.952282487672976, 'macro avg': {'precision': 0.9685581468553088, 'recall': 0.7483756549970472, 'f1-score': 0.8178660987016839, 'support': 6287}, 'weighted avg': {'precision': 0.9539520792701097, 'recall': 0.952282487672976, 'f1-score': 0.9450153916574942, 'support': 6287}}
@@@  precision: 0.9865319865319865 recall:  0.49745331069609505 f1_score:  0.6613995485327313


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000165699', 'ENSG00000103148', 'ENSG00000164190', 'ENSG00000134851', 'ENSG00000104133', 'ENSG00000124140', 'ENSG00000141385', 'ENSG00000165288', 'ENSG00000164953', 'ENSG00000160710', 'ENSG00000169933', 'ENSG00000104450', 'ENSG00000102103', 'ENSG00000160695']
train_genes ['ENSG00000186487', 'ENSG00000143442', 'ENSG00000162928', 'ENSG00000198689', 'ENSG00000086062', 'ENSG00000086848', 'ENSG00000204843', 'ENSG00000104884', 'ENSG00000184009', 'ENSG00000179029', 'ENSG00000164751', 'ENSG00000163541', 'ENSG00000142186', 'ENSG00000169372', 'ENSG00000134262', 'ENSG00000146282', 'ENSG00000081189', 'ENSG00000163288', 'ENSG00000136908', 'ENSG00000102466', 'ENSG00000079215', 'ENSG00000146938', 'ENSG00000113448', 'ENSG00000168778', 'ENSG00000008086', 'ENSG00000187049', 'ENSG00000169432', 'ENSG00000088682', 'ENSG00000149196', 'ENSG00000106290', 'ENSG00000168280', 'ENSG00000012660', 'ENSG00000043355', 'ENSG00000169057', 'ENSG00000135454', 'ENSG00000157764', 'ENSG0000017292

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

438       True
439       True
440       True
441       True
442       True
         ...  
67922    False
67955    False
67962    False
67964    False
67967    False
Name: brain_disease_causing, Length: 6266, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9915852742299023, 'recall': 1.0, 'f1-score': 0.9957748604194959, 'support': 6599}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9915852742299023, 'macro avg': {'precision': 0.49579263711495114, 'recall': 0.5, 'f1-score': 0.49788743020974796, 'support': 6655}, 'weighted avg': {'precision': 0.9832413560695906, 'recall': 0.9915852742299023, 'f1-score': 0.9873956880403085, 'support': 6655}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000143811', 'ENSG00000133812', 'ENSG00000135924']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000198513', 'ENSG00000100285', 'ENSG00000075785', 'ENSG00000198835', 'ENSG00000100596', 'ENSG00000169247', 'ENSG0000015888

1        False
12       False
13       False
24       False
49       False
         ...  
67920    False
67921    False
67933    False
67943    False
67963    False
Name: Nerve - Tibial_disease_causing, Length: 6647, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9698944754810677, 'recall': 1.0, 'f1-score': 0.9847171892232551, 'support': 6250}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 194}, 'accuracy': 0.9698944754810677, 'macro avg': {'precision': 0.48494723774053383, 'recall': 0.5, 'f1-score': 0.49235859461162756, 'support': 6444}, 'weighted avg': {'precision': 0.9406952935686953, 'recall': 0.9698944754810677, 'f1-score': 0.9550717617388803, 'support': 6444}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000118194', 'ENSG00000164532', 'ENSG00000092621', 'ENSG00000163754', 'ENSG00000122367', 'ENSG00000133703', 'ENSG00000140416', 'ENSG00000096696', 'ENSG00000151929', 'ENSG00000135069', 'ENSG00000092054', 'ENSG00000102554', 'ENSG00000038295', 'ENSG00000046604', 'ENSG00000173991', 'ENSG00000143933', 'ENSG00000010704', 'ENSG00000112319', 'ENSG00000114854']
train_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG00000134755', 'ENSG00000170876', 'ENSG00000198523', 'ENSG00000136574', 'ENSG00000114251', 'ENSG00000164754', 'ENSG00000157764', 'ENSG00000077522', 'ENSG00000112769', 'ENSG00000155657', 'ENSG0000019862

259      False
270      False
271      False
282      False
307      False
         ...  
67927    False
67933    False
67936    False
67960    False
67967    False
Name: Heart - Left Ventricle_disease_causing, Length: 6469, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958634953464323, 'recall': 1.0, 'f1-score': 0.9979274611398964, 'support': 6741}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958634953464323, 'macro avg': {'precision': 0.49793174767321613, 'recall': 0.5, 'f1-score': 0.4989637305699482, 'support': 6769}, 'weighted avg': {'precision': 0.9917441013636135, 'recall': 0.9958634953464323, 'f1-score': 0.9937995295529681, 'support': 6769}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000163421', 'ENSG00000125875', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000179295', 'ENSG00000120008', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.09642857142857143
train_fraction:  0.9035714285714286
relevant_cols ['Whole_Brain_diff_net_m

18       False
27       False
35       False
45       False
49       False
         ...  
67925    False
67926    False
67928    False
67936    False
67955    False
Name: Ovary_disease_causing, Length: 6524, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.990625, 'recall': 1.0, 'f1-score': 0.9952904238618524, 'support': 6657}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 63}, 'accuracy': 0.990625, 'macro avg': {'precision': 0.4953125, 'recall': 0.5, 'f1-score': 0.4976452119309262, 'support': 6720}, 'weighted avg': {'precision': 0.981337890625, 'recall': 0.990625, 'f1-score': 0.9859595761381476, 'support': 6720}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000129295', 'ENSG00000160188', 'ENSG00000172426', 'ENSG00000171595', 'ENSG00000165506', 'ENSG00000157856', 'ENSG00000152669', 'ENSG00000122735', 'ENSG00000154099', 'ENSG00000111834', 'ENSG00000169126', 'ENSG00000139537']
train_genes ['ENSG00000104450', 'ENSG00000258366', 'ENSG00000119661', 'ENSG00000187726', 'ENSG00000164818', 'ENSG00000198223', 'ENSG00000086288', 'ENSG00000165699', 'ENSG00000114841', 'ENSG00000135069', 'ENSG00000168484', 'ENSG00000198003', 'ENSG00000130363', 'ENSG00000105877', 'ENSG00000141013', 'ENSG00000140694', 'ENSG00000141519', 'ENSG00000004838', 'ENSG00000167131', 'ENSG00000256061', 'ENSG00000159079', 'ENSG00000167972', 'ENSG00000157423', 'ENSG00000167646', 'ENSG00000001626', 'ENSG00000111319', 'ENSG00000103197', 'ENSG00000080572', 'ENSG00000168878', 'ENSG00000100368', 'ENSG00000039139', 'ENSG00000168447', 'ENSG00000234602', 'ENSG00000105479', 'ENSG00000092621']
benign_genes 67335
test_fraction:  0.10742496050552923
train_fraction:  

299      False
300      False
304      False
307      False
315      False
         ...  
67928    False
67941    False
67949    False
67959    False
67960    False
Name: Lung_disease_causing, Length: 7230, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9741090466037161, 'recall': 1.0, 'f1-score': 0.9868847400092579, 'support': 6396}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9741090466037161, 'macro avg': {'precision': 0.48705452330185806, 'recall': 0.5, 'f1-score': 0.4934423700046289, 'support': 6566}, 'weighted avg': {'precision': 0.9488884346752007, 'recall': 0.9741090466037161, 'f1-score': 0.9613333531981745, 'support': 6566}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000214160', 'ENSG00000131943', 'ENSG00000134899', 'ENSG00000012061', 'ENSG00000100578', 'ENSG00000039650', 'ENSG00000188603', 'ENSG00000070610', 'ENSG00000148826', 'ENSG00000163050', 'ENSG00000012660', 'ENSG00000128039', 'ENSG00000147133', 'ENSG00000020922', 'ENSG00000260230', 'ENSG00000151835', 'ENSG00000205090', 'ENSG00000146701', 'ENSG00000109618', 'ENSG00000137285', 'ENSG00000079215', 'ENSG00000114279', 'ENSG00000135486', 'ENSG00000107815']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG00000104889', 'ENSG0000016937

1        False
12       False
13       False
24       False
49       False
         ...  
67938    False
67943    False
67955    False
67963    False
67967    False
Name: brain-1_disease_causing, Length: 6574, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9800904977375565, 'recall': 1.0, 'f1-score': 0.989945155393053, 'support': 6498}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 132}, 'accuracy': 0.9800904977375565, 'macro avg': {'precision': 0.49004524886877826, 'recall': 0.5, 'f1-score': 0.4949725776965265, 'support': 6630}, 'weighted avg': {'precision': 0.9605773837554513, 'recall': 0.9800904977375565, 'f1-score': 0.9702358400820601, 'support': 6630}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000136104', 'ENSG00000163344', 'ENSG00000165699', 'ENSG00000160752', 'ENSG00000142208']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000187098', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000119650', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000154227', 'ENSG00000172922', 'ENSG00000148655', 'ENSG00000145912', 'ENSG00000168214', 'ENSG00000104044', 'ENSG00000197859', 'ENSG00000163913', 'ENSG00000106327', 'ENSG00000126934', 'ENSG00000141527', 'ENSG00000049167', 'ENSG00000179295', 'ENSG00000092295', 'ENSG00000138449', 'ENSG0000020515

276      False
285      False
287      False
301      False
303      False
         ...  
67943    False
67946    False
67950    False
67956    False
67961    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 6664, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.9599120433017592, 'recall': 0.9996477012506606, 'f1-score': 0.9793769954266978, 'support': 5677}, 'True': {'precision': 0.9943502824858758, 'recall': 0.597623089983022, 'f1-score': 0.7465535524920467, 'support': 589}, 'accuracy': 0.9618576444302586, 'macro avg': {'precision': 0.9771311628938175, 'recall': 0.7986353956168413, 'f1-score': 0.8629652739593723, 'support': 6266}, 'weighted avg': {'precision': 0.9631492158008725, 'recall': 0.9618576444302586, 'f1-score': 0.9574917404173602, 'support': 6266}}
@@@  precision: 0.9943502824858758 recall:  0.597623089983022 f1_score:  0.7465535524920467


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000163050', 'ENSG00000135541', 'ENSG00000158445', 'ENSG00000083168', 'ENSG00000112234', 'ENSG00000157540', 'ENSG00000131238', 'ENSG00000148606', 'ENSG00000102003', 'ENSG00000100014', 'ENSG00000006016', 'ENSG00000142178', 'ENSG00000012061', 'ENSG00000059573', 'ENSG00000168575', 'ENSG00000116337', 'ENSG00000205084', 'ENSG00000133104', 'ENSG00000072501', 'ENSG00000131462', 'ENSG00000167552', 'ENSG00000197265', 'ENSG00000106976', 'ENSG00000273079', 'ENSG00000164588', 'ENSG00000197694', 'ENSG00000116288', 'ENSG00000100311', 'ENSG00000022355', 'ENSG00000215301', 'ENSG00000147140', 'ENSG00000177189', 'ENSG00000100393', 'ENSG00000155016', 'ENSG00000213380', 'ENSG00000180879', 'ENSG00000155816', 'ENSG00000068438', 'ENSG00000020922', 'ENSG00000070614', 'ENSG00000100225', 'ENSG00000100578', 'ENSG00000134759', 'ENSG00000113231', 'ENSG00000103089', 'ENSG00000124155', 'ENSG00000188706', 'ENSG00000101276', 'ENSG00000103494', 'ENSG00000138821', 'ENSG00000159363', 'ENSG0000

benign_genes 62076
test_fraction:  0.09996605566870333
train_fraction:  0.9000339443312967
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

459      False
464      False
489      False
495      False
501      False
         ...  
67913    False
67929    False
67934    False
67949    False
67952    False
Name: brain_disease_causing, Length: 6306, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.991575146682714, 'recall': 1.0, 'f1-score': 0.9957697537392355, 'support': 6591}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.991575146682714, 'macro avg': {'precision': 0.495787573341357, 'recall': 0.5, 'f1-score': 0.49788487686961774, 'support': 6647}, 'weighted avg': {'precision': 0.9832212715188458, 'recall': 0.991575146682714, 'f1-score': 0.9873805396261925, 'support': 6647}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000075785', 'ENSG00000198835', 'ENSG00000100596', 'ENSG00000169247', 'ENSG00000261609', 'ENSG00000099956', 'ENSG00000087053', 'ENSG00000165280']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG0000010028

18       False
27       False
35       False
43       False
45       False
         ...  
67939    False
67942    False
67943    False
67959    False
67962    False
Name: Nerve - Tibial_disease_causing, Length: 6641, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958614347026364, 'recall': 1.0, 'f1-score': 0.9979264265417402, 'support': 6497}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 27}, 'accuracy': 0.9958614347026364, 'macro avg': {'precision': 0.4979307173513182, 'recall': 0.5, 'f1-score': 0.4989632132708701, 'support': 6524}, 'weighted avg': {'precision': 0.9917399971279934, 'recall': 0.9958614347026364, 'f1-score': 0.9937964428635325, 'support': 6524}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000179295', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000130385']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000138449', 'ENSG00000109163', 'ENSG00000171316']
benign_genes 67688
test_fraction:  0.09642857142857143
train_fraction:  0.9035714285714286
relevant_cols ['Whole_Brain_diff_net_m

2        False
17       False
36       False
46       False
57       False
         ...  
67949    False
67951    False
67961    False
67964    False
67966    False
Name: Ovary_disease_causing, Length: 6533, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920658682634731, 'recall': 1.0, 'f1-score': 0.9960171338393327, 'support': 6627}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920658682634731, 'macro avg': {'precision': 0.49603293413173655, 'recall': 0.5, 'f1-score': 0.49800856691966633, 'support': 6680}, 'weighted avg': {'precision': 0.9841946869733587, 'recall': 0.9920658682634731, 'f1-score': 0.9881146026876133, 'support': 6680}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  2
test_genes ['ENSG00000184500', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000197728', 'ENSG00000113013', 'ENSG00000179218']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000107521', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000255072', 'ENSG00000144659', 'ENSG00000151702', 'ENSG00000105610', 'ENSG0000016739

2        False
31       False
50       False
63       False
74       False
         ...  
67899    False
67904    False
67954    False
67960    False
67965    False
Name: Whole Blood_disease_causing, Length: 6681, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}
{'False': {'precision': 0.9765017117958295, 'recall': 1.0, 'f1-score': 0.988111172348634, 'support': 6275}, 'True': {'precision': 1.0, 'recall': 0.22164948453608246, 'f1-score': 0.36286919831223624, 'support': 194}, 'accuracy': 0.9766579069407946, 'macro avg': {'precision': 0.9882508558979147, 'recall': 0.6108247422680413, 'f1-score': 0.6754901853304351, 'support': 6469}, 'weighted avg': {'precision': 0.9772064061707884, 'recall': 0.9766579069407946, 'f1-score': 0.969360678769555, 'support': 6469}}
@@@  precision: 1.0 recall:  0.22164948453608246 f1_score:  0.36286919831223624


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000165699', 'ENSG00000089225', 'ENSG00000137834', 'ENSG00000203867', 'ENSG00000198668', 'ENSG00000125826', 'ENSG00000213281', 'ENSG00000142611', 'ENSG00000151067', 'ENSG00000138449', 'ENSG00000175206', 'ENSG00000131018', 'ENSG00000183230', 'ENSG00000173801', 'ENSG00000183072', 'ENSG00000119699', 'ENSG00000108055', 'ENSG00000106617', 'ENSG00000170624', 'ENSG00000105711', 'ENSG00000124155', 'ENSG00000099949', 'ENSG00000186439', 'ENSG00000106290', 'ENSG00000180509', 'ENSG00000147099', 'ENSG00000129170', 'ENSG00000115267', 'ENSG00000123066']
train_genes ['ENSG00000160808', 'ENSG00000134769', 'ENSG00000149575', 'ENSG00000069431', 'ENSG00000126934', 'ENSG00000106692', 'ENSG00000177098', 'ENSG00000143622', 'ENSG00000130037', 'ENSG00000109846', 'ENSG00000118729', 'ENSG00000149596', 'ENSG00000166147', 'ENSG00000105697', 'ENSG00000164961', 'ENSG00000107404', 'ENSG00000130529', 'ENSG00000132155', 'ENSG00000166341', 'ENSG00000165474', 'ENSG00000073578', 'ENSG0000013475

262      False
276      False
282      False
285      False
287      False
         ...  
67936    False
67938    False
67942    False
67943    False
67959    False
Name: Heart - Left Ventricle_disease_causing, Length: 10236, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.9180854481207839, 'recall': 0.9998250830855344, 'f1-score': 0.9572134304613581, 'support': 5717}, 'True': {'precision': 0.9875, 'recall': 0.13412563667232597, 'f1-score': 0.2361733931240657, 'support': 589}, 'accuracy': 0.9189660640659689, 'macro avg': {'precision': 0.952792724060392, 'recall': 0.5669753598789302, 'f1-score': 0.5966934117927118, 'support': 6306}, 'weighted avg': {'precision': 0.9245689830172092, 'recall': 0.9189660640659689, 'f1-score': 0.8898660498727654, 'support': 6306}}
@@@  precision: 0.9875 recall:  0.13412563667232597 f1_score:  0.2361733931240657


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  9
test_genes ['ENSG00000133103', 'ENSG00000015171', 'ENSG00000102302', 'ENSG00000164961', 'ENSG00000183735', 'ENSG00000168303', 'ENSG00000140854', 'ENSG00000136104', 'ENSG00000005339', 'ENSG00000225830', 'ENSG00000107566', 'ENSG00000130821', 'ENSG00000123810', 'ENSG00000174227', 'ENSG00000143614', 'ENSG00000144381', 'ENSG00000092621', 'ENSG00000134871', 'ENSG00000132024', 'ENSG00000074047', 'ENSG00000177426', 'ENSG00000133884', 'ENSG00000166341', 'ENSG00000185760', 'ENSG00000157911', 'ENSG00000154743', 'ENSG00000197102', 'ENSG00000164754', 'ENSG00000104218', 'ENSG00000181038', 'ENSG00000165195', 'ENSG00000141837', 'ENSG00000166123', 'ENSG00000131263', 'ENSG00000113163', 'ENSG00000115839', 'ENSG00000048392', 'ENSG00000112367', 'ENSG00000131089', 'ENSG00000141510', 'ENSG00000116675', 'ENSG00000108231', 'ENSG00000214274', 'ENSG00000112282', 'ENSG00000165078', 'ENSG00000125454', 'ENSG00000077264', 'ENSG00000144406']
train_genes ['ENSG00000186487', 'ENSG00000143442', 'ENSG0000016292

benign_genes 62076
test_fraction:  0.10030549898167006
train_fraction:  0.8996945010183299
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max', 'Vagina_diff_net_max', 'Uterus_diff_net_max', 'Thyroid_diff_net_max', 'Testis_diff_net_max', 'Stomach_diff_net_max', 'Skin_-_Sun_Exposed_(Lower_leg)_diff_net_max', 'Skin_-_Not_Sun_Exposed_(Suprapubic)_diff_net_max', 'Prostate_diff_net_max', 'Pituitary_diff_net_max', 'Ovary_diff_net_max', 'Nerve_-_Tibial_diff_net_max', 'Muscle_-_Skeletal_diff_net_max', 'Minor_Salivary_Gland_diff_net_max', 'Lung_diff_net_max', 'Liver_diff_net_max', 'Heart_-_Left_Ventricle_diff_net_max', 'Heart_-_Atrial_Appendage_diff_net_max', 'Esophagus_-_Muscularis_diff_net_max', 'Esophagus_-_Mucosa_diff_net_max', 'Esophagus_-_Gastroesophageal_Junction_diff_net_max', 'Colon_-_Sigmoid_diff_net_max', 'Cells_-_Transformed_fibroblasts_diff_net_max', 'Cells_-_EBV-transformed_lymphocytes_diff_net_max', 'Breast_-_Mammary_Tissue_diff_net_max', 'Brain_-_Substantia_nigr

267       True
268       True
269       True
270       True
271       True
         ...  
67910    False
67920    False
67957    False
67958    False
67965    False
Name: brain_disease_causing, Length: 6323, dtype: bool
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9800420168067226, 'recall': 1.0, 'f1-score': 0.989920424403183, 'support': 6531}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 133}, 'accuracy': 0.9800420168067226, 'macro avg': {'precision': 0.4900210084033613, 'recall': 0.5, 'f1-score': 0.4949602122015915, 'support': 6664}, 'weighted avg': {'precision': 0.9604823547065885, 'recall': 0.9800420168067226, 'f1-score': 0.9701636092102623, 'support': 6664}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000172548', 'ENSG00000189114', 'ENSG00000130826', 'ENSG00000110921', 'ENSG00000107165', 'ENSG00000197265', 'ENSG00000104889', 'ENSG00000135903', 'ENSG00000101194', 'ENSG00000105697', 'ENSG00000163755', 'ENSG00000137845', 'ENSG00000124205', 'ENSG00000166401', 'ENSG00000133703']
train_genes ['ENSG00000148400', 'ENSG00000166189', 'ENSG00000130158', 'ENSG00000077498', 'ENSG00000163161', 'ENSG00000136695', 'ENSG00000084073', 'ENSG00000168303', 'ENSG00000186832', 'ENSG00000205420', 'ENSG00000187098', 'ENSG00000168509', 'ENSG00000163945', 'ENSG00000144452', 'ENSG00000182117', 'ENSG00000107201', 'ENSG00000135069', 'ENSG00000225830', 'ENSG00000010704', 'ENSG00000110756', 'ENSG00000128422', 'ENSG00000088002', 'ENSG00000136160', 'ENSG00000115267', 'ENSG00000147123', 'ENSG00000119650', 'ENSG00000115657', 'ENSG00000101346', 'ENSG00000160789', 'ENSG00000185479', 'ENSG00000154227', 'ENSG00000172922', 'ENSG00000148655', 'ENSG00000145912', 'ENSG00000168214', 'ENSG0000010404

259      False
260      False
270      False
275      False
286      False
         ...  
67944    False
67949    False
67951    False
67963    False
67964    False
Name: Skin - Not Sun Exposed_disease_causing, Length: 7459, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9741405536963796, 'recall': 1.0, 'f1-score': 0.9869009092310063, 'support': 6404}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9741405536963796, 'macro avg': {'precision': 0.4870702768481898, 'recall': 0.5, 'f1-score': 0.49345045461550313, 'support': 6574}, 'weighted avg': {'precision': 0.9489498183558891, 'recall': 0.9741405536963796, 'f1-score': 0.961380198161753, 'support': 6574}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000261609', 'ENSG00000140521', 'ENSG00000138083', 'ENSG00000197563', 'ENSG00000129159', 'ENSG00000150995', 'ENSG00000121680', 'ENSG00000129255', 'ENSG00000105695', 'ENSG00000072121', 'ENSG00000108641', 'ENSG00000135541', 'ENSG00000139174', 'ENSG00000243335']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000124788', 'ENSG00000100014', 'ENSG00000122591', 'ENSG00000184381', 'ENSG00000101997', 'ENSG0000016449

18       False
27       False
29       False
40       False
43       False
         ...  
67932    False
67936    False
67943    False
67946    False
67964    False
Name: brain-1_disease_causing, Length: 6580, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958671360783713, 'recall': 1.0, 'f1-score': 0.9979292890559092, 'support': 6506}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 27}, 'accuracy': 0.9958671360783713, 'macro avg': {'precision': 0.49793356803918565, 'recall': 0.5, 'f1-score': 0.4989646445279546, 'support': 6533}, 'weighted avg': {'precision': 0.9917513527209374, 'recall': 0.9958671360783713, 'f1-score': 0.9938049831008334, 'support': 6533}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000109163']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000138449', 'ENSG00000171316', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.1
train_fraction:  0.9
relevant_cols ['Whole_Brain_diff_net_max', 'Whole_Blood_diff_net_max'

2        False
23       False
43       False
45       False
54       False
         ...  
67915    False
67927    False
67940    False
67941    False
67967    False
Name: Ovary_disease_causing, Length: 6769, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9915675350097877, 'recall': 1.0, 'f1-score': 0.995765915620747, 'support': 6585}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9915675350097877, 'macro avg': {'precision': 0.49578376750489384, 'recall': 0.5, 'f1-score': 0.4978829578103735, 'support': 6641}, 'weighted avg': {'precision': 0.9832061764853866, 'recall': 0.9915675350097877, 'f1-score': 0.9873691543988283, 'support': 6641}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000158887', 'ENSG00000156515', 'ENSG00000168356', 'ENSG00000186575', 'ENSG00000170445', 'ENSG00000185313', 'ENSG00000130711']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000100285', 'ENSG0000014381

2        False
17       False
36       False
46       False
57       False
         ...  
67873    False
67889    False
67896    False
67916    False
67918    False
Name: Nerve - Tibial_disease_causing, Length: 6644, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
{'False': {'precision': 0.9557964970809008, 'recall': 0.9996510816468946, 'f1-score': 0.9772320286518292, 'support': 5732}, 'True': {'precision': 0.9939024390243902, 'recall': 0.5516074450084603, 'f1-score': 0.7094668117519042, 'support': 591}, 'accuracy': 0.9577732089198165, 'macro avg': {'precision': 0.9748494680526455, 'recall': 0.7756292633276775, 'f1-score': 0.8433494202018668, 'support': 6323}, 'weighted avg': {'precision': 0.9593581943272399, 'recall': 0.9577732089198165, 'f1-score': 0.9522044716080438, 'support': 6323}}
@@@  precision: 0.9939024390243902 recall:  0.5516074450084603 f1_score:  0.7094668117519042


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                     True
1                   54.000                     True
2                   33.000                     True
3                   33.000                     True
4                   20.500                     True
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 5892
TN_t 0
FP_t 62076
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.08668785310734463
threshold:  1
TP_t 5878
TN_t 10507
FP_t 51569
FN_t 14
FPR_t 0.8307397383852052
TPR_t 0.9976238968092329
precision 0.10232039967274183
threshold:  2
TP_t 5877
TN_t 14452
FP_t 47624
FN_t 15
FPR_t 0.7671886075133707
TPR_t 0.997454175152749

precision 0.25
threshold:  56
TP_t 7
TN_t 62055
FP_t 21
FN_t 5885
FPR_t 0.00033829499323410016
TPR_t 0.0011880515953835709
precision 0.25
threshold:  57
TP_t 5
TN_t 62056
FP_t 20
FN_t 5887
FPR_t 0.00032218570784200013
TPR_t 0.0008486082824168364
precision 0.2
threshold:  58
TP_t 4
TN_t 62058
FP_t 18
FN_t 5888
FPR_t 0.00028996713705780014
TPR_t 0.0006788866259334691
precision 0.18181818181818182
threshold:  59
TP_t 2
TN_t 62061
FP_t 15
FN_t 5890
FPR_t 0.0002416392808815001
TPR_t 0.00033944331296673454
precision 0.11764705882352941
threshold:  60
TP_t 1
TN_t 62061
FP_t 15
FN_t 5891
FPR_t 0.0002416392808815001
TPR_t 0.00016972165648336727
precision 0.0625
threshold:  61
TP_t 1
TN_t 62065
FP_t 11
FN_t 5891
FPR_t 0.00017720213931310007
TPR_t 0.00016972165648336727
precision 0.08333333333333333
threshold:  62
TP_t 1
TN_t 62065
FP_t 11
FN_t 5891
FPR_t 0.00017720213931310007
TPR_t 0.00016972165648336727
precision 0.08333333333333333
threshold:  63
TP_t 1
TN_t 62066
FP_t 10
FN_t 5891
FPR_t 0.00

FN_t 1232
FPR_t 0.10029893656990706
TPR_t 0.7855153203342619
precision 0.42358242583552386
threshold:  24
TP_t 4489
TN_t 55138
FP_t 6079
FN_t 1255
FPR_t 0.09930248133688355
TPR_t 0.7815111420612814
precision 0.42477289931869794
threshold:  25
TP_t 4455
TN_t 55204
FP_t 6013
FN_t 1289
FPR_t 0.09822434944541548
TPR_t 0.775591922005571
precision 0.4255827283148643
threshold:  26
TP_t 4430
TN_t 55256
FP_t 5961
FN_t 1314
FPR_t 0.09737491219759217
TPR_t 0.7712395543175488
precision 0.42633047829852755
threshold:  27
TP_t 4414
TN_t 55304
FP_t 5913
FN_t 1330
FPR_t 0.0965908162765245
TPR_t 0.7684540389972145
precision 0.42742325941706205
threshold:  28
TP_t 4388
TN_t 55350
FP_t 5867
FN_t 1356
FPR_t 0.09583939101883464
TPR_t 0.7639275766016713
precision 0.4278888347147733
threshold:  29
TP_t 4362
TN_t 55409
FP_t 5808
FN_t 1382
FPR_t 0.09487560644918895
TPR_t 0.7594011142061281
precision 0.4289085545722714
threshold:  30
TP_t 4346
TN_t 55462
FP_t 5755
FN_t 1398
FPR_t 0.09400983386967672
TPR_t 0.75

TPR_t 0.5168871866295265
precision 0.45272949069838364
threshold:  84
TP_t 2920
TN_t 57698
FP_t 3519
FN_t 2824
FPR_t 0.05748403221327409
TPR_t 0.5083565459610028
precision 0.45348656623699335
threshold:  85
TP_t 2878
TN_t 57755
FP_t 3462
FN_t 2866
FPR_t 0.05655291830700623
TPR_t 0.5010445682451253
precision 0.45394321766561513
threshold:  86
TP_t 2831
TN_t 57812
FP_t 3405
FN_t 2913
FPR_t 0.055621804400738356
TPR_t 0.49286211699164345
precision 0.45397690827453496
threshold:  87
TP_t 2778
TN_t 57871
FP_t 3346
FN_t 2966
FPR_t 0.05465801983109267
TPR_t 0.48363509749303624
precision 0.45362508164598303
threshold:  88
TP_t 2718
TN_t 57956
FP_t 3261
FN_t 3026
FPR_t 0.053269516637535326
TPR_t 0.47318941504178275
precision 0.4545910687405921
threshold:  89
TP_t 2651
TN_t 58036
FP_t 3181
FN_t 3093
FPR_t 0.05196269010242253
TPR_t 0.46152506963788303
precision 0.4545610425240055
threshold:  90
TP_t 2591
TN_t 58101
FP_t 3116
FN_t 3153
FPR_t 0.050900893542643384
TPR_t 0.45107938718662954
precision 

FPR_t 0.761873737702782
TPR_t 0.9821615949632738
precision 0.13799203892083148
threshold:  38
TP_t 1875
TN_t 3571
FP_t 11778
FN_t 31
FPR_t 0.7673464069320477
TPR_t 0.9837355718782791
precision 0.13733245440562514
threshold:  39
TP_t 1876
TN_t 3493
FP_t 11856
FN_t 30
FPR_t 0.7724281712163659
TPR_t 0.9842602308499475
precision 0.13661520535974367
threshold:  40
TP_t 1878
TN_t 3396
FP_t 11953
FN_t 28
FPR_t 0.7787478011596847
TPR_t 0.9853095487932844
precision 0.13578193912226158
threshold:  41
TP_t 1878
TN_t 3309
FP_t 12040
FN_t 28
FPR_t 0.7844159228614241
TPR_t 0.9853095487932844
precision 0.13493318005460556
threshold:  42
TP_t 1878
TN_t 3235
FP_t 12114
FN_t 28
FPR_t 0.7892370838491107
TPR_t 0.9853095487932844
precision 0.1342195540308748
threshold:  43
TP_t 1878
TN_t 3160
FP_t 12189
FN_t 28
FPR_t 0.7941233956609551
TPR_t 0.9853095487932844
precision 0.13350394540413735
threshold:  44
TP_t 1878
TN_t 3087
FP_t 12262
FN_t 28
FPR_t 0.7988794058244837
TPR_t 0.9853095487932844
precision 0.13

threshold:  99
TP_t 1899
TN_t 1036
FP_t 14313
FN_t 7
FPR_t 0.9325037461723891
TPR_t 0.9963273871983211
precision 0.11713545521835678
threshold:  100
TP_t 1906
TN_t 0
FP_t 15349
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.11046073601854535
ROC
[[0.         0.        ]
 [0.21160988 0.68520462]
 [0.28523031 0.79328437]
 [0.33116164 0.8363064 ]
 [0.36523552 0.86935992]
 [0.39377158 0.88929696]
 [0.41898495 0.9003148 ]
 [0.44198319 0.91238195]
 [0.44198319 0.91238195]
 [0.48107369 0.92339979]
 [0.4984038  0.92864638]
 [0.5141703  0.93231899]
 [0.52935045 0.93546695]
 [0.54420483 0.93913956]
 [0.55743045 0.94018888]
 [0.55743045 0.94018888]
 [0.5813408  0.9475341 ]
 [0.59378461 0.95120672]
 [0.60551176 0.95278069]
 [0.61678285 0.95435467]
 [0.62642517 0.95592865]
 [0.6363281  0.95855194]
 [0.64675223 0.96012592]
 [0.6559385  0.96327387]
 [0.66349599 0.96484785]
 [0.6715747  0.96537251]
 [0.6796534  0.96852046]
 [0.68753665 0.96904512]
 [0.6955502  0.97009444]
 [0.6955502  0.97009444]
 [0.71138185

precision 0.22191745036572622
threshold:  36
TP_t 1697
TN_t 9488
FP_t 5923
FN_t 209
FPR_t 0.3843358639932516
TPR_t 0.8903462749213011
precision 0.22270341207349081
threshold:  37
TP_t 1692
TN_t 9536
FP_t 5875
FN_t 214
FPR_t 0.3812212056323405
TPR_t 0.887722980062959
precision 0.2236024844720497
threshold:  38
TP_t 1690
TN_t 9568
FP_t 5843
FN_t 216
FPR_t 0.37914476672506653
TPR_t 0.8866736621196223
precision 0.22434621000929245
threshold:  39
TP_t 1685
TN_t 9606
FP_t 5805
FN_t 221
FPR_t 0.3766789955226786
TPR_t 0.8840503672612802
precision 0.22496662216288385
threshold:  40
TP_t 1679
TN_t 9641
FP_t 5770
FN_t 227
FPR_t 0.3744078904678476
TPR_t 0.8809024134312696
precision 0.2253993824674453
threshold:  41
TP_t 1678
TN_t 9672
FP_t 5739
FN_t 228
FPR_t 0.37239634027642593
TPR_t 0.8803777544596013
precision 0.2262370230551436
threshold:  42
TP_t 1672
TN_t 9718
FP_t 5693
FN_t 234
FPR_t 0.36941145934721953
TPR_t 0.8772298006295908
precision 0.22701968771215206
threshold:  43
TP_t 1669
TN_t 974

TPR_t 0.6584470094438615
precision 0.28797613584212944
threshold:  97
TP_t 1215
TN_t 12472
FP_t 2939
FN_t 691
FPR_t 0.19070793588994875
TPR_t 0.6374606505771249
precision 0.29248916706788636
threshold:  98
TP_t 1144
TN_t 12728
FP_t 2683
FN_t 762
FPR_t 0.17409642463175654
TPR_t 0.6002098635886673
precision 0.29892866475045726
threshold:  99
TP_t 1013
TN_t 13098
FP_t 2313
FN_t 893
FPR_t 0.15008759976640063
TPR_t 0.531479538300105
precision 0.30457005411906196
threshold:  100
TP_t 393
TN_t 14579
FP_t 832
FN_t 1513
FPR_t 0.05398741158912465
TPR_t 0.2061909758656873
precision 0.32081632653061226
threshold:  101
TP_t 0
TN_t 15411
FP_t 0
FN_t 1906
FPR_t 0.0
TPR_t 0.0
precision 0


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9702032043767097, 'recall': 1.0, 'f1-score': 0.9848762830366441, 'support': 9931}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 305}, 'accuracy': 0.9702032043767097, 'macro avg': {'precision': 0.48510160218835485, 'recall': 0.5, 'f1-score': 0.49243814151832205, 'support': 10236}, 'weighted avg': {'precision': 0.9412942577828355, 'recall': 0.9702032043767097, 'f1-score': 0.9555301257167753, 'support': 10236}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                     True
1                   54.000                     True
2                   33.000                     True
3                   33.000                     True
4                   20.500                     True
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 1946
TN_t 0
FP_t 66022
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.02863112052730697
threshold:  1
TP_t 1942
TN_t 10517
FP_t 55505
FN_t 4
FPR_t 0.8407046136136439
TPR_t 0.9979445015416238
precision 0.03380507250161018
threshold:  2
TP_t 1942
TN_t 14463
FP_t 51559
FN_t 4
FPR_t 0.7809366574778104
TPR_t 0.9979445015416238


precision 0.3333333333333333
threshold:  56
TP_t 9
TN_t 66003
FP_t 19
FN_t 1937
FPR_t 0.00028778286025870167
TPR_t 0.0046248715313463515
precision 0.32142857142857145
threshold:  57
TP_t 9
TN_t 66006
FP_t 16
FN_t 1937
FPR_t 0.0002423434612704856
TPR_t 0.0046248715313463515
precision 0.36
threshold:  58
TP_t 9
TN_t 66009
FP_t 13
FN_t 1937
FPR_t 0.00019690406228226954
TPR_t 0.0046248715313463515
precision 0.4090909090909091
threshold:  59
TP_t 8
TN_t 66013
FP_t 9
FN_t 1938
FPR_t 0.00013631819696464814
TPR_t 0.0041109969167523125
precision 0.47058823529411764
threshold:  60
TP_t 8
TN_t 66014
FP_t 8
FN_t 1938
FPR_t 0.0001211717306352428
TPR_t 0.0041109969167523125
precision 0.5
threshold:  61
TP_t 6
TN_t 66016
FP_t 6
FN_t 1940
FPR_t 9.08787979764321e-05
TPR_t 0.003083247687564234
precision 0.5
threshold:  62
TP_t 6
TN_t 66016
FP_t 6
FN_t 1940
FPR_t 9.08787979764321e-05
TPR_t 0.003083247687564234
precision 0.5
threshold:  63
TP_t 5
TN_t 66016
FP_t 6
FN_t 1941
FPR_t 9.08787979764321e-05
TPR_

TPR_t 0.8078271028037384
precision 0.12983477281261735
threshold:  24
TP_t 1373
TN_t 56054
FP_t 9195
FN_t 339
FPR_t 0.14092169994942452
TPR_t 0.8019859813084113
precision 0.1299205147615443
threshold:  25
TP_t 1363
TN_t 56144
FP_t 9105
FN_t 349
FPR_t 0.1395423684654171
TPR_t 0.7961448598130841
precision 0.13020634314100116
threshold:  26
TP_t 1358
TN_t 56216
FP_t 9033
FN_t 354
FPR_t 0.13843890327821115
TPR_t 0.7932242990654206
precision 0.13069002020979695
threshold:  27
TP_t 1350
TN_t 56272
FP_t 8977
FN_t 362
FPR_t 0.137580652577051
TPR_t 0.7885514018691588
precision 0.1307252832381137
threshold:  28
TP_t 1345
TN_t 56339
FP_t 8910
FN_t 367
FPR_t 0.13655381691673435
TPR_t 0.7856308411214953
precision 0.1311555338859093
threshold:  29
TP_t 1335
TN_t 56414
FP_t 8835
FN_t 377
FPR_t 0.13540437401339483
TPR_t 0.7797897196261683
precision 0.13126843657817108
threshold:  30
TP_t 1329
TN_t 56477
FP_t 8772
FN_t 383
FPR_t 0.13443884197458966
TPR_t 0.7762850467289719
precision 0.13157113157113157

TN_t 59827
FP_t 5422
FN_t 695
FPR_t 0.08309705895875799
TPR_t 0.5940420560747663
precision 0.1579437800900761
threshold:  85
TP_t 1008
TN_t 59917
FP_t 5332
FN_t 704
FPR_t 0.08171772747475058
TPR_t 0.5887850467289719
precision 0.1589905362776025
threshold:  86
TP_t 998
TN_t 60011
FP_t 5238
FN_t 714
FPR_t 0.08027709236923171
TPR_t 0.5829439252336449
precision 0.1600384862091084
threshold:  87
TP_t 988
TN_t 60113
FP_t 5136
FN_t 724
FPR_t 0.07871385002068997
TPR_t 0.5771028037383178
precision 0.16133246244284782
threshold:  88
TP_t 969
TN_t 60239
FP_t 5010
FN_t 743
FPR_t 0.07678278594307959
TPR_t 0.5660046728971962
precision 0.1620672353236327
threshold:  89
TP_t 954
TN_t 60371
FP_t 4878
FN_t 758
FPR_t 0.07475976643320204
TPR_t 0.5572429906542056
precision 0.16358024691358025
threshold:  90
TP_t 942
TN_t 60484
FP_t 4765
FN_t 770
FPR_t 0.07302793912550384
TPR_t 0.5502336448598131
precision 0.1650604520763974
threshold:  91
TP_t 919
TN_t 60623
FP_t 4626
FN_t 793
FPR_t 0.07089763827798128
TPR

precision 0.03889255108767304
threshold:  39
TP_t 531
TN_t 3510
FP_t 13201
FN_t 13
FPR_t 0.7899587098318472
TPR_t 0.9761029411764706
precision 0.038668802796388
threshold:  40
TP_t 531
TN_t 3411
FP_t 13300
FN_t 13
FPR_t 0.7958829513494106
TPR_t 0.9761029411764706
precision 0.0383920179307353
threshold:  41
TP_t 531
TN_t 3324
FP_t 13387
FN_t 13
FPR_t 0.8010891029860571
TPR_t 0.9761029411764706
precision 0.038152033338123295
threshold:  42
TP_t 531
TN_t 3250
FP_t 13461
FN_t 13
FPR_t 0.8055173239183772
TPR_t 0.9761029411764706
precision 0.037950257289879934
threshold:  43
TP_t 531
TN_t 3175
FP_t 13536
FN_t 13
FPR_t 0.8100053856741068
TPR_t 0.9761029411764706
precision 0.037747920665387076
threshold:  44
TP_t 531
TN_t 3102
FP_t 13609
FN_t 13
FPR_t 0.8143737657830171
TPR_t 0.9761029411764706
precision 0.03755304101838755
threshold:  45
TP_t 531
TN_t 3036
FP_t 13675
FN_t 13
FPR_t 0.8183232601280593
TPR_t 0.9761029411764706
precision 0.03737857243418274
threshold:  46
TP_t 533
TN_t 2959
FP_t 

ROC
[[0.         0.        ]
 [0.25414398 0.56433824]
 [0.3296631  0.70036765]
 [0.37502244 0.75367647]
 [0.40901203 0.78676471]
 [0.43671833 0.81066176]
 [0.46023577 0.83823529]
 [0.48195799 0.86213235]
 [0.48195799 0.86213235]
 [0.51816169 0.89154412]
 [0.53431871 0.90257353]
 [0.54891987 0.91176471]
 [0.56298247 0.91911765]
 [0.57674586 0.92830882]
 [0.58895338 0.93014706]
 [0.58895338 0.93014706]
 [0.61163306 0.93382353]
 [0.6234217  0.93566176]
 [0.63437257 0.93566176]
 [0.64484471 0.9375    ]
 [0.65358147 0.94669118]
 [0.66291664 0.94852941]
 [0.6726707  0.94852941]
 [0.6814673  0.94852941]
 [0.68852851 0.95036765]
 [0.69600862 0.95036765]
 [0.70372808 0.95220588]
 [0.71090898 0.95588235]
 [0.71820956 0.96139706]
 [0.71820956 0.96139706]
 [0.73305009 0.96139706]
 [0.74005146 0.96139706]
 [0.74615523 0.96139706]
 [0.75219915 0.96139706]
 [0.75878164 0.96139706]
 [0.76446652 0.96875   ]
 [0.77009156 0.96875   ]
 [0.77571659 0.97242647]
 [0.78008497 0.97426471]
 [0.78523128 0.976102

precision 0.061451037399233514
threshold:  38
TP_t 463
TN_t 9702
FP_t 7070
FN_t 82
FPR_t 0.42153589315525875
TPR_t 0.8495412844036697
precision 0.06146289658834462
threshold:  39
TP_t 459
TN_t 9741
FP_t 7031
FN_t 86
FPR_t 0.4192105890770331
TPR_t 0.8422018348623853
precision 0.061281708945260346
threshold:  40
TP_t 457
TN_t 9780
FP_t 6992
FN_t 88
FPR_t 0.41688528499880756
TPR_t 0.8385321100917431
precision 0.06135051684789904
threshold:  41
TP_t 457
TN_t 9812
FP_t 6960
FN_t 88
FPR_t 0.4149773431910327
TPR_t 0.8385321100917431
precision 0.06161520830524471
threshold:  42
TP_t 452
TN_t 9859
FP_t 6913
FN_t 93
FPR_t 0.4121750536608633
TPR_t 0.8293577981651377
precision 0.061371350984385606
threshold:  43
TP_t 451
TN_t 9890
FP_t 6882
FN_t 94
FPR_t 0.41032673503458145
TPR_t 0.8275229357798165
precision 0.061502795581617344
threshold:  44
TP_t 449
TN_t 9927
FP_t 6845
FN_t 96
FPR_t 0.40812067731934176
TPR_t 0.8238532110091743
precision 0.061557444474910884
threshold:  45
TP_t 448
TN_t 9972
FP_

TP_t 239
TN_t 13685
FP_t 3087
FN_t 306
FPR_t 0.18405676126878132
TPR_t 0.43853211009174314
precision 0.07185808779314491
threshold:  100
TP_t 96
TN_t 15643
FP_t 1129
FN_t 449
FPR_t 0.06731457190555688
TPR_t 0.1761467889908257
precision 0.0783673469387755
threshold:  101
TP_t 0
TN_t 16772
FP_t 0
FN_t 545
FPR_t 0.0
TPR_t 0.0
precision 0
@ Heart - Left Ventricle  finished 2023-01-02 09:51:44
@ brain  finished 2023-01-02 09:51:17


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9800241319211691, 'recall': 1.0, 'f1-score': 0.9899113006974067, 'support': 7310}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 149}, 'accuracy': 0.9800241319211691, 'macro avg': {'precision': 0.49001206596058455, 'recall': 0.5, 'f1-score': 0.49495565034870337, 'support': 7459}, 'weighted avg': {'precision': 0.960447299147841, 'recall': 0.9800241319211691, 'f1-score': 0.9701369631449314, 'support': 7459}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                     True
1                   54.000                     True
2                   33.000                     True
3                   33.000                     True
4                   20.500                     True
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 1337
TN_t 0
FP_t 66631
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.01967102165725047
threshold:  1
TP_t 1337
TN_t 10521
FP_t 56110
FN_t 0
FPR_t 0.8421005237802224
TPR_t 1.0
precision 0.023273626124949955
threshold:  2
TP_t 1337
TN_t 14467
FP_t 52164
FN_t 0
FPR_t 0.782878840179496
TPR_t 1.0
precision 0.024990187099306557

TP_t 2
TN_t 66605
FP_t 26
FN_t 1335
FPR_t 0.0003902087616875028
TPR_t 0.0014958863126402393
precision 0.07142857142857142
threshold:  57
TP_t 2
TN_t 66608
FP_t 23
FN_t 1335
FPR_t 0.00034518467380048324
TPR_t 0.0014958863126402393
precision 0.08
threshold:  58
TP_t 2
TN_t 66611
FP_t 20
FN_t 1335
FPR_t 0.0003001605859134637
TPR_t 0.0014958863126402393
precision 0.09090909090909091
threshold:  59
TP_t 2
TN_t 66616
FP_t 15
FN_t 1335
FPR_t 0.00022512043943509778
TPR_t 0.0014958863126402393
precision 0.11764705882352941
threshold:  60
TP_t 2
TN_t 66617
FP_t 14
FN_t 1335
FPR_t 0.0002101124101394246
TPR_t 0.0014958863126402393
precision 0.125
threshold:  61
TP_t 1
TN_t 66620
FP_t 11
FN_t 1336
FPR_t 0.00016508832225240505
TPR_t 0.0007479431563201197
precision 0.08333333333333333
threshold:  62
TP_t 1
TN_t 66620
FP_t 11
FN_t 1336
FPR_t 0.00016508832225240505
TPR_t 0.0007479431563201197
precision 0.08333333333333333
threshold:  63
TP_t 1
TN_t 66621
FP_t 10
FN_t 1336
FPR_t 0.00015008029295673184
T

precision 0.09171986481411941
threshold:  24
TP_t 973
TN_t 56069
FP_t 9595
FN_t 324
FPR_t 0.14612268518518517
TPR_t 0.7501927525057825
precision 0.09207040121120363
threshold:  25
TP_t 967
TN_t 56163
FP_t 9501
FN_t 330
FPR_t 0.14469115497076024
TPR_t 0.7455666923670008
precision 0.09237676729079099
threshold:  26
TP_t 963
TN_t 56236
FP_t 9428
FN_t 334
FPR_t 0.14357943469785575
TPR_t 0.7424826522744795
precision 0.0926763545375806
threshold:  27
TP_t 960
TN_t 56297
FP_t 9367
FN_t 337
FPR_t 0.14265046296296297
TPR_t 0.7401696222050886
precision 0.09296020141376973
threshold:  28
TP_t 955
TN_t 56364
FP_t 9300
FN_t 342
FPR_t 0.14163011695906433
TPR_t 0.7363145720894372
precision 0.0931253047294003
threshold:  29
TP_t 947
TN_t 56441
FP_t 9223
FN_t 350
FPR_t 0.1404574805068226
TPR_t 0.7301464919043947
precision 0.09311701081612586
threshold:  30
TP_t 943
TN_t 56506
FP_t 9158
FN_t 354
FPR_t 0.1394675925925926
TPR_t 0.7270624518118736
precision 0.09335709335709336
threshold:  31
TP_t 938
TN_t 

precision 0.09504581456747942
threshold:  85
TP_t 605
TN_t 59929
FP_t 5735
FN_t 692
FPR_t 0.08733857212475633
TPR_t 0.4664610639938319
precision 0.09542586750788644
threshold:  86
TP_t 599
TN_t 60027
FP_t 5637
FN_t 698
FPR_t 0.08584612573099415
TPR_t 0.4618350038550501
precision 0.09605516356638871
threshold:  87
TP_t 590
TN_t 60130
FP_t 5534
FN_t 707
FPR_t 0.08427753411306042
TPR_t 0.4548959136468774
precision 0.09634225996080993
threshold:  88
TP_t 569
TN_t 60254
FP_t 5410
FN_t 728
FPR_t 0.08238913255360623
TPR_t 0.4387047031611411
precision 0.09516641578859342
threshold:  89
TP_t 561
TN_t 60393
FP_t 5271
FN_t 736
FPR_t 0.08027229532163743
TPR_t 0.4325366229760987
precision 0.09619341563786009
threshold:  90
TP_t 545
TN_t 60502
FP_t 5162
FN_t 752
FPR_t 0.07861232943469786
TPR_t 0.4202004626060139
precision 0.09549675836691782
threshold:  91
TP_t 530
TN_t 60649
FP_t 5015
FN_t 767
FPR_t 0.07637365984405459
TPR_t 0.40863531225905936
precision 0.09558160504959423
threshold:  92
TP_t 516


TP_t 517
TN_t 3513
FP_t 13215
FN_t 10
FPR_t 0.7899928263988523
TPR_t 0.9810246679316889
precision 0.037649286338479464
threshold:  40
TP_t 517
TN_t 3414
FP_t 13314
FN_t 10
FPR_t 0.7959110473457676
TPR_t 0.9810246679316889
precision 0.037379799002241344
threshold:  41
TP_t 517
TN_t 3327
FP_t 13401
FN_t 10
FPR_t 0.8011119081779053
TPR_t 0.9810246679316889
precision 0.037146141687024
threshold:  42
TP_t 517
TN_t 3253
FP_t 13475
FN_t 10
FPR_t 0.8055356288857006
TPR_t 0.9810246679316889
precision 0.036949685534591194
threshold:  43
TP_t 517
TN_t 3178
FP_t 13550
FN_t 10
FPR_t 0.8100191296030608
TPR_t 0.9810246679316889
precision 0.03675268358569702
threshold:  44
TP_t 517
TN_t 3105
FP_t 13623
FN_t 10
FPR_t 0.8143830703012912
TPR_t 0.9810246679316889
precision 0.03656294200848656
threshold:  45
TP_t 517
TN_t 3039
FP_t 13689
FN_t 10
FPR_t 0.8183285509325682
TPR_t 0.9810246679316889
precision 0.03639307334928903
threshold:  46
TP_t 518
TN_t 2961
FP_t 13767
FN_t 9
FPR_t 0.8229913916786227
TPR_t 

 [1.         1.        ]]
AUC
0.7618246447002927
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                     True
29                    98.6                     True
30                    99.8                     True
32                    99.9                     True
33                    94.5                     True
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 527
TN_t 0
FP_t 16790
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.030432522954322343
threshold:  1
TP_t 517
TN_t 4868
FP_t 11922
FN_t 10
FPR_t 0.7100655151876116
TPR_t 0.9810246679316889
precision 0.041562826593777635
threshold:  2
TP_t 509
TN_t 5784
FP_t 11006
FN_t 18
FP

threshold:  57
TP_t 436
TN_t 10421
FP_t 6369
FN_t 91
FPR_t 0.37933293627159026
TPR_t 0.8273244781783681
precision 0.06407053637031594
threshold:  58
TP_t 436
TN_t 10464
FP_t 6326
FN_t 91
FPR_t 0.37677188802858846
TPR_t 0.8273244781783681
precision 0.06447796509908311
threshold:  59
TP_t 435
TN_t 10486
FP_t 6304
FN_t 92
FPR_t 0.37546158427635495
TPR_t 0.825426944971537
precision 0.06454963644457634
threshold:  60
TP_t 430
TN_t 10516
FP_t 6274
FN_t 97
FPR_t 0.37367480643240025
TPR_t 0.8159392789373814
precision 0.06414081145584725
threshold:  61
TP_t 427
TN_t 10546
FP_t 6244
FN_t 100
FPR_t 0.3718880285884455
TPR_t 0.8102466793168881
precision 0.06400839454354669
threshold:  62
TP_t 426
TN_t 10588
FP_t 6202
FN_t 101
FPR_t 0.36938653960690887
TPR_t 0.8083491461100569
precision 0.06427278213639107
threshold:  63
TP_t 423
TN_t 10635
FP_t 6155
FN_t 104
FPR_t 0.36658725431804645
TPR_t 0.8026565464895635
precision 0.06430525995743387
threshold:  64
TP_t 422
TN_t 10680
FP_t 6110
FN_t 105
FPR_t 0

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9905947441217151, 'recall': 1.0, 'f1-score': 0.9952751528627015, 'support': 7162}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 68}, 'accuracy': 0.9905947441217151, 'macro avg': {'precision': 0.49529737206085755, 'recall': 0.5, 'f1-score': 0.49763757643135076, 'support': 7230}, 'weighted avg': {'precision': 0.9812779470815661, 'recall': 0.9905947441217151, 'f1-score': 0.9859143353807287, 'support': 7230}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                     True
1                   54.000                     True
2                   33.000                     True
3                   33.000                     True
4                   20.500                     True
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 633
TN_t 0
FP_t 67335
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.009313206214689266
threshold:  1
TP_t 633
TN_t 10521
FP_t 56814
FN_t 0
FPR_t 0.84375139229227
TPR_t 1.0
precision 0.011018852159381691
threshold:  2
TP_t 632
TN_t 14466
FP_t 52869
FN_t 1
FPR_t 0.7851637335709513
TPR_t 0.9984202211690363
precision 0.011812

TPR_t 0.001579778830963665
precision 0.03571428571428571
threshold:  57
TP_t 1
TN_t 67311
FP_t 24
FN_t 632
FPR_t 0.00035642682111828916
TPR_t 0.001579778830963665
precision 0.04
threshold:  58
TP_t 1
TN_t 67314
FP_t 21
FN_t 632
FPR_t 0.000311873468478503
TPR_t 0.001579778830963665
precision 0.045454545454545456
threshold:  59
TP_t 0
TN_t 67318
FP_t 17
FN_t 633
FPR_t 0.0002524689982921215
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67319
FP_t 16
FN_t 633
FPR_t 0.0002376178807455261
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67323
FP_t 12
FN_t 633
FPR_t 0.00017821341055914458
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67323
FP_t 12
FN_t 633
FPR_t 0.00017821341055914458
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67324
FP_t 11
FN_t 633
FPR_t 0.0001633622930125492
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67327
FP_t 8
FN_t 633
FPR_t 0.00011880894037276305
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67329
FP_t 6
FN_t 633
FPR_t 8.910670527957229e

threshold:  26
TP_t 504
TN_t 56462
FP_t 9887
FN_t 108
FPR_t 0.14901505674539178
TPR_t 0.8235294117647058
precision 0.04850351265518237
threshold:  27
TP_t 502
TN_t 56524
FP_t 9825
FN_t 110
FPR_t 0.14808060407843374
TPR_t 0.8202614379084967
precision 0.04861043865595042
threshold:  28
TP_t 498
TN_t 56592
FP_t 9757
FN_t 114
FPR_t 0.1470557205082217
TPR_t 0.8137254901960784
precision 0.04856167723061921
threshold:  29
TP_t 495
TN_t 56674
FP_t 9675
FN_t 117
FPR_t 0.1458198314970836
TPR_t 0.8088235294117647
precision 0.048672566371681415
threshold:  30
TP_t 492
TN_t 56740
FP_t 9609
FN_t 120
FPR_t 0.14482509156128953
TPR_t 0.803921568627451
precision 0.04870804870804871
threshold:  31
TP_t 490
TN_t 56802
FP_t 9547
FN_t 122
FPR_t 0.14389063889433149
TPR_t 0.8006535947712419
precision 0.04881936833715254
threshold:  32
TP_t 489
TN_t 56874
FP_t 9475
FN_t 123
FPR_t 0.14280546805528344
TPR_t 0.7990196078431373
precision 0.0490766760337214
threshold:  33
TP_t 488
TN_t 56943
FP_t 9406
FN_t 124
FPR_

precision 0.05660679923027582
threshold:  87
TP_t 348
TN_t 60573
FP_t 5776
FN_t 264
FPR_t 0.08705481619918913
TPR_t 0.5686274509803921
precision 0.05682560418027433
threshold:  88
TP_t 341
TN_t 60711
FP_t 5638
FN_t 271
FPR_t 0.08497490542434702
TPR_t 0.5571895424836601
precision 0.057032948653621005
threshold:  89
TP_t 335
TN_t 60852
FP_t 5497
FN_t 277
FPR_t 0.08284977919787789
TPR_t 0.5473856209150327
precision 0.05744170096021948
threshold:  90
TP_t 332
TN_t 60974
FP_t 5375
FN_t 280
FPR_t 0.08101101749837979
TPR_t 0.5424836601307189
precision 0.05817417206938847
threshold:  91
TP_t 326
TN_t 61130
FP_t 5219
FN_t 286
FPR_t 0.07865981401377564
TPR_t 0.5326797385620915
precision 0.0587917042380523
threshold:  92
TP_t 316
TN_t 61312
FP_t 5037
FN_t 296
FPR_t 0.07591674328173748
TPR_t 0.5163398692810458
precision 0.05903231832617224
threshold:  93
TP_t 307
TN_t 61509
FP_t 4840
FN_t 305
FPR_t 0.0729475952915643
TPR_t 0.5016339869281046
precision 0.05964639595881096
threshold:  94
TP_t 297
TN

FN_t 2
FPR_t 0.8058450253245619
TPR_t 0.9743589743589743
precision 0.005460554677396178
threshold:  42
TP_t 76
TN_t 3261
FP_t 13916
FN_t 2
FPR_t 0.8101531117191594
TPR_t 0.9743589743589743
precision 0.005431675242995998
threshold:  43
TP_t 76
TN_t 3186
FP_t 13991
FN_t 2
FPR_t 0.8145194154974675
TPR_t 0.9743589743589743
precision 0.005402715575460297
threshold:  44
TP_t 76
TN_t 3113
FP_t 14064
FN_t 2
FPR_t 0.8187692845083542
TPR_t 0.9743589743589743
precision 0.005374823196605375
threshold:  45
TP_t 76
TN_t 3047
FP_t 14130
FN_t 2
FPR_t 0.8226116318332655
TPR_t 0.9743589743589743
precision 0.005349852175137266
threshold:  46
TP_t 77
TN_t 2969
FP_t 14208
FN_t 1
FPR_t 0.827152587762706
TPR_t 0.9871794871794872
precision 0.005390269513475674
threshold:  47
TP_t 77
TN_t 2905
FP_t 14272
FN_t 1
FPR_t 0.8308785003201956
TPR_t 0.9871794871794872
precision 0.005366227611680256
threshold:  48
TP_t 77
TN_t 2852
FP_t 14325
FN_t 1
FPR_t 0.8339640216568668
TPR_t 0.9871794871794872
precision 0.00534647

 [1.         1.        ]]
AUC
0.7872210603624703
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                     True
29                    98.6                     True
30                    99.8                     True
32                    99.9                     True
33                    94.5                     True
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 78
TN_t 0
FP_t 17239
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.004504244384131201
threshold:  1
TP_t 78
TN_t 4878
FP_t 12361
FN_t 0
FPR_t 0.7170369510992517
TPR_t 1.0
precision 0.006270600530589276
threshold:  2
TP_t 78
TN_t 5802
FP_t 11437
FN_t 0
FPR_t 0.66343755438250

TPR_t 0.8589743589743589
precision 0.009845701689933872
threshold:  58
TP_t 67
TN_t 10544
FP_t 6695
FN_t 11
FPR_t 0.3883635941759963
TPR_t 0.8589743589743589
precision 0.009908311150547175
threshold:  59
TP_t 67
TN_t 10567
FP_t 6672
FN_t 11
FPR_t 0.3870294100585881
TPR_t 0.8589743589743589
precision 0.009942127912153138
threshold:  60
TP_t 67
TN_t 10602
FP_t 6637
FN_t 11
FPR_t 0.3849991298799234
TPR_t 0.8589743589743589
precision 0.009994033412887827
threshold:  61
TP_t 67
TN_t 10635
FP_t 6604
FN_t 11
FPR_t 0.38308486571146816
TPR_t 0.8589743589743589
precision 0.01004347174336681
threshold:  62
TP_t 67
TN_t 10678
FP_t 6561
FN_t 11
FPR_t 0.38059052149196587
TPR_t 0.8589743589743589
precision 0.010108630054315027
threshold:  63
TP_t 67
TN_t 10728
FP_t 6511
FN_t 11
FPR_t 0.3776901212367307
TPR_t 0.8589743589743589
precision 0.010185466707205838
threshold:  64
TP_t 67
TN_t 10774
FP_t 6465
FN_t 11
FPR_t 0.37502175300191426
TPR_t 0.8589743589743589
precision 0.010257195345988977
threshold: 

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958634953464323, 'recall': 1.0, 'f1-score': 0.9979274611398964, 'support': 6741}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 28}, 'accuracy': 0.9958634953464323, 'macro avg': {'precision': 0.49793174767321613, 'recall': 0.5, 'f1-score': 0.4989637305699482, 'support': 6769}, 'weighted avg': {'precision': 0.9917441013636135, 'recall': 0.9958634953464323, 'f1-score': 0.9937995295529681, 'support': 6769}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  9
test_genes ['ENSG00000138449', 'ENSG00000171316']
train_genes ['ENSG00000157764', 'ENSG00000125848', 'ENSG00000168509', 'ENSG00000184895', 'ENSG00000104826', 'ENSG00000111877', 'ENSG00000092621', 'ENSG00000131470', 'ENSG00000106327', 'ENSG00000169297', 'ENSG00000139318', 'ENSG00000011201', 'ENSG00000115839', 'ENSG00000203908', 'ENSG00000131808', 'ENSG00000149506', 'ENSG00000214413', 'ENSG00000169836', 'ENSG00000010704', 'ENSG00000107831', 'ENSG00000101292', 'ENSG00000077782', 'ENSG00000099246', 'ENSG00000013503', 'ENSG00000166863', 'ENSG00000187678', 'ENSG00000163421', 'ENSG00000125875', 'ENSG00000179295', 'ENSG00000132155', 'ENSG00000105697', 'ENSG00000120008', 'ENSG00000095015', 'ENSG00000151632', 'ENSG00000170820', 'ENSG00000169946', 'ENSG00000158815', 'ENSG00000139549', 'ENSG00000135069', 'ENSG00000118873', 'ENSG00000109163', 'ENSG00000130385']
benign_genes 67688
test_fraction:  0.10714285714285714
train_fraction:  0.8928571428571429
relevant_cols ['Whole_Brain_diff_net_m

4        False
24       False
28       False
29       False
52       False
         ...  
67950    False
67952    False
67956    False
67958    False
67965    False
Name: Ovary_disease_causing, Length: 7254, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9741641337386018, 'recall': 1.0, 'f1-score': 0.9869130100076982, 'support': 6410}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9741641337386018, 'macro avg': {'precision': 0.4870820668693009, 'recall': 0.5, 'f1-score': 0.4934565050038491, 'support': 6580}, 'weighted avg': {'precision': 0.9489957594626804, 'recall': 0.9741641337386018, 'f1-score': 0.9614152574695053, 'support': 6580}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000157911', 'ENSG00000145348', 'ENSG00000059573', 'ENSG00000138760', 'ENSG00000133104', 'ENSG00000221838', 'ENSG00000124486', 'ENSG00000066427', 'ENSG00000149483', 'ENSG00000159363', 'ENSG00000172817', 'ENSG00000081307', 'ENSG00000011143', 'ENSG00000204852', 'ENSG00000130638', 'ENSG00000166974', 'ENSG00000172269', 'ENSG00000102466', 'ENSG00000103723', 'ENSG00000131165', 'ENSG00000115839', 'ENSG00000166948', 'ENSG00000133103', 'ENSG00000128973', 'ENSG00000155755']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG0000010488

1        False
2        False
12       False
17       False
28       False
         ...  
67897    False
67914    False
67935    False
67946    False
67962    False
Name: brain-1_disease_causing, Length: 6601, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.99157134256472, 'recall': 1.0, 'f1-score': 0.9957678355501813, 'support': 6588}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.99157134256472, 'macro avg': {'precision': 0.49578567128236, 'recall': 0.5, 'f1-score': 0.49788391777509067, 'support': 6644}, 'weighted avg': {'precision': 0.9832137273956013, 'recall': 0.99157134256472, 'f1-score': 0.9873748495792586, 'support': 6644}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000104833', 'ENSG00000123560', 'ENSG00000122591', 'ENSG00000104419']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG00000100285', 'ENSG00000143811', 'ENSG00000075785', 'ENSG00000198835', 'ENSG0000013381

2        False
23       False
45       False
54       False
56       False
         ...  
67897    False
67903    False
67928    False
67942    False
67962    False
Name: Nerve - Tibial_disease_causing, Length: 6509, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9958643507030603, 'recall': 1.0, 'f1-score': 0.9979278905926232, 'support': 7224}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 30}, 'accuracy': 0.9958643507030603, 'macro avg': {'precision': 0.49793217535153017, 'recall': 0.5, 'f1-score': 0.4989639452963116, 'support': 7254}, 'weighted avg': {'precision': 0.991745805001228, 'recall': 0.9958643507030603, 'f1-score': 0.9938008108134974, 'support': 7254}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 280
TN_t 0
FP_t 67688
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.004119585687382297
threshold:  1
TP_t 279
TN_t 10520
FP_t 57168
FN_t 1
FPR_t 0.8445810187921049
TPR_t 0.9964285714285714
precision 0.004856650477831741
threshold:  2
TP_t 279
TN_t 14466
FP_t 53222
FN_t 1
FPR_t 0.7862841271717291
TPR_t 0.9964285714285714
p

precision 0.03571428571428571
threshold:  57
TP_t 1
TN_t 67664
FP_t 24
FN_t 279
FPR_t 0.00035456801796477955
TPR_t 0.0035714285714285713
precision 0.04
threshold:  58
TP_t 1
TN_t 67667
FP_t 21
FN_t 279
FPR_t 0.0003102470157191821
TPR_t 0.0035714285714285713
precision 0.045454545454545456
threshold:  59
TP_t 1
TN_t 67672
FP_t 16
FN_t 279
FPR_t 0.00023637867864318638
TPR_t 0.0035714285714285713
precision 0.058823529411764705
threshold:  60
TP_t 1
TN_t 67673
FP_t 15
FN_t 279
FPR_t 0.00022160501122798723
TPR_t 0.0035714285714285713
precision 0.0625
threshold:  61
TP_t 1
TN_t 67677
FP_t 11
FN_t 279
FPR_t 0.00016251034156719064
TPR_t 0.0035714285714285713
precision 0.08333333333333333
threshold:  62
TP_t 1
TN_t 67677
FP_t 11
FN_t 279
FPR_t 0.00016251034156719064
TPR_t 0.0035714285714285713
precision 0.08333333333333333
threshold:  63
TP_t 1
TN_t 67678
FP_t 10
FN_t 279
FPR_t 0.00014773667415199148
TPR_t 0.0035714285714285713
precision 0.09090909090909091
threshold:  64
TP_t 0
TN_t 67680
FP_t 

precision 0.013720666161998485
threshold:  25
TP_t 144
TN_t 56367
FP_t 10324
FN_t 126
FPR_t 0.15480349672369584
TPR_t 0.5333333333333333
precision 0.013756209400076424
threshold:  26
TP_t 143
TN_t 56443
FP_t 10248
FN_t 127
FPR_t 0.15366391267187476
TPR_t 0.5296296296296297
precision 0.013761909344625156
threshold:  27
TP_t 141
TN_t 56505
FP_t 10186
FN_t 129
FPR_t 0.1527342519980207
TPR_t 0.5222222222222223
precision 0.01365352958264743
threshold:  28
TP_t 141
TN_t 56577
FP_t 10114
FN_t 129
FPR_t 0.1516546460541902
TPR_t 0.5222222222222223
precision 0.013749390541199415
threshold:  29
TP_t 140
TN_t 56661
FP_t 10030
FN_t 130
FPR_t 0.15039510578638796
TPR_t 0.5185185185185185
precision 0.01376597836774828
threshold:  30
TP_t 138
TN_t 56728
FP_t 9963
FN_t 132
FPR_t 0.1493904724775457
TPR_t 0.5111111111111111
precision 0.013662013662013662
threshold:  31
TP_t 137
TN_t 56791
FP_t 9900
FN_t 133
FPR_t 0.148445817276694
TPR_t 0.5074074074074074
precision 0.013649496861612036
threshold:  32
TP_t

FPR_t 0.09388073353226073
TPR_t 0.29259259259259257
precision 0.012460567823343848
threshold:  86
TP_t 77
TN_t 60532
FP_t 6159
FN_t 193
FPR_t 0.09235129177850085
TPR_t 0.2851851851851852
precision 0.012347658755612572
threshold:  87
TP_t 76
TN_t 60643
FP_t 6048
FN_t 194
FPR_t 0.09068689928176216
TPR_t 0.2814814814814815
precision 0.012410189418680601
threshold:  88
TP_t 74
TN_t 60786
FP_t 5905
FN_t 196
FPR_t 0.08854268192109879
TPR_t 0.2740740740740741
precision 0.012376651613982272
threshold:  89
TP_t 71
TN_t 60930
FP_t 5761
FN_t 199
FPR_t 0.0863834700334378
TPR_t 0.26296296296296295
precision 0.012174211248285322
threshold:  90
TP_t 69
TN_t 61053
FP_t 5638
FN_t 201
FPR_t 0.08453914321272736
TPR_t 0.25555555555555554
precision 0.01209041527948134
threshold:  91
TP_t 66
TN_t 61212
FP_t 5479
FN_t 204
FPR_t 0.08215501342010166
TPR_t 0.24444444444444444
precision 0.011902614968440035
threshold:  92
TP_t 62
TN_t 61400
FP_t 5291
FN_t 208
FPR_t 0.07933604234454424
TPR_t 0.22962962962962963
p

TPR_t 0.9527027027027027
precision 0.010267987183221671
threshold:  40
TP_t 141
TN_t 3417
FP_t 13690
FN_t 7
FPR_t 0.800257204653066
TPR_t 0.9527027027027027
precision 0.01019449063697491
threshold:  41
TP_t 141
TN_t 3330
FP_t 13777
FN_t 7
FPR_t 0.8053428421114164
TPR_t 0.9527027027027027
precision 0.010130765914642908
threshold:  42
TP_t 141
TN_t 3256
FP_t 13851
FN_t 7
FPR_t 0.8096685567311627
TPR_t 0.9527027027027027
precision 0.010077186963979417
threshold:  43
TP_t 141
TN_t 3181
FP_t 13926
FN_t 7
FPR_t 0.8140527269538785
TPR_t 0.9527027027027027
precision 0.010023459159735551
threshold:  44
TP_t 141
TN_t 3108
FP_t 13999
FN_t 7
FPR_t 0.8183199859706553
TPR_t 0.9527027027027027
precision 0.009971711456859971
threshold:  45
TP_t 141
TN_t 3042
FP_t 14065
FN_t 7
FPR_t 0.8221780557666453
TPR_t 0.9527027027027027
precision 0.00992538364071519
threshold:  46
TP_t 141
TN_t 2963
FP_t 14144
FN_t 7
FPR_t 0.8267960484012392
TPR_t 0.9527027027027027
precision 0.009870493524676234
threshold:  47
T

 [1.         1.        ]]
AUC
0.7083276720925046
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 149
TN_t 0
FP_t 17168
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.008604261708148063
threshold:  1
TP_t 138
TN_t 4867
FP_t 12301
FN_t 11
FPR_t 0.7165074557315937
TPR_t 0.9261744966442953
precision 0.011094139400273335
threshold:  2
TP_t 137
TN_t 5790
FP_t 11378
FN_t 12
FP

TPR_t 0.7583892617449665
precision 0.016530134581626683
threshold:  57
TP_t 113
TN_t 10476
FP_t 6692
FN_t 36
FPR_t 0.38979496738117425
TPR_t 0.7583892617449665
precision 0.016605437178545186
threshold:  58
TP_t 113
TN_t 10519
FP_t 6649
FN_t 36
FPR_t 0.38729030754892824
TPR_t 0.7583892617449665
precision 0.01671103223898255
threshold:  59
TP_t 113
TN_t 10542
FP_t 6626
FN_t 36
FPR_t 0.38595060577819196
TPR_t 0.7583892617449665
precision 0.016768066478706038
threshold:  60
TP_t 112
TN_t 10576
FP_t 6592
FN_t 37
FPR_t 0.38397017707362535
TPR_t 0.7516778523489933
precision 0.016706443914081145
threshold:  61
TP_t 111
TN_t 10608
FP_t 6560
FN_t 38
FPR_t 0.3821062441752097
TPR_t 0.7449664429530202
precision 0.016639184530055463
threshold:  62
TP_t 111
TN_t 10651
FP_t 6517
FN_t 38
FPR_t 0.37960158434296365
TPR_t 0.7449664429530202
precision 0.016747133373566685
threshold:  63
TP_t 109
TN_t 10699
FP_t 6469
FN_t 40
FPR_t 0.3768056849953402
TPR_t 0.7315436241610739
precision 0.016570386135603528
th

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9915501613151022, 'recall': 1.0, 'f1-score': 0.9957571549795572, 'support': 6454}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 55}, 'accuracy': 0.9915501613151022, 'macro avg': {'precision': 0.4957750806575511, 'recall': 0.5, 'f1-score': 0.4978785774897786, 'support': 6509}, 'weighted avg': {'precision': 0.9831717224040052, 'recall': 0.9915501613151022, 'f1-score': 0.9873431676506471, 'support': 6509}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  9
test_genes ['ENSG00000148606', 'ENSG00000148356', 'ENSG00000112367', 'ENSG00000156709', 'ENSG00000113643', 'ENSG00000164022', 'ENSG00000139132', 'ENSG00000154153']
train_genes ['ENSG00000149196', 'ENSG00000184743', 'ENSG00000152137', 'ENSG00000160695', 'ENSG00000090054', 'ENSG00000130294', 'ENSG00000169562', 'ENSG00000122877', 'ENSG00000099940', 'ENSG00000111199', 'ENSG00000109654', 'ENSG00000079805', 'ENSG00000116688', 'ENSG00000171680', 'ENSG00000160789', 'ENSG00000197102', 'ENSG00000106105', 'ENSG00000198400', 'ENSG00000169432', 'ENSG00000134259', 'ENSG00000147224', 'ENSG00000109099', 'ENSG00000104133', 'ENSG00000189067', 'ENSG00000132740', 'ENSG00000130816', 'ENSG00000133422', 'ENSG00000100241', 'ENSG00000166986', 'ENSG00000090861', 'ENSG00000134684', 'ENSG00000104381', 'ENSG00000105227', 'ENSG00000144381', 'ENSG00000060237', 'ENSG00000196549', 'ENSG00000106211', 'ENSG00000070061', 'ENSG00000171453', 'ENSG00000198513', 'ENSG00000148290', 'ENSG00000013503', 'ENSG0000010028

4        False
18       False
24       False
28       False
29       False
         ...  
67931    False
67937    False
67938    False
67939    False
67950    False
Name: Nerve - Tibial_disease_causing, Length: 7119, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9742463263141948, 'recall': 1.0, 'f1-score': 0.9869551872314304, 'support': 6431}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 170}, 'accuracy': 0.9742463263141948, 'macro avg': {'precision': 0.4871231631570974, 'recall': 0.5, 'f1-score': 0.4934775936157152, 'support': 6601}, 'weighted avg': {'precision': 0.9491559043367046, 'recall': 0.9742463263141948, 'f1-score': 0.9615374653969594, 'support': 6601}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000008086', 'ENSG00000164073', 'ENSG00000162695', 'ENSG00000099956', 'ENSG00000101327', 'ENSG00000090487', 'ENSG00000174080', 'ENSG00000187049', 'ENSG00000160213']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000124788', 'ENSG00000100014', 'ENSG00000122591', 'ENSG00000184381', 'ENSG00000101997', 'ENSG00000164494', 'ENSG00000140650', 'ENSG00000113971', 'ENSG00000173085', 'ENSG00000046651', 'ENSG0000019769

7        False
23       False
45       False
54       False
56       False
         ...  
67907    False
67916    False
67950    False
67958    False
67962    False
Name: brain-1_disease_causing, Length: 6551, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920670558299656, 'recall': 1.0, 'f1-score': 0.9960177323615599, 'support': 6628}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920670558299656, 'macro avg': {'precision': 0.4960335279149828, 'recall': 0.5, 'f1-score': 0.49800886618077994, 'support': 6681}, 'weighted avg': {'precision': 0.9841970432631361, 'recall': 0.9920670558299656, 'f1-score': 0.9881163792983714, 'support': 6681}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  3
test_genes ['ENSG00000107521', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000047579', 'ENSG00000167397']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000197728', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000255072', 'ENSG00000144659', 'ENSG00000151702', 'ENSG00000105610', 'ENSG0000017921

41       False
42       False
46       False
49       False
57       False
         ...  
67884    False
67890    False
67905    False
67944    False
67948    False
Name: Whole Blood_disease_causing, Length: 6685, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9915718499789297, 'recall': 1.0, 'f1-score': 0.9957680914092256, 'support': 7059}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 60}, 'accuracy': 0.9915718499789297, 'macro avg': {'precision': 0.49578592498946483, 'recall': 0.5, 'f1-score': 0.4978840457046128, 'support': 7119}, 'weighted avg': {'precision': 0.9832147336706369, 'recall': 0.9915718499789297, 'f1-score': 0.9873756085486338, 'support': 7119}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 563
TN_t 0
FP_t 67405
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.00828330979284369
threshold:  1
TP_t 562
TN_t 10520
FP_t 56885
FN_t 1
FPR_t 0.8439284919516357
TPR_t 0.9982238010657194
precision 0.00978293035319512
threshold:  2
TP_t 562
TN_t 14466
FP_t 52939
FN_t 1
FPR_t 0.7853868407388176
TPR_t 0.9982238010657194
pre

precision 0.0
threshold:  57
TP_t 0
TN_t 67380
FP_t 25
FN_t 563
FPR_t 0.0003708923670350864
TPR_t 0.0
precision 0.0
threshold:  58
TP_t 0
TN_t 67383
FP_t 22
FN_t 563
FPR_t 0.00032638528299087607
TPR_t 0.0
precision 0.0
threshold:  59
TP_t 0
TN_t 67388
FP_t 17
FN_t 563
FPR_t 0.00025220680958385876
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67389
FP_t 16
FN_t 563
FPR_t 0.0002373711149024553
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67393
FP_t 12
FN_t 563
FPR_t 0.0001780283361768415
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67393
FP_t 12
FN_t 563
FPR_t 0.0001780283361768415
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67394
FP_t 11
FN_t 563
FPR_t 0.00016319264149543803
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67397
FP_t 8
FN_t 563
FPR_t 0.00011868555745122765
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67399
FP_t 6
FN_t 563
FPR_t 8.901416808842074e-05
TPR_t 0.0
precision 0.0
threshold:  66
TP_t 0
TN_t 67402
FP_t 3
FN_t 563
FPR_t 4.450708404

TPR_t 0.7114337568058077
precision 0.03772495428736407
threshold:  27
TP_t 391
TN_t 56474
FP_t 9936
FN_t 160
FPR_t 0.1496160216834814
TPR_t 0.7096188747731398
precision 0.0378619153674833
threshold:  28
TP_t 388
TN_t 56543
FP_t 9867
FN_t 163
FPR_t 0.14857702153290167
TPR_t 0.7041742286751361
precision 0.0378352023403218
threshold:  29
TP_t 383
TN_t 56623
FP_t 9787
FN_t 168
FPR_t 0.14737238367715705
TPR_t 0.6950998185117967
precision 0.037659783677482794
threshold:  30
TP_t 380
TN_t 56689
FP_t 9721
FN_t 171
FPR_t 0.14637855744616773
TPR_t 0.6896551724137931
precision 0.03762003762003762
threshold:  31
TP_t 375
TN_t 56748
FP_t 9662
FN_t 176
FPR_t 0.1454901370275561
TPR_t 0.6805807622504537
precision 0.037361761482514695
threshold:  32
TP_t 373
TN_t 56819
FP_t 9591
FN_t 178
FPR_t 0.14442102093058273
TPR_t 0.676950998185118
precision 0.037434765154556404
threshold:  33
TP_t 370
TN_t 56886
FP_t 9524
FN_t 181
FPR_t 0.14341213672639663
TPR_t 0.6715063520871143
precision 0.037396401859712956
t

FN_t 363
FPR_t 0.08938412889625057
TPR_t 0.3411978221415608
precision 0.03069888961463096
threshold:  88
TP_t 183
TN_t 60614
FP_t 5796
FN_t 368
FPR_t 0.08727601264869748
TPR_t 0.33212341197822143
precision 0.030607124937280482
threshold:  89
TP_t 179
TN_t 60757
FP_t 5653
FN_t 372
FPR_t 0.08512272248155399
TPR_t 0.3248638838475499
precision 0.03069272976680384
threshold:  90
TP_t 175
TN_t 60878
FP_t 5532
FN_t 376
FPR_t 0.08330070772474026
TPR_t 0.3176043557168784
precision 0.030664096723322237
threshold:  91
TP_t 171
TN_t 61036
FP_t 5374
FN_t 380
FPR_t 0.08092154795964464
TPR_t 0.3103448275862069
precision 0.03083859332732191
threshold:  92
TP_t 167
TN_t 61224
FP_t 5186
FN_t 384
FPR_t 0.07809064899864478
TPR_t 0.3030852994555354
precision 0.031197459368578366
threshold:  93
TP_t 156
TN_t 61419
FP_t 4991
FN_t 395
FPR_t 0.07515434422526727
TPR_t 0.2831215970961887
precision 0.030308917816203613
threshold:  94
TP_t 141
TN_t 61693
FP_t 4717
FN_t 410
FPR_t 0.07102845956934196
TPR_t 0.2558983

TN_t 3185
FP_t 13770
FN_t 3
FPR_t 0.8121498083161309
TPR_t 0.99
precision 0.02111324376199616
threshold:  44
TP_t 297
TN_t 3112
FP_t 13843
FN_t 3
FPR_t 0.8164553229135948
TPR_t 0.99
precision 0.021004243281471005
threshold:  45
TP_t 297
TN_t 3046
FP_t 13909
FN_t 3
FPR_t 0.8203479799469183
TPR_t 0.99
precision 0.02090665915810221
threshold:  46
TP_t 297
TN_t 2967
FP_t 13988
FN_t 3
FPR_t 0.8250073724565025
TPR_t 0.99
precision 0.0207910395519776
threshold:  47
TP_t 297
TN_t 2903
FP_t 14052
FN_t 3
FPR_t 0.8287820701857859
TPR_t 0.99
precision 0.020698306502195275
threshold:  48
TP_t 297
TN_t 2850
FP_t 14105
FN_t 3
FPR_t 0.8319079917428487
TPR_t 0.99
precision 0.020622135814470213
threshold:  49
TP_t 297
TN_t 2785
FP_t 14170
FN_t 3
FPR_t 0.8357416691241522
TPR_t 0.99
precision 0.020529480887537154
threshold:  50
TP_t 297
TN_t 2711
FP_t 14244
FN_t 3
FPR_t 0.8401061633736361
TPR_t 0.99
precision 0.020425005157829586
threshold:  51
TP_t 298
TN_t 2648
FP_t 14307
FN_t 2
FPR_t 0.8438218814508994

 [1.         1.        ]]
AUC
0.7645673842524325
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 300
TN_t 0
FP_t 17017
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.01732401686204308
threshold:  1
TP_t 294
TN_t 4872
FP_t 12145
FN_t 6
FPR_t 0.7136980666392431
TPR_t 0.98
precision 0.023635340461451885
threshold:  2
TP_t 291
TN_t 5793
FP_t 11224
FN_t 9
FPR_t 0.65957571839

FPR_t 0.3829699712052653
TPR_t 0.8166666666666667
precision 0.036231884057971016
threshold:  59
TP_t 245
TN_t 10523
FP_t 6494
FN_t 55
FPR_t 0.38161838161838163
TPR_t 0.8166666666666667
precision 0.036355542365336106
threshold:  60
TP_t 245
TN_t 10558
FP_t 6459
FN_t 55
FPR_t 0.3795616148557325
TPR_t 0.8166666666666667
precision 0.03654534606205251
threshold:  61
TP_t 245
TN_t 10591
FP_t 6426
FN_t 55
FPR_t 0.3776223776223776
TPR_t 0.8166666666666667
precision 0.03672612801678909
threshold:  62
TP_t 244
TN_t 10633
FP_t 6384
FN_t 56
FPR_t 0.37515425750719866
TPR_t 0.8133333333333334
precision 0.036813518406759206
threshold:  63
TP_t 244
TN_t 10683
FP_t 6334
FN_t 56
FPR_t 0.3722160192748428
TPR_t 0.8133333333333334
precision 0.03709334144116753
threshold:  64
TP_t 242
TN_t 10727
FP_t 6290
FN_t 58
FPR_t 0.3696303696303696
TPR_t 0.8066666666666666
precision 0.037048377219840785
threshold:  65
TP_t 241
TN_t 10770
FP_t 6247
FN_t 59
FPR_t 0.36710348475054355
TPR_t 0.8033333333333333
precision 0.

Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9742024118455198, 'recall': 1.0, 'f1-score': 0.9869326529034254, 'support': 6382}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 169}, 'accuracy': 0.9742024118455198, 'macro avg': {'precision': 0.4871012059227599, 'recall': 0.5, 'f1-score': 0.4934663264517127, 'support': 6551}, 'weighted avg': {'precision': 0.9490703392456278, 'recall': 0.9742024118455198, 'f1-score': 0.9614721707876142, 'support': 6551}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  9
test_genes ['ENSG00000164953', 'ENSG00000197603', 'ENSG00000196998', 'ENSG00000117984', 'ENSG00000104218', 'ENSG00000112234']
train_genes ['ENSG00000118402', 'ENSG00000123560', 'ENSG00000092621', 'ENSG00000247626', 'ENSG00000171453', 'ENSG00000147403', 'ENSG00000130294', 'ENSG00000168778', 'ENSG00000147140', 'ENSG00000182287', 'ENSG00000100749', 'ENSG00000101361', 'ENSG00000197102', 'ENSG00000171385', 'ENSG00000168958', 'ENSG00000138821', 'ENSG00000104833', 'ENSG00000147044', 'ENSG00000164961', 'ENSG00000148459', 'ENSG00000198707', 'ENSG00000154277', 'ENSG00000116198', 'ENSG00000013503', 'ENSG00000125676', 'ENSG00000104889', 'ENSG00000169379', 'ENSG00000154743', 'ENSG00000197912', 'ENSG00000124155', 'ENSG00000032444', 'ENSG00000124788', 'ENSG00000100014', 'ENSG00000122591', 'ENSG00000184381', 'ENSG00000101997', 'ENSG00000164494', 'ENSG00000140650', 'ENSG00000113971', 'ENSG00000173085', 'ENSG00000046651', 'ENSG00000197694', 'ENSG00000185344', 'ENSG00000073584', 'ENSG0000017853

4        False
18       False
24       False
28       False
52       False
         ...  
67937    False
67939    False
67954    False
67956    False
67967    False
Name: brain-1_disease_causing, Length: 6840, dtype: bool
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9741228070175438, 'recall': 1.0, 'f1-score': 0.9868918018218173, 'support': 6663}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 177}, 'accuracy': 0.9741228070175438, 'macro avg': {'precision': 0.4870614035087719, 'recall': 0.5, 'f1-score': 0.4934459009109087, 'support': 6840}, 'weighted avg': {'precision': 0.9489152431517389, 'recall': 0.9741228070175438, 'f1-score': 0.9613538122132703, 'support': 6840}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 1706
TN_t 0
FP_t 66262
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.025100047080979283
threshold:  1
TP_t 1699
TN_t 10514
FP_t 55748
FN_t 7
FPR_t 0.8413268540038031
TPR_t 0.9958968347010551
precision 0.02957508660156318
threshold:  2
TP_t 1698
TN_t 14459
FP_t 51803
FN_t 8
FPR_t 0.7817904681416197
TPR_t 0.9953106682297772

precision 0.027777777777777776
threshold:  56
TP_t 1
TN_t 66235
FP_t 27
FN_t 1705
FPR_t 0.000407473363315324
TPR_t 0.0005861664712778429
precision 0.03571428571428571
threshold:  57
TP_t 1
TN_t 66238
FP_t 24
FN_t 1705
FPR_t 0.0003621985451691769
TPR_t 0.0005861664712778429
precision 0.04
threshold:  58
TP_t 1
TN_t 66241
FP_t 21
FN_t 1705
FPR_t 0.00031692372702302977
TPR_t 0.0005861664712778429
precision 0.045454545454545456
threshold:  59
TP_t 1
TN_t 66246
FP_t 16
FN_t 1705
FPR_t 0.00024146569677945128
TPR_t 0.0005861664712778429
precision 0.058823529411764705
threshold:  60
TP_t 1
TN_t 66247
FP_t 15
FN_t 1705
FPR_t 0.00022637409073073556
TPR_t 0.0005861664712778429
precision 0.0625
threshold:  61
TP_t 1
TN_t 66251
FP_t 11
FN_t 1705
FPR_t 0.00016600766653587274
TPR_t 0.0005861664712778429
precision 0.08333333333333333
threshold:  62
TP_t 1
TN_t 66251
FP_t 11
FN_t 1705
FPR_t 0.00016600766653587274
TPR_t 0.0005861664712778429
precision 0.08333333333333333
threshold:  63
TP_t 1
TN_t 66252

TP_t 1323
TN_t 55969
FP_t 9329
FN_t 340
FPR_t 0.14286808171766363
TPR_t 0.7955502104630187
precision 0.12420202778820878
threshold:  24
TP_t 1318
TN_t 56048
FP_t 9250
FN_t 345
FPR_t 0.14165824374406566
TPR_t 0.7925435959110042
precision 0.12471612414837245
threshold:  25
TP_t 1311
TN_t 56141
FP_t 9157
FN_t 352
FPR_t 0.14023400410426046
TPR_t 0.788334335538184
precision 0.12523882307986245
threshold:  26
TP_t 1308
TN_t 56215
FP_t 9083
FN_t 355
FPR_t 0.13910073815430793
TPR_t 0.7865303668069753
precision 0.12587816379559233
threshold:  27
TP_t 1306
TN_t 56277
FP_t 9021
FN_t 357
FPR_t 0.1381512450611045
TPR_t 0.7853277209861695
precision 0.12646460733998258
threshold:  28
TP_t 1301
TN_t 56344
FP_t 8954
FN_t 362
FPR_t 0.13712517994425558
TPR_t 0.7823211064341551
precision 0.12686494392979034
threshold:  29
TP_t 1294
TN_t 56422
FP_t 8876
FN_t 369
FPR_t 0.13593065637538668
TPR_t 0.778111846061335
precision 0.1272369714847591
threshold:  30
TP_t 1288
TN_t 56485
FP_t 8813
FN_t 375
FPR_t 0.1349

precision 0.13510216529429706
threshold:  84
TP_t 876
TN_t 59735
FP_t 5563
FN_t 787
FPR_t 0.08519403350791754
TPR_t 0.5267588695129285
precision 0.136045969871098
threshold:  85
TP_t 866
TN_t 59824
FP_t 5474
FN_t 797
FPR_t 0.0838310514870287
TPR_t 0.5207456404088996
precision 0.1365930599369085
threshold:  86
TP_t 852
TN_t 59914
FP_t 5384
FN_t 811
FPR_t 0.08245275506141077
TPR_t 0.5123271196632592
precision 0.13662604233483003
threshold:  87
TP_t 838
TN_t 60012
FP_t 5286
FN_t 825
FPR_t 0.08095194339796012
TPR_t 0.5039085989176187
precision 0.13683866753755716
threshold:  88
TP_t 817
TN_t 60136
FP_t 5162
FN_t 846
FPR_t 0.07905295721155318
TPR_t 0.49128081779915816
precision 0.13664492390031777
threshold:  89
TP_t 798
TN_t 60264
FP_t 5034
FN_t 865
FPR_t 0.0770927134062299
TPR_t 0.4798556825015033
precision 0.1368312757201646
threshold:  90
TP_t 781
TN_t 60372
FP_t 4926
FN_t 882
FPR_t 0.07543875769548837
TPR_t 0.4696331930246542
precision 0.13684948309094094
threshold:  91
TP_t 763
TN_t 6

TN_t 3598
FP_t 13103
FN_t 4
FPR_t 0.7845637985749356
TPR_t 0.9927797833935018
precision 0.04028418662565004
threshold:  39
TP_t 550
TN_t 3519
FP_t 13182
FN_t 4
FPR_t 0.7892940542482486
TPR_t 0.9927797833935018
precision 0.04005243227497815
threshold:  40
TP_t 550
TN_t 3420
FP_t 13281
FN_t 4
FPR_t 0.7952218430034129
TPR_t 0.9927797833935018
precision 0.039765743619405686
threshold:  41
TP_t 550
TN_t 3333
FP_t 13368
FN_t 4
FPR_t 0.8004311119094665
TPR_t 0.9927797833935018
precision 0.03951717200747234
threshold:  42
TP_t 550
TN_t 3259
FP_t 13442
FN_t 4
FPR_t 0.8048619843123166
TPR_t 0.9927797833935018
precision 0.03930817610062893
threshold:  43
TP_t 550
TN_t 3184
FP_t 13517
FN_t 4
FPR_t 0.8093527333692593
TPR_t 0.9927797833935018
precision 0.03909859955925215
threshold:  44
TP_t 550
TN_t 3111
FP_t 13590
FN_t 4
FPR_t 0.8137237291180169
TPR_t 0.9927797833935018
precision 0.038896746817538894
threshold:  45
TP_t 550
TN_t 3045
FP_t 13656
FN_t 4
FPR_t 0.8176755882881265
TPR_t 0.9927797833935

TP_t 554
TN_t 0
FP_t 16701
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.03210663575775138
ROC
[[0.         0.        ]
 [0.24944614 0.70036101]
 [0.3259685  0.80505415]
 [0.37123526 0.86101083]
 [0.40542482 0.88808664]
 [0.43302796 0.91516245]
 [0.45721813 0.92238267]
 [0.47925274 0.9368231 ]
 [0.47925274 0.9368231 ]
 [0.51613676 0.94584838]
 [0.53254296 0.94945848]
 [0.54727262 0.95487365]
 [0.56152326 0.9566787 ]
 [0.5755344  0.95848375]
 [0.58780911 0.95848375]
 [0.58780911 0.95848375]
 [0.61032274 0.96750903]
 [0.62187893 0.9765343 ]
 [0.63277648 0.97833935]
 [0.64331477 0.97833935]
 [0.65235615 0.97833935]
 [0.66175678 0.97833935]
 [0.6714568  0.9801444 ]
 [0.68019879 0.98194946]
 [0.68732411 0.98194946]
 [0.69474882 0.98375451]
 [0.70235315 0.98916968]
 [0.7096581  0.98916968]
 [0.71714269 0.98916968]
 [0.71714269 0.98916968]
 [0.73193222 0.99097473]
 [0.73893779 0.99097473]
 [0.74504521 0.99097473]
 [0.75109275 0.99097473]
 [0.75767918 0.99097473]
 [0.76354709 0.99277978]
 [0.7691755 

TP_t 494
TN_t 9690
FP_t 7073
FN_t 60
FPR_t 0.42194117997971725
TPR_t 0.8916967509025271
precision 0.06528346768864808
threshold:  38
TP_t 494
TN_t 9724
FP_t 7039
FN_t 60
FPR_t 0.41991290341824256
TPR_t 0.8916967509025271
precision 0.06557812292579318
threshold:  39
TP_t 492
TN_t 9765
FP_t 6998
FN_t 62
FPR_t 0.417467040505876
TPR_t 0.8880866425992779
precision 0.06568758344459279
threshold:  40
TP_t 490
TN_t 9804
FP_t 6959
FN_t 64
FPR_t 0.4151404879794786
TPR_t 0.8844765342960289
precision 0.06578064169687206
threshold:  41
TP_t 489
TN_t 9835
FP_t 6928
FN_t 65
FPR_t 0.4132911769969576
TPR_t 0.8826714801444043
precision 0.0659296211406229
threshold:  42
TP_t 489
TN_t 9887
FP_t 6876
FN_t 65
FPR_t 0.41018910696176103
TPR_t 0.8826714801444043
precision 0.06639511201629328
threshold:  43
TP_t 489
TN_t 9919
FP_t 6844
FN_t 65
FPR_t 0.40828014078625546
TPR_t 0.8826714801444043
precision 0.06668484931133234
threshold:  44
TP_t 487
TN_t 9956
FP_t 6807
FN_t 67
FPR_t 0.40607289864582713
TPR_t 0.879

TPR_t 0.5902527075812274
precision 0.08544551868304155
threshold:  99
TP_t 294
TN_t 13731
FP_t 3032
FN_t 260
FPR_t 0.1808745451291535
TPR_t 0.5306859205776173
precision 0.08839446782922429
threshold:  100
TP_t 101
TN_t 15639
FP_t 1124
FN_t 453
FPR_t 0.06705243691463342
TPR_t 0.18231046931407943
precision 0.08244897959183674
threshold:  101
TP_t 0
TN_t 16763
FP_t 0
FN_t 554
FPR_t 0.0
TPR_t 0.0
precision 0
@ brain-1  finished 2023-01-02 10:33:35
@ Skin - Not Sun Exposed  finished 2023-01-02 09:55:29
@ Artery - Tibial  finished 2023-01-02 08:39:30
@ Pituitary  finished 2023-01-02 08:49:51
@ Ovary  finished 2023-01-02 10:06:38
@ brain-3  finished 2023-01-02 09:14:06
@ Thyroid  finished 2023-01-02 08:39:30
@ Testis  finished 2023-01-02 09:05:36


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920718025430068, 'recall': 1.0, 'f1-score': 0.9960201246526995, 'support': 6632}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920718025430068, 'macro avg': {'precision': 0.4960359012715034, 'recall': 0.5, 'f1-score': 0.49801006232634976, 'support': 6685}, 'weighted avg': {'precision': 0.9842064614009305, 'recall': 0.9920718025430068, 'f1-score': 0.9881234804333138, 'support': 6685}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  4
test_genes ['ENSG00000142676', 'ENSG00000101981', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000174227', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000255072', 'ENSG00000144659', 'ENSG00000151702', 'ENSG00000105610', 'ENSG00000179218', 'ENSG0000016739

20       False
24       False
26       False
27       False
32       False
         ...  
67905    False
67910    False
67912    False
67932    False
67953    False
Name: Whole Blood_disease_causing, Length: 6676, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920611144397843, 'recall': 1.0, 'f1-score': 0.9960147379502218, 'support': 6623}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920611144397843, 'macro avg': {'precision': 0.49603055721989214, 'recall': 0.5, 'f1-score': 0.4980073689751109, 'support': 6676}, 'weighted avg': {'precision': 0.9841852547835068, 'recall': 0.9920611144397843, 'f1-score': 0.9881074909293467, 'support': 6676}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  5
test_genes ['ENSG00000160796', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000151702']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000047579', 'ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000255072', 'ENSG00000144659', 'ENSG00000105610', 'ENSG00000179218', 'ENSG0000016739

1        False
12       False
13       False
24       False
49       False
         ...  
67920    False
67942    False
67943    False
67954    False
67967    False
Name: Whole Blood_disease_causing, Length: 6684, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920706163973668, 'recall': 1.0, 'f1-score': 0.9960195268494179, 'support': 6631}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920706163973668, 'macro avg': {'precision': 0.4960353081986834, 'recall': 0.5, 'f1-score': 0.49800976342470893, 'support': 6684}, 'weighted avg': {'precision': 0.9842041079190513, 'recall': 0.9920706163973668, 'f1-score': 0.9881217059453157, 'support': 6684}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  6
test_genes ['ENSG00000005961', 'ENSG00000105372', 'ENSG00000158578', 'ENSG00000072110', 'ENSG00000144659']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG00000163050', 'ENSG00000255072', 'ENSG00000151702', 'ENSG00000105610', 'ENSG00000179218', 'ENSG0000016739

18       False
27       False
35       False
43       False
45       False
         ...  
67930    False
67936    False
67939    False
67962    False
67966    False
Name: Whole Blood_disease_causing, Length: 6692, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.992080095636581, 'recall': 1.0, 'f1-score': 0.9960243042532443, 'support': 6639}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.992080095636581, 'macro avg': {'precision': 0.4960400478182905, 'recall': 0.5, 'f1-score': 0.49801215212662214, 'support': 6692}, 'weighted avg': {'precision': 0.9842229161582877, 'recall': 0.992080095636581, 'f1-score': 0.9881358870199176, 'support': 6692}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  7
test_genes ['ENSG00000105610', 'ENSG00000259207', 'ENSG00000101310', 'ENSG00000111252', 'ENSG00000096968', 'ENSG00000135218', 'ENSG00000166189', 'ENSG00000178726', 'ENSG00000173085', 'ENSG00000189114', 'ENSG00000164494', 'ENSG00000187266']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG0000018289

2        False
17       False
36       False
46       False
57       False
         ...  
67927    False
67935    False
67944    False
67951    False
67963    False
Name: Whole Blood_disease_causing, Length: 6694, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920824619061847, 'recall': 1.0, 'f1-score': 0.9960254968128983, 'support': 6641}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 53}, 'accuracy': 0.9920824619061847, 'macro avg': {'precision': 0.4960412309530923, 'recall': 0.5, 'f1-score': 0.49801274840644916, 'support': 6694}, 'weighted avg': {'precision': 0.9842276112218363, 'recall': 0.9920824619061847, 'f1-score': 0.9881394269994708, 'support': 6694}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


 Fold:  8
test_genes ['ENSG00000163755', 'ENSG00000138326', 'ENSG00000132842', 'ENSG00000119723', 'ENSG00000167113', 'ENSG00000110911', 'ENSG00000122406', 'ENSG00000171863', 'ENSG00000158526', 'ENSG00000165702']
train_genes ['ENSG00000165195', 'ENSG00000185245', 'ENSG00000165282', 'ENSG00000169313', 'ENSG00000099937', 'ENSG00000135766', 'ENSG00000182512', 'ENSG00000134086', 'ENSG00000100345', 'ENSG00000117601', 'ENSG00000116016', 'ENSG00000088682', 'ENSG00000124614', 'ENSG00000184500', 'ENSG00000169704', 'ENSG00000213741', 'ENSG00000113905', 'ENSG00000233927', 'ENSG00000180210', 'ENSG00000148459', 'ENSG00000161395', 'ENSG00000115718', 'ENSG00000107521', 'ENSG00000197728', 'ENSG00000141510', 'ENSG00000140326', 'ENSG00000142676', 'ENSG00000101981', 'ENSG00000174227', 'ENSG00000100099', 'ENSG00000148985', 'ENSG00000110756', 'ENSG00000113013', 'ENSG00000160796', 'ENSG00000047579', 'ENSG00000198734', 'ENSG00000115486', 'ENSG00000060642', 'ENSG00000182899', 'ENSG00000236320', 'ENSG0000016305

2        False
23       False
35       False
45       False
54       False
         ...  
67928    False
67931    False
67943    False
67944    False
67965    False
Name: Whole Blood_disease_causing, Length: 7060, dtype: bool
{'random_state': 1234, 'n_estimators': 2000, 'min_samples_split': 2, 'min_samples_leaf': 3, 'max_depth': 40, 'bootstrap': False}


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


{'False': {'precision': 0.9920679886685553, 'recall': 1.0, 'f1-score': 0.9960182025028441, 'support': 7004}, 'True': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 56}, 'accuracy': 0.9920679886685553, 'macro avg': {'precision': 0.49603399433427764, 'recall': 0.5, 'f1-score': 0.49800910125142206, 'support': 7060}, 'weighted avg': {'precision': 0.9841988941408726, 'recall': 0.9920679886685553, 'f1-score': 0.9881177748342663, 'support': 7060}}
@@@  precision: 0.0 recall:  0.0 f1_score:  0.0


scipy.interp is deprecated and will be removed in SciPy 2.0.0, use numpy.interp instead


CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
0                   33.000                    False
1                   54.000                    False
2                   33.000                    False
3                   33.000                    False
4                   20.500                    False
...                    ...                      ...
67963                0.141                    False
67964                2.164                    False
67965                1.611                    False
67966                2.151                    False
67967               22.700                    False

[67968 rows x 2 columns]
threshold:  0
TP_t 536
TN_t 0
FP_t 67432
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.007886064030131828
threshold:  1
TP_t 536
TN_t 10521
FP_t 56911
FN_t 0
FPR_t 0.8439761537548938
TPR_t 1.0
precision 0.009330339269239473
threshold:  2
TP_t 536
TN_t 14467
FP_t 52965
FN_t 0
FPR_t 0.785457942816467
TPR_t 1.0
precision 0.010018504327021924
t

FPR_t 0.00032625459722386996
TPR_t 0.0
precision 0.0
threshold:  59
TP_t 0
TN_t 67415
FP_t 17
FN_t 536
FPR_t 0.0002521058251275359
TPR_t 0.0
precision 0.0
threshold:  60
TP_t 0
TN_t 67416
FP_t 16
FN_t 536
FPR_t 0.00023727607070826907
TPR_t 0.0
precision 0.0
threshold:  61
TP_t 0
TN_t 67420
FP_t 12
FN_t 536
FPR_t 0.00017795705303120182
TPR_t 0.0
precision 0.0
threshold:  62
TP_t 0
TN_t 67420
FP_t 12
FN_t 536
FPR_t 0.00017795705303120182
TPR_t 0.0
precision 0.0
threshold:  63
TP_t 0
TN_t 67421
FP_t 11
FN_t 536
FPR_t 0.00016312729861193498
TPR_t 0.0
precision 0.0
threshold:  64
TP_t 0
TN_t 67424
FP_t 8
FN_t 536
FPR_t 0.00011863803535413454
TPR_t 0.0
precision 0.0
threshold:  65
TP_t 0
TN_t 67426
FP_t 6
FN_t 536
FPR_t 8.897852651560091e-05
TPR_t 0.0
precision 0.0
threshold:  66
TP_t 0
TN_t 67429
FP_t 3
FN_t 536
FPR_t 4.4489263257800454e-05
TPR_t 0.0
precision 0.0
threshold:  67
TP_t 0
TN_t 67430
FP_t 2
FN_t 536
FPR_t 2.9659508838533634e-05
TPR_t 0.0
precision 0.0
threshold:  68
TP_t 0
TN_t

precision 0.03195506923598335
threshold:  28
TP_t 329
TN_t 56513
FP_t 9926
FN_t 193
FPR_t 0.14940020168876714
TPR_t 0.6302681992337165
precision 0.032081911262798635
threshold:  29
TP_t 326
TN_t 56595
FP_t 9844
FN_t 196
FPR_t 0.14816598684507593
TPR_t 0.6245210727969349
precision 0.032055063913470995
threshold:  30
TP_t 320
TN_t 56658
FP_t 9781
FN_t 202
FPR_t 0.1472177486115083
TPR_t 0.6130268199233716
precision 0.03168003168003168
threshold:  31
TP_t 318
TN_t 56720
FP_t 9719
FN_t 204
FPR_t 0.1462845617784735
TPR_t 0.6091954022988506
precision 0.03168277373717246
threshold:  32
TP_t 316
TN_t 56791
FP_t 9648
FN_t 206
FPR_t 0.1452159123406433
TPR_t 0.6053639846743295
precision 0.031714171015656364
threshold:  33
TP_t 313
TN_t 56858
FP_t 9581
FN_t 209
FPR_t 0.1442074685049444
TPR_t 0.5996168582375478
precision 0.03163533454618961
threshold:  34
TP_t 309
TN_t 56921
FP_t 9518
FN_t 213
FPR_t 0.14325923027137674
TPR_t 0.5919540229885057
precision 0.03144398086903429
threshold:  35
TP_t 305
TN

FN_t 382
FPR_t 0.08788512771113352
TPR_t 0.2681992337164751
precision 0.023415286837263755
threshold:  89
TP_t 138
TN_t 60745
FP_t 5694
FN_t 384
FPR_t 0.08570267463387468
TPR_t 0.26436781609195403
precision 0.023662551440329218
threshold:  90
TP_t 135
TN_t 60867
FP_t 5572
FN_t 387
FPR_t 0.0838664037688707
TPR_t 0.25862068965517243
precision 0.02365516032942001
threshold:  91
TP_t 132
TN_t 61026
FP_t 5413
FN_t 390
FPR_t 0.08147323108415239
TPR_t 0.25287356321839083
precision 0.02380522993688007
threshold:  92
TP_t 127
TN_t 61213
FP_t 5226
FN_t 395
FPR_t 0.07865861918451512
TPR_t 0.24329501915708812
precision 0.023725014010835047
threshold:  93
TP_t 124
TN_t 61416
FP_t 5023
FN_t 398
FPR_t 0.07560318487635274
TPR_t 0.23754789272030652
precision 0.024091703905187487
threshold:  94
TP_t 116
TN_t 61697
FP_t 4742
FN_t 406
FPR_t 0.07137374132663045
TPR_t 0.2222222222222222
precision 0.02387813915191437
threshold:  95
TP_t 106
TN_t 61982
FP_t 4457
FN_t 416
FPR_t 0.06708409217477686
TPR_t 0.2030

precision 0.01786735277301315
threshold:  43
TP_t 250
TN_t 3187
FP_t 13817
FN_t 1
FPR_t 0.8125735121147966
TPR_t 0.9960159362549801
precision 0.01777209070875098
threshold:  44
TP_t 250
TN_t 3114
FP_t 13890
FN_t 1
FPR_t 0.8168666196189132
TPR_t 0.9960159362549801
precision 0.01768033946251768
threshold:  45
TP_t 250
TN_t 3048
FP_t 13956
FN_t 1
FPR_t 0.8207480592801694
TPR_t 0.9960159362549801
precision 0.01759819794453048
threshold:  46
TP_t 250
TN_t 2969
FP_t 14035
FN_t 1
FPR_t 0.8253940249353093
TPR_t 0.9960159362549801
precision 0.01750087504375219
threshold:  47
TP_t 250
TN_t 2905
FP_t 14099
FN_t 1
FPR_t 0.829157845212891
TPR_t 0.9960159362549801
precision 0.017422816921039794
threshold:  48
TP_t 250
TN_t 2852
FP_t 14152
FN_t 1
FPR_t 0.8322747588802635
TPR_t 0.9960159362549801
precision 0.01735870018053048
threshold:  49
TP_t 250
TN_t 2787
FP_t 14217
FN_t 1
FPR_t 0.8360973888496824
TPR_t 0.9960159362549801
precision 0.017280707817792215
threshold:  50
TP_t 250
TN_t 2713
FP_t 14291


 [1.         1.        ]]
AUC
0.7655411053972775
CADD_Scores_df        Pathogenicity_Score  tissue_specific_disease
25                    99.9                    False
29                    98.6                    False
30                    99.8                    False
32                    99.9                    False
33                    94.5                    False
...                    ...                      ...
67945                  5.3                    False
67946                  1.5                    False
67958                 16.8                    False
67959                 67.1                    False
67967                 50.9                    False

[17317 rows x 2 columns]
threshold:  0
TP_t 251
TN_t 0
FP_t 17066
FN_t 0
FPR_t 1.0
TPR_t 1.0
precision 0.01449442744124271
threshold:  1
TP_t 248
TN_t 4875
FP_t 12191
FN_t 3
FPR_t 0.7143443103246221
TPR_t 0.9880478087649402
precision 0.019937293994694107
threshold:  2
TP_t 248
TN_t 5799
FP_t 11267
FN_t 3
FPR_t

precision 0.0326214160327677
threshold:  57
TP_t 222
TN_t 10483
FP_t 6583
FN_t 29
FPR_t 0.3857377241298488
TPR_t 0.8844621513944223
precision 0.032623071271124175
threshold:  58
TP_t 221
TN_t 10525
FP_t 6541
FN_t 30
FPR_t 0.3832766904957225
TPR_t 0.8804780876494024
precision 0.03268263827270038
threshold:  59
TP_t 221
TN_t 10548
FP_t 6518
FN_t 30
FPR_t 0.38192898160084376
TPR_t 0.8804780876494024
precision 0.03279418311322155
threshold:  60
TP_t 219
TN_t 10581
FP_t 6485
FN_t 32
FPR_t 0.3799953123168874
TPR_t 0.8725099601593626
precision 0.03266706443914081
threshold:  61
TP_t 218
TN_t 10613
FP_t 6453
FN_t 33
FPR_t 0.3781202390718387
TPR_t 0.8685258964143426
precision 0.03267875880677559
threshold:  62
TP_t 216
TN_t 10654
FP_t 6412
FN_t 35
FPR_t 0.3757178014766202
TPR_t 0.8605577689243028
precision 0.032589016294508145
threshold:  63
TP_t 216
TN_t 10704
FP_t 6362
FN_t 35
FPR_t 0.3727879995312317
TPR_t 0.8605577689243028
precision 0.0328367284889024
threshold:  64
TP_t 215
TN_t 10749
FP_