In [1]:
"""
The code for patient pathogenic variant prioritization, and shap explanation plot for the pathogenic variant (Fig. 5B-D). 
Due to privacy concerns, we cannot disclose patient data.
"""

In [None]:
import time
import matplotlib.pyplot as plt
import os
import pandas as pd
import ast
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import shap  # package used to calculate Shap values
pd.options.mode.chained_assignment = None
import pickle
import seaborn as sns

In [2]:
"---------------------------- Load Data ------------------------------"

path = os.path.join('..', '..', 'Data', 'Full_Slim_Dataset_hg37-v1.6.csv')
Slim_dataset = pd.read_csv(path, engine='python')#low_memory=False,
print(Slim_dataset)

path = os.path.join('..', '..', 'Data', 'Gene list with OMIM, ACMG, HPO_Dataset_Edited.csv')
Metadata = pd.read_csv(path)
print(Metadata)

path = os.path.join('..', '..', 'Results', 'Best_Parameters', 'Best_Parameters_New_17.csv')
Best_param = pd.read_csv(path, engine='python')#low_memory=False,
print(Best_param)

path = os.path.join('..', '..', 'Data', 'Relevant_Columns_Names_Edited_2.csv')
Relevant_Cols_df = pd.read_csv(path)
overlap_cols = Relevant_Cols_df['Feature'].tolist()
print(overlap_cols)
rename_dict = dict(zip(overlap_cols, Relevant_Cols_df['Feature Name'].tolist()))
# print(rename_dict)
overlap_cols_names  = Relevant_Cols_df['Feature Name'].tolist()



       VariationID   OMIMs                                 Manifested_Tissues  \
0           535972  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
1           535875  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
2           535979  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
3           567376  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
4           565912  613254  brain-0,kidney,Skin - Sun Exposed (Lower leg),...   
...            ...     ...                                                ...   
67963       873299     NaN                                                NaN   
67964       873211     NaN                                                NaN   
67965       873240     NaN                                                NaN   
67966       873216     NaN                                                NaN   
67967       915562     NaN                                                NaN   

      #Chr        Pos      

In [3]:
"-------------------------- Data PreProcessing ------------------------"


def preprocessing_data2(Relevant_Data, medians_dict):

    one_hot_columns = ['Type', 'AnnoType', 'Consequence', 'Domain', 'Dst2SplType']  # , 'EnsembleRegulatoryFeature'

    # Get one hot encoding of columns B
    one_hot = pd.get_dummies(Relevant_Data[one_hot_columns])
    # Drop column B as it is now encoded
    Relevant_Data = Relevant_Data.drop(one_hot_columns, axis=1)
    # Join the encoded df
    Relevant_Data = Relevant_Data.join(one_hot)
    # print(relevant_data_1)
    cHmm_columns = Slim_dataset.columns[Slim_dataset.columns.str.contains(pat='cHmm_E')].tolist()
    fill_zero_columns = ['motifECount', 'motifEHIPos', 'motifEScoreChng', 'mirSVR-Score', 'mirSVR-E', 'mirSVR-Aln',
                         'tOverlapMotifs', 'motifDist'] + cHmm_columns  # motifs with high number of nan 97%
    Relevant_Data[fill_zero_columns] = Relevant_Data[fill_zero_columns].fillna(value=0)
    fill_common_columns = ['cDNApos', 'relcDNApos', 'CDSpos', 'relCDSpos', 'protPos', 'relProtPos', 'Dst2Splice',
                           'SIFTval', 'PolyPhenVal', 'GerpRS', 'GerpRSpval', 'GerpN', 'GerpS', 'all Enc',
                           'Grantham', 'All SpliceAI', 'All MMSp', 'Dist2Mutation', 'All 00bp', 'dbscSNV',
                           'RemapOverlapTF', 'RemapOverlapCL', 'Trace Features']  # Locations, is this right?
    common_cols = [c for c in list(Relevant_Data) if c in medians_dict]
    for cl in common_cols:
        Relevant_Data[cl] = Relevant_Data[cl].fillna(medians_dict[cl])
    return Relevant_Data

def preprocessing_new(Relevant_Data):
    
    "---------------------- One Hot Columns -------------------------"
    
    one_hot_columns = ['Type', 'AnnoType', 'Consequence', 'Domain', 'Dst2SplType'] 

    one_hot = pd.get_dummies(Relevant_Data[one_hot_columns])
    Relevant_Data = Relevant_Data.drop(one_hot_columns, axis=1)
    Relevant_Data = Relevant_Data.join(one_hot)
    
    "---------------------- Missing Values Imputation ---------------"
    
    special_imputation_cols = {'SIFTval':1, 'GC':0.42, 'CpG':0.02, 'priPhCons':0.115, 'mamPhCons':0.079, 'verPhCons':0.094,'priPhyloP':-0.033, 'mamPhyloP':-0.038, 'verPhyloP':0.017, 'GerpN':1.91, 'GerpS':-0.2}
    
    for cl in special_imputation_cols:
        Relevant_Data[cl] = Relevant_Data[cl].fillna(special_imputation_cols[cl])
        
    Relevant_Data.fillna(0, inplace=True)
    
    return Relevant_Data

In [4]:
"-------------------------  SHAP ------------------------------------"

def shap_plot(j, S, model, short_name, tissue):
    explainerModel = shap.TreeExplainer(model)
    shap_values_Model = explainerModel.shap_values(S)
    print(shap_values_Model)
    print(explainerModel.expected_value)

    fig = plt.gcf()
    
    plot_title = short_name + '_' + tissue.strip()
    file_name = plot_title + '_SHAP.jpg'
    out_path = os.path.join('..', '..', 'Results', 'Patients Analysis Results New Imputation2', file_name)
    p = 0.08  # Probability 0.4
    new_base_value = np.log(p / (1 - p))  # the logit function
    shap.decision_plot(explainerModel.expected_value[1], shap_values_Model[1][j], S.iloc[[j]], show=False, highlight=0)  # Rf : explainer.expected_value[1],link='logit', new_base_value=new_base_value
    plt.title(plot_title + ' SHAP Decision Plot', x=0.5, y=1.1)
    all_axes = fig.get_axes()
    ax = all_axes[0]
    plt.tight_layout()
    plt.savefig(out_path, dpi=100)
    plt.close()


In [5]:
y_columns = Slim_dataset.columns[Slim_dataset.columns.str.contains(pat = 'disease_causing')].tolist()
print(y_columns)
non_relevant_columns = ['VariationID', 'OMIMs', 'Manifested_Tissues', '#Chr', 'Pos', 'ConsDetail', 'motifEName', 'GeneID_y', 'FeatureID', 'GeneName', 'CCDS', 'Intron', 'Exon', 'SIFTcat', 'PolyPhenCat', 'bStatistic', 'targetScan', 'dbscSNV-rf_score', 'oAA', 'Ref', 'nAA', 'Alt', 'Segway']# it will be good to replace oAA and nAA with blssuom64 matrix. What bStatistic doing?
non_relevant_columns = non_relevant_columns + y_columns
non_relevant_patient = ['#Chr', 'Pos', 'ConsDetail', 'motifEName', 'GeneID_y', 'FeatureID', 'GeneName', 'CCDS', 'Intron', 'Exon', 'SIFTcat', 'PolyPhenCat', 'bStatistic', 'targetScan', 'dbscSNV-rf_score', 'oAA', 'Ref', 'nAA', 'Alt', 'Segway']# it will be good to replace oAA and nAA with blssuom64 matrix. What bStatistic doing?

cols = Slim_dataset.columns
relevant_columns = [x for x in cols if x not in non_relevant_columns]
print(relevant_columns)
Slim_Relevant = Slim_dataset[relevant_columns]

Slim_Relevant = preprocessing_new(Slim_Relevant)

['Lung_disease_causing', 'Muscle - Skeletal_disease_causing', 'Skin - Sun Exposed_disease_causing', 'Adipose - Subcutaneous_disease_causing', 'Artery - Aorta_disease_causing', 'Heart - Left Ventricle_disease_causing', 'Artery - Coronary_disease_causing', 'brain-0_disease_causing', 'Liver_disease_causing', 'Nerve - Tibial_disease_causing', 'Colon - Sigmoid_disease_causing', 'kidney_disease_causing', 'Heart - Atrial Appendage_disease_causing', 'Breast - Mammary Tissue_disease_causing', 'Uterus_disease_causing', 'Adipose - Visceral_disease_causing', 'Esophagus - Gastroesophageal Junction_disease_causing', 'Esophagus - Mucosa_disease_causing', 'brain-1_disease_causing', 'Skin - Not Sun Exposed_disease_causing', 'Artery - Tibial_disease_causing', 'Pituitary_disease_causing', 'Ovary_disease_causing', 'brain-3_disease_causing', 'Thyroid_disease_causing', 'Testis_disease_causing', 'Whole Blood_disease_causing', 'brain-2_disease_causing', 'brain_disease_causing']
['Type', 'Length', 'AnnoType', 

In [6]:
import ast
from time import gmtime, strftime

"-------------------------------- Create Ptient Specific Model --------------------------------------------"
patient_files_list = Metadata['Dataset File'].tolist()

print(patient_files_list)
patient_files_list = [x for x in patient_files_list if pd.notnull(x)]

def patient_variants_prioritization(file):

    short_name = file.split('_')[0]
    tissues = Metadata['Tissue_Model'][Metadata['Dataset File'] == file].values[0]
    disease_gene = Metadata['Gene'][Metadata['Dataset File'] == file].values[0]
    disease_gene = disease_gene.replace("?", "")

    print('---------------------------', short_name, '------------------------')
    print('Tissues: ', tissues)

    patient_path = os.path.join('..', '..', 'Data', 'Patients_Datasets', file)
    
    Patient_data = pd.read_csv(patient_path)

    Patient_Relevant = Patient_data[relevant_columns]
    Patient_Relevant = preprocessing_new(Patient_Relevant)

    patient_genes = Patient_data['GeneID_y'].tolist()
    tissues = tissues.split(',')
    for tissue in tissues:
        print('------------------', tissue, short_name, '----------------')

        y = tissue.strip() + '_' + 'disease_causing'
        X_trian = Slim_Relevant[~Slim_dataset['GeneID_y'].isin(patient_genes)]
        y_train = Slim_dataset[y][~Slim_dataset['GeneID_y'].isin(patient_genes)]
        X_trian.rename(columns=rename_dict, inplace=True)
        Patient_Relevant.rename(columns=rename_dict, inplace=True)

        best_parameters = Best_param['Best_Parameters'][(Best_param['Dataset'] == 'Full Trace')&(Best_param['Tissue'] == tissue.strip())&(Best_param['ML_Model'] == 'Random Forest')].values[0]
        best_parameters = ast.literal_eval(best_parameters)
        print(best_parameters)
        
        model = RandomForestClassifier(**best_parameters)
        common_features = [x for x in list(Patient_Relevant) if (x in list(X_trian)) and (x in overlap_cols_names)]
        X_trian[common_features] = X_trian[common_features].fillna(0)
        model.fit(X_trian[common_features], y_train)
        Patient_Relevant[common_features] = Patient_Relevant[common_features].fillna(0)
        patient_predict_proba = model.predict_proba(Patient_Relevant[common_features])
        # print(patient_predict_proba)
        patient_predictions = patient_predict_proba[:, 1]
        prediction_df = pd.DataFrame(patient_predictions, columns=['Pathological_probability'])
        results_df = pd.concat([Patient_data, prediction_df], axis=1)
        Relevant_Results = results_df[['GeneName', 'GeneID_y', '#Chr', 'Pos', 'Ref', 'Alt', 'Type', 'Length','SIFTval', 'PolyPhenVal', 'PHRED', 'Pathological_probability']]
        Relevant_Results = Relevant_Results.sort_values('Pathological_probability', ascending=False)
        Relevant_Results['Is_pathogenic'] = False
        Relevant_Results.loc[Relevant_Results.GeneName == disease_gene, 'Is_pathogenic'] = True
        print(Relevant_Results)

        out_path = os.path.join('..', '..', 'Results', 'Patients Analysis Results New Imputation2', short_name + '_' + tissue.strip()  + '_Rank.csv')
        print('out_path', out_path)
        Relevant_Results.to_csv(out_path)

        if disease_gene in Relevant_Results['GeneName'].tolist():

            print(Relevant_Results['Pathological_probability'][Relevant_Results['GeneName'] == disease_gene])


            "Shap"
            j = Relevant_Results[Relevant_Results['Is_pathogenic'] == True].index.tolist()[0]
            print(j)
            shap_plot(j, Patient_Relevant[common_features], model, short_name, tissue)

        else:
            print('* Disease Gene: ', disease_gene, ' do not exist in prioritization results')
            
    return short_name, strftime("%Y-%m-%d %H:%M:%S", gmtime())


['AzoospermiaPMRRP28085_TRACE_CADD-v1.6_GRCh37.csv', 'AFOtB1305_TRACE_CADD-v1.6_GRCh37.csv', 'CardiacNeuralsyndromeOtA46052_TRACE_CADD-v1.6_GRCh37.csv', 'CardiomyopathyOtB0551_TRACE_CADD-v1.6_GRCh37.csv', 'HSP13393_TRACE_CADD-v1.6_GRCh37.csv', 'CardiacNeuralSyndrome41621_TRACE_CADD-v1.6_GRCh37.csv', 'CardiacNeuralSyndrome41621_TRACE_CADD-v1.6_GRCh37.csv', 'Autism13498_TRACE_CADD-v1.6_GRCh37.csv', 'Musculardystrophy13879_TRACE_CADD-v1.6_GRCh37.csv', 'Zellwegersyndrome13414_TRACE_CADD-v1.6_GRCh37.csv', 'SeckelSyndromeSS2_TRACE_CADD-v1.6_GRCh37.csv', 'Leighsyndrome766M_TRACE_CADD-v1.6_GRCh37.csv', 'HSP16390_TRACE_CADD-v1.6_GRCh37.csv', 'Skin16264_TRACE_CADD-v1.6_GRCh37.csv', 'MicrocephalyOt1097_TRACE_CADD-v1.6_GRCh37.csv', 'PCCAOt1103_TRACE_CADD-v1.6_GRCh37.csv', 'PCCAOt1103_TRACE_CADD-v1.6_GRCh37.csv', 'PCCAOt1104_TRACE_CADD-v1.6_GRCh37.csv', 'PCCAOt1104_TRACE_CADD-v1.6_GRCh37.csv', 'NPHPOt1114_TRACE_CADD-v1.6_GRCh37.csv', 'HypotoniaOt2812_TRACE_CADD-v1.6_GRCh37.csv', 'diaphragmaticherni

In [7]:
import multiprocessing as mp

def driver_func_shap():
    PROCESSES = 40
    df_list = []
    
    with mp.Pool(PROCESSES) as pool:
        results = [pool.apply_async(patient_variants_prioritization(file, )) for file in patient_files_list]
        for r in results:
            results_tuple = r.get(timeout=None)
            print('@', results_tuple[0], ' finished', results_tuple[1])

            
if __name__ == '__main__':
    print(strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    driver_func_shap()    

2022-12-04 10:49:58
--------------------------- AzoospermiaPMRRP28085 ------------------------
Tissues:  brain, Testis
------------------ brain AzoospermiaPMRRP28085 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
     GeneName         GeneID_y #Chr        Pos  \
97     SCAPER  ENSG00000140386   15   76866531   
124     CEP89  ENSG00000121289   19   33444610   
70     NDUFS3  ENSG00000213619   11   47593021   
49   KIAA1549  ENSG00000122778    7  138554407   
133   ARFGAP1  ENSG00000101199   20   61919110   
..        ...              ...  ...        ...   
16      DNAH6  ENSG00000115423    2   84806635   
137     BEND2  ENSG00000177324    X   18221974   
8        CR1L  ENSG00000197721    1  207850792   
63       EXD3  ENSG00000187609    9  140287591   
146     TBX22  ENSG00000122145    X   79283556   

                                                   Ref Alt Type  Length  \
97         

--------------------------- AFOtB1305 ------------------------
Tissues:  Heart - Left Ventricle
------------------ Heart - Left Ventricle AFOtB1305 ----------------


Columns (0,13,113) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos  Ref Alt Type  Length  SIFTval  \
95     XIRP1  ENSG00000168334    3   39227886  ATG   A  DEL       2      NaN   
416     ENO3  ENSG00000108515   17    4859921    G   A  SNV       0     0.00   
187     TRDN  ENSG00000186439    6  123696752    G   A  SNV       0     0.00   
489     BMP7  ENSG00000101144   20   55777618    C   T  SNV       0     0.01   
44     OBSCN  ENSG00000154358    1  228538629    C   T  SNV       0     0.00   
..       ...              ...  ...        ...  ...  ..  ...     ...      ...   
429     CDC6  ENSG00000094804   17   38445837    C   T  SNV       0      NaN   
252   RNF208  ENSG00000212864    9  140115273    G   A  SNV       0     0.26   
325    RPAP3  ENSG00000005175   12   48063983    T   G  SNV       0     0.18   
262   ENTPD1  ENSG00000138185   10   97605282    G   A  SNV       0   

[array([[ 5.44804562e-04, -3.18449113e-03,  1.71331798e-06, ...,
         6.41864409e-06, -2.74309292e-05,  4.33326634e-06],
       [-9.14160416e-04, -1.55561740e-03,  2.57657854e-05, ...,
         1.08403628e-05,  2.00126097e-06,  1.30006085e-06],
       [ 2.86186673e-04, -2.70486057e-03,  2.79719987e-05, ...,
         2.67711129e-06,  4.33967934e-06,  2.99152382e-06],
       ...,
       [-1.60926758e-03, -2.29677962e-03,  7.32214417e-05, ...,
         6.38134600e-06,  4.68308900e-06,  1.86390827e-06],
       [ 2.44402024e-04, -1.72804496e-03,  7.61325620e-06, ...,
         8.54784507e-06,  3.31090456e-06,  2.36576531e-06],
       [ 2.47455158e-04, -2.98892396e-03,  9.23728593e-05, ...,
         7.95307015e-06,  3.95128387e-06,  3.78760429e-07]]), array([[-5.44804562e-04,  3.18449113e-03, -1.71331798e-06, ...,
        -6.41864409e-06,  2.74309292e-05, -4.33326634e-06],
       [ 9.14160416e-04,  1.55561740e-03, -2.57657854e-05, ...,
        -1.08403628e-05, -2.00126097e-06, -1.30006085

------------------  Heart - Left Ventricle CardiacNeuralSyndrome41621 ----------------
{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
     GeneName         GeneID_y #Chr        Pos Ref Alt Type  Length  SIFTval  \
13        TTN  ENSG00000155657    2  179600360   A   G  SNV       0      NaN   
12        TTN  ENSG00000155657    2  179554549   G   C  SNV       0      NaN   
34        DSP  ENSG00000096696    6    7575687   C   T  SNV       0     0.00   
2     PDE4DIP  ENSG00000178104    1  144879387   G   A  SNV       0      NaN   
6        DYSF  ENSG00000135636    2   71753455   C   T  SNV       0     0.00   
..        ...              ...  ...        ...  ..  ..  ...     ...      ...   
71     IQSEC3  ENSG00000120645   12     176120   C   G  SNV       0     0.00   
73       PRR4  ENSG00000111215   12   10999891   G   C  SNV       0     0.00   
53      RP1L1  ENSG00000183638    8   10470817   G   A  SNV    

[array([[ 7.91225468e-05,  1.29047244e-03, -2.92216352e-05, ...,
         7.92058549e-05,  3.09956039e-06, -1.45092279e-06],
       [ 1.01507998e-04, -2.51571957e-03,  1.08862839e-04, ...,
        -8.28812939e-05, -4.89821522e-04, -4.48992278e-06],
       [ 1.38418612e-04, -1.29627044e-02, -3.46669145e-05, ...,
         1.67679763e-04, -7.65461799e-06, -7.39680718e-05],
       ...,
       [ 8.84399136e-05, -2.57900103e-03,  1.31256231e-04, ...,
         8.21399095e-05,  7.79119228e-06, -1.97193230e-06],
       [ 1.64880923e-05, -2.03898920e-03,  1.12007432e-04, ...,
         5.59708163e-05,  1.50110449e-05, -4.18636149e-05],
       [ 8.30145036e-05, -2.14716812e-03,  3.71377411e-05, ...,
         7.13603581e-05,  3.64043921e-06,  3.82824050e-06]]), array([[-7.91225468e-05, -1.29047244e-03,  2.92216352e-05, ...,
        -7.92058549e-05, -3.09956039e-06,  1.45092279e-06],
       [-1.01507998e-04,  2.51571957e-03, -1.08862839e-04, ...,
         8.28812939e-05,  4.89821522e-04,  4.48992278

[array([[ 2.33913460e-04, -2.41044121e-03,  3.13467627e-06, ...,
         8.13269066e-07,  2.55348225e-06, -4.09178654e-05],
       [ 2.87288706e-04, -2.33448001e-03,  3.56308811e-05, ...,
         6.13826005e-07,  4.70109755e-06, -4.18601771e-05],
       [ 3.35180622e-04, -2.43534945e-03,  4.45747208e-05, ...,
         6.47044742e-07, -4.04845324e-05,  5.50853507e-06],
       ...,
       [ 2.73312424e-04, -4.50797642e-03,  6.01385645e-06, ...,
         7.41410425e-07,  3.53312906e-06,  3.94742328e-06],
       [ 2.35158542e-04,  1.32695368e-03,  2.27202882e-04, ...,
         7.37095708e-07,  7.79306506e-06, -1.11569907e-04],
       [ 2.34388486e-04,  3.60665506e-04,  3.11775634e-04, ...,
         5.48912465e-07,  4.52756706e-06, -5.41862735e-05]]), array([[-2.33913460e-04,  2.41044121e-03, -3.13467627e-06, ...,
        -8.13269066e-07, -2.55348225e-06,  4.09178654e-05],
       [-2.87288706e-04,  2.33448001e-03, -3.56308811e-05, ...,
        -6.13826005e-07, -4.70109755e-06,  4.18601771

--------------------------- Leighsyndrome766M ------------------------
Tissues:  brain
------------------ brain Leighsyndrome766M ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos Ref Alt Type  Length  SIFTval  \
72     AUTS2  ENSG00000158321    7   70229815   A  AC  INS       1      NaN   
57    FAM8A1  ENSG00000137414    6   17608511   C   T  SNV       0      NaN   
103   SPTBN2  ENSG00000173898   11   66460141   G   A  SNV       0     0.00   
7       DAB1  ENSG00000173406    1   57480699   G   A  SNV       0     0.01   
8       DAB1  ENSG00000173406    1   57611093   C   T  SNV       0     0.00   
..       ...              ...  ...        ...  ..  ..  ...     ...      ...   
159    CENPM  ENSG00000100162   22   42341214   G   A  SNV       0      NaN   
87      ZER1  ENSG00000160445    9  131513506   G   A  SNV       0      NaN   
151  CEACAM5

[array([[ 8.70789473e-05, -1.79377911e-03, -1.85990772e-05, ...,
         2.43264209e-07, -4.03026893e-06, -6.56390914e-07],
       [ 5.23783390e-05, -1.07961754e-03,  2.80731485e-05, ...,
         1.41465630e-06,  2.49650165e-06, -2.61680694e-05],
       [ 2.48042699e-04,  1.90348342e-03,  3.57417063e-05, ...,
         2.81993562e-07,  6.76117546e-06, -2.58674659e-06],
       ...,
       [ 1.58548501e-04, -2.62159303e-03,  2.44547714e-05, ...,
         1.35741556e-06,  1.46740794e-06,  4.33843413e-06],
       [ 8.81196464e-05, -2.22066626e-03,  2.98816136e-05, ...,
         2.21806360e-07,  5.73963959e-06,  3.81638244e-06],
       [ 1.43675941e-04, -2.28750933e-03,  8.56412122e-05, ...,
         2.86699685e-07,  9.04954596e-06,  4.13489882e-06]]), array([[-8.70789473e-05,  1.79377911e-03,  1.85990772e-05, ...,
        -2.43264206e-07,  4.03026893e-06,  6.56390914e-07],
       [-5.23783390e-05,  1.07961754e-03, -2.80731485e-05, ...,
        -1.41465632e-06, -2.49650165e-06,  2.61680694

Columns (0,13,113) have mixed types.Specify dtype option on import or set low_memory=False.


------------------ brain MicrocephalyOt1097 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
        GeneName         GeneID_y #Chr        Pos     Ref    Alt Type  Length  \
476      CACNA1B  ENSG00000148408    9  140772669       A  ATCCA  INS       4   
319      SYNGAP1  ENSG00000197283    6   33410680  ACTCGC      A  DEL       5   
478      CACNA1B  ENSG00000148408    9  140772671       T      C  SNV       0   
477      CACNA1B  ENSG00000148408    9  140772670       A      T  SNV       0   
704       FAM98B  ENSG00000171262   15   38776571       G      A  SNV       0   
...          ...              ...  ...        ...     ...    ...  ...     ...   
696      EXOC3L4  ENSG00000205436   14  103568588       G      T  SNV       0   
921        TPRX1  ENSG00000178928   19   48305586       G      A  SNV       0   
406        RP1L1  ENSG00000183638    8   10480189       T      C  SNV       0   

[array([[-1.38959604e-02, -7.33036146e-03,  1.02344284e-04, ...,
         1.54630156e-08,  6.08416742e-06,  5.24237126e-06],
       [ 1.47259436e-03, -4.39840752e-03,  1.86223358e-05, ...,
         2.32454942e-08,  6.46140211e-06,  3.08094280e-06],
       [ 2.00252320e-03, -9.55210441e-03,  4.52884178e-06, ...,
         1.50835267e-08,  3.07281201e-06,  5.92157025e-07],
       ...,
       [ 1.42198720e-03,  6.54657899e-03, -8.17753675e-07, ...,
         2.32454942e-08,  8.41595710e-06,  7.83033049e-06],
       [-2.99637293e-02, -1.77571219e-02, -7.76564183e-04, ...,
         1.52883171e-08,  1.14239803e-05, -7.72134607e-07],
       [ 1.60556746e-03,  1.45931665e-02,  3.66808934e-05, ...,
         1.08407179e-08,  3.31536023e-05,  7.74357459e-06]]), array([[ 1.38959604e-02,  7.33036146e-03, -1.02344284e-04, ...,
        -1.54630156e-08, -6.08416742e-06, -5.24237126e-06],
       [-1.47259436e-03,  4.39840752e-03, -1.86223358e-05, ...,
        -2.32454942e-08, -6.46140211e-06, -3.08094280

--------------------------- NPHPOt1114 ------------------------
Tissues:  brain, kidney
------------------ brain NPHPOt1114 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos Ref     Alt Type  Length  \
138  CACNA1B  ENSG00000148408    9  140772669   A   ATCCA  INS       4   
97    BCLAF1  ENSG00000029363    6  136589299   C       G  SNV       0   
139  CACNA1B  ENSG00000148408    9  140772670   A       T  SNV       0   
219    SMPD3  ENSG00000103056   16   68395158   A  AGGGGG  INS       5   
318     RBMX  ENSG00000147274    X  135957644   A       T  SNV       0   
..       ...              ...  ...        ...  ..     ...  ...     ...   
113     MGAM  ENSG00000257335    7  141766398   C       T  SNV       0   
10    UBXN11  ENSG00000158062    1   26628229   G       A  SNV       0   
78    FAM71B  ENSG00000170613    5  156589660   C       T  SNV 

Columns (0,113) have mixed types.Specify dtype option on import or set low_memory=False.


------------------ Muscle - Skeletal HypotoniaOt2812 ----------------
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}
     GeneName         GeneID_y #Chr        Pos Ref  Alt Type  Length  SIFTval  \
1529   MYBPC2  ENSG00000086967   19   50963368   A  AGC  INS       2      NaN   
1528   MYBPC2  ENSG00000086967   19   50963366   A   AG  INS       1      NaN   
220       NEB  ENSG00000183091    2  152348641   C    T  SNV       0     0.01   
241       TTN  ENSG00000155657    2  179447097   C    T  SNV       0      NaN   
631      FLNC  ENSG00000128591    7  128497213   A   AC  INS       1      NaN   
...       ...              ...  ...        ...  ..  ...  ...     ...      ...   
866      MUC6  ENSG00000184956   11    1017036   G    A  SNV       0     0.04   
865      MUC6  ENSG00000184956   11    1016605   C    G  SNV       0     0.35   
864      MUC6  ENSG00000184956   11    1016604   A    G  SNV       0   

[array([[ 4.30779889e-03, -1.28049904e-02,  2.28723112e-07, ...,
         1.80503673e-07,  5.28984892e-06, -1.94780822e-05],
       [-1.03270428e-02, -9.40329680e-03,  5.48798099e-05, ...,
         2.08840435e-07,  2.47136378e-06, -1.14648958e-04],
       [-1.88329716e-02, -1.83386511e-02,  8.35056908e-05, ...,
         7.89778658e-08,  4.02552245e-06,  3.67493295e-05],
       ...,
       [-1.71135950e-02, -8.55391604e-03, -1.17936183e-04, ...,
         1.14914245e-07,  5.15938202e-06,  1.38449181e-05],
       [ 1.20164078e-03, -5.22446892e-02,  9.54695736e-05, ...,
         3.80265946e-08,  2.91475123e-06, -7.62510846e-05],
       [ 2.11436521e-03, -9.83917609e-03, -4.57811869e-04, ...,
         2.80145483e-07,  2.48628398e-06, -3.00018113e-05]]), array([[-4.30779889e-03,  1.28049904e-02, -2.28723111e-07, ...,
        -1.80503672e-07, -5.28984892e-06,  1.94780822e-05],
       [ 1.03270428e-02,  9.40329680e-03, -5.48798099e-05, ...,
        -2.08840435e-07, -2.47136378e-06,  1.14648958

Columns (0,113) have mixed types.Specify dtype option on import or set low_memory=False.


------------------ brain ATLDOt4998 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
      GeneName         GeneID_y #Chr        Pos Ref    Alt Type  Length  \
645    CACNA1B  ENSG00000148408    9  140772669   A  ATCCA  INS       4   
1324    SHANK1  ENSG00000161681   19   51165375   A     AC  INS       1   
900     ATP8A2  ENSG00000132932   13   26153947   A    ATT  INS       2   
245        BSN  ENSG00000164061    3   49699875   T      C  SNV       0   
865     PPFIA2  ENSG00000139220   12   81671194   C      T  SNV       0   
...        ...              ...  ...        ...  ..    ...  ...     ...   
595       FBP2  ENSG00000130957    9   97333849   C      T  SNV       0   
795      FOLR2  ENSG00000165457   11   71929609   G      A  SNV       0   
1388     IGSF5  ENSG00000183067   21   41151122   T      C  SNV       0   
826       PRB4  ENSG00000230657   12   11461549   G      C  SNV    

Columns (0,13,22) have mixed types.Specify dtype option on import or set low_memory=False.


------------------ Muscle - Skeletal SpasticParaparesisOt5005 ----------------
{'random_state': 1234, 'n_estimators': 300, 'min_samples_split': 10, 'min_samples_leaf': 3, 'max_depth': None, 'bootstrap': True}
    GeneName         GeneID_y #Chr        Pos        Ref  Alt Type  Length  \
921   MYBPC2  ENSG00000086967   19   50963368          A  AGC  INS       2   
920   MYBPC2  ENSG00000086967   19   50963366          A   AG  INS       1   
20    TRIM63  ENSG00000158022    1   26392867          C    T  SNV       0   
93     OBSCN  ENSG00000154358    1  228464168  AGTACGGGG    A  DEL       8   
726     IDH2  ENSG00000182054   15   90628533      ATGAC    A  DEL       4   
..       ...              ...  ...        ...        ...  ...  ...     ...   
592  TAS2R31  ENSG00000256436   12   11183901          C    G  SNV       0   
758   ZNF785  ENSG00000197162   16   30594329          G    T  SNV       0   
757   HIRIP3  ENSG00000149929   16   30001072          G    C  SNV       0   
593  TAS2R3

[array([[-2.17807730e-02, -1.34052925e-02,  3.07526426e-05, ...,
         1.27062409e-08, -3.06829798e-07,  1.67035384e-05],
       [-2.00843783e-02, -1.16878134e-02, -5.68414364e-06, ...,
         1.23539739e-08, -5.81612319e-07,  1.87085122e-05],
       [-3.29134075e-02, -1.27875327e-02,  4.07533453e-04, ...,
         4.04578783e-08,  1.15226476e-06,  1.54969403e-05],
       ...,
       [ 1.19619946e-03, -2.03582937e-02, -1.39071856e-04, ...,
         1.03917801e-08, -8.85064528e-04,  5.63174806e-05],
       [ 1.26078571e-03, -1.97225897e-02, -1.30428472e-04, ...,
         1.03917801e-08, -8.79779829e-04,  5.65703257e-05],
       [ 2.14042906e-03, -1.11736527e-02,  1.80110632e-04, ...,
         7.84625731e-09,  2.10947163e-05,  8.47880788e-06]]), array([[ 2.17807730e-02,  1.34052925e-02, -3.07526426e-05, ...,
        -1.27062409e-08,  3.06829798e-07, -1.67035384e-05],
       [ 2.00843783e-02,  1.16878134e-02,  5.68414364e-06, ...,
        -1.23539739e-08,  5.81612318e-07, -1.87085122

--------------------------- ComplexVDefOB13 ------------------------
Tissues:  brain, Muscle - Skeletal
------------------ brain ComplexVDefOB13 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos Ref    Alt Type  Length  \
129  CACNA1B  ENSG00000148408    9  140772669   A  ATCCA  INS       4   
176   ATP8A2  ENSG00000132932   13   26153947   A    ATT  INS       2   
12      SPEN  ENSG00000065526    1   16264328   A  AGGCC  INS       4   
11      SPEN  ENSG00000065526    1   16264326   A     AG  INS       1   
174    EP400  ENSG00000183495   12  132445252   A    AGC  INS       2   
..       ...              ...  ...        ...  ..    ...  ...     ...   
249   CAPN12  ENSG00000182472   19   39230864   G      C  SNV       0   
177  SLC15A1  ENSG00000088386   13   99339970   G      C  SNV       0   
154    VSIG2  ENSG00000019102   11  124619781   A  

[array([[ 4.00915106e-03, -1.30005706e-02,  6.94806043e-05, ...,
         2.52526906e-07,  1.25519578e-06,  1.40316687e-05],
       [ 1.33949402e-03, -7.69234956e-03,  2.01458237e-04, ...,
         7.05391011e-08, -2.02184063e-07, -7.19333206e-06],
       [ 9.51078551e-04, -1.71737753e-02,  1.85027292e-04, ...,
         6.60585888e-08, -5.52385079e-07, -3.93869798e-05],
       ...,
       [-1.35406030e-02, -8.91505138e-03,  2.39743319e-04, ...,
         3.04887307e-08,  1.37381634e-06,  5.28509581e-06],
       [ 2.81194447e-03, -8.04068048e-03,  2.86529677e-04, ...,
         1.17997894e-06,  2.55547082e-09, -2.33910286e-05],
       [-3.19992421e-02, -2.14188400e-02, -9.83082723e-04, ...,
         1.58421824e-07, -1.39859085e-06,  2.51435711e-05]]), array([[-4.00915106e-03,  1.30005706e-02, -6.94806043e-05, ...,
        -2.52526906e-07, -1.25519578e-06, -1.40316687e-05],
       [-1.33949402e-03,  7.69234956e-03, -2.01458237e-04, ...,
        -7.05391011e-08,  2.02184064e-07,  7.19333206

Columns (0,113) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
     GeneName         GeneID_y #Chr        Pos Ref     Alt Type  Length  \
719    SPTBN4  ENSG00000160460   19   41081391   A  AACACT  INS       5   
747    OSBPL2  ENSG00000130703   20   60866850   G       A  SNV       0   
560       NIN  ENSG00000100503   14   51239168   G       A  SNV       0   
382      FKTN  ENSG00000106692    9  108382330   A      AA  INS       1   
753    DOPEY2  ENSG00000142197   21   37603007  AT       A  DEL       1   
..        ...              ...  ...        ...  ..     ...  ...     ...   
399  ANKRD30A  ENSG00000148513   10   37433929   G       T  SNV       0   
552     OR4M1  ENSG00000176299   14   20249083   G       C  SNV       0   
766     DDX53  ENSG00000184735    X   23018653   T       C  SNV       0   
223     OTOL1  ENSG00000182447    3  161221009   T       G  SNV       0   
461     MUC5B  ENSG00000117983   11    127257

[array([[ 1.98317175e-04,  8.55773209e-04,  3.87332516e-05, ...,
         5.98211464e-07,  6.07149691e-06,  1.33585716e-05],
       [ 3.58273288e-04,  1.71873956e-03,  2.59613124e-05, ...,
         2.92946543e-06,  1.23122422e-05,  1.35570454e-05],
       [ 3.58463809e-04,  1.71812368e-03,  2.25641897e-05, ...,
         2.92946543e-06,  1.22742246e-05,  1.35452145e-05],
       ...,
       [ 3.39018930e-04,  1.72805058e-03,  1.61983202e-05, ...,
         8.34012244e-07, -1.85703030e-05,  1.19303225e-05],
       [ 3.39865456e-04,  1.71200427e-03,  1.52483580e-05, ...,
         8.30889034e-07, -1.82537523e-05,  1.14716403e-05],
       [ 2.95659771e-04, -2.39624344e-03,  2.43684148e-05, ...,
         1.01706094e-06,  6.44793420e-06,  9.01175988e-06]]), array([[-1.98317175e-04, -8.55773209e-04, -3.87332516e-05, ...,
        -5.98211464e-07, -6.07149692e-06, -1.33585716e-05],
       [-3.58273288e-04, -1.71873956e-03, -2.59613124e-05, ...,
        -2.92946543e-06, -1.23122422e-05, -1.35570454

--------------------------- AtaxiaOtB0926 ------------------------
Tissues:  brain
------------------ brain AtaxiaOtB0926 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y  #Chr        Pos Ref Alt Type  Length  SIFTval  \
23      CHL1  ENSG00000134121     3     432645   G   T  SNV       0     0.00   
114    PRMT7  ENSG00000132600    16   68387455   G   T  SNV       0     0.00   
140    ZNFX1  ENSG00000124201    20   47866092   G   C  SNV       0     0.00   
83     GRIA4  ENSG00000152578    11  105845264   C   T  SNV       0      NaN   
98   ATP6V1D  ENSG00000100554    14   67819719   T   A  SNV       0     0.01   
..       ...              ...   ...        ...  ..  ..  ...     ...      ...   
40   ZDHHC11  ENSG00000188818     5     796148   T   C  SNV       0      NaN   
39   ZDHHC11  ENSG00000188818     5     796064   T   C  SNV       0      NaN   
37   ZDHHC1

Columns (13,22,32,113) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos  Ref Alt Type  Length  SIFTval  \
116     TP63  ENSG00000073282    3  189584501    G   A  SNV       0     0.02   
306    KRT79  ENSG00000185640   12   53224062    A   G  SNV       0     0.00   
252    TRAF2  ENSG00000127191    9  139818342    C   T  SNV       0     0.00   
164    GSTA4  ENSG00000170899    6   52843382  ATG   A  DEL       2      NaN   
336     MYH6  ENSG00000197616   14   23874456    T   C  SNV       0     0.00   
..       ...              ...  ...        ...  ...  ..  ...     ...      ...   
437   ENTHD2  ENSG00000167302   17   79203053    G   C  SNV       0     0.28   
220   NECAB1  ENSG00000123119    8   91962054    T   C  SNV       0     1.00   
222   VPS13B  ENSG00000132549    8  100821659    T   C  SNV       0      NaN   
250  CAMSAP1  ENSG00000130559    9  138714663    C   T  SNV       0   

Columns (0,22) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 600, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_depth': None, 'bootstrap': False}
         GeneName         GeneID_y #Chr        Pos  Ref Alt Type  Length  \
423        ALOXE3  ENSG00000179148   17    8007512    G   T  SNV       0   
309       ALDH3B2  ENSG00000132746   11   67431970    G   A  SNV       0   
4           PADI1  ENSG00000142623    1   17555507    C   T  SNV       0   
108          TP63  ENSG00000073282    3  189584501    G   A  SNV       0   
156         GSTA4  ENSG00000170899    6   52843382  ATG   A  DEL       2   
..            ...              ...  ...        ...  ...  ..  ...     ...   
479      C19orf57  ENSG00000132016   19   14003622    G   A  SNV       0   
198         MUC3A  ENSG00000228273    7  100550960    A   C  SNV       0   
188  RP11-468B6.1  ENSG00000223614    7   63680484    C   T  SNV       0   
433      KRTAP1-3  ENSG00000221880   17   39190973    C   G  SNV       0   
58        NOSTRIN  ENSG00000163072

{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
     GeneName         GeneID_y #Chr        Pos                   Ref Alt Type  \
101     SYNJ1  ENSG00000159082   21   34067458                     G   A  SNV   
22       ECE2  ENSG00000145194    3  183995987                    AC   A  DEL   
34       MDC1  ENSG00000137337    6   30681083  AAAAAGGCGGCCCAAGGCCG   A  DEL   
33       MDC1  ENSG00000137337    6   30681077                AGAAGG   A  DEL   
84        NF1  ENSG00000196712   17   29559867                     C   A  SNV   
..        ...              ...  ...        ...                   ...  ..  ...   
62   TNFRSF1A  ENSG00000067182   12    6441103                     G   T  SNV   
63   TNFRSF1A  ENSG00000067182   12    6441111                     G   C  SNV   
61   TNFRSF1A  ENSG00000067182   12    6441098                     C   T  SNV   
105    MAGEA3  ENSG00000221867    X  151935240              

Columns (0,113) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr       Pos Ref Alt Type  Length  SIFTval  \
205   SCAMP1  ENSG00000085365    5  77745854   A  AA  INS       1      NaN   
315    STAU2  ENSG00000040341    8  74621271   G   C  SNV       0      NaN   
283    SRRM3  ENSG00000177679    7  75914936   A  AC  INS       1      NaN   
540    SARM1  ENSG00000004139   17  26699368   A  AC  INS       1      NaN   
750    OTUD5  ENSG00000068308    X  48814778   C   A  SNV       0      NaN   
..       ...              ...  ...       ...  ..  ..  ...     ...      ...   
352   ANTXRL  ENSG00000198250   10  47678684   A   C  SNV       0     0.21   
729    POTED  ENSG00000166351   21  15013735   A   G  SNV       0     0.08   
691  KIR2DS4  ENSG00000221957   19  55349051   T   C  SNV       0     1.00   
692  KIR2DS4  ENSG00000221957   19  55349061   T   A  SNV       0     0.30   
728    POTED

Columns (0) have mixed types.Specify dtype option on import or set low_memory=False.


------------------ brain OpticAtrophy16012 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos Ref    Alt Type  Length  \
187    SRRM3  ENSG00000177679    7   75914936   A     AC  INS       1   
147   SCAMP1  ENSG00000085365    5   77745854   A     AA  INS       1   
203    KMT2C  ENSG00000055609    7  151945072   A     AT  INS       1   
419    SARM1  ENSG00000004139   17   26699368   A     AC  INS       1   
202    KMT2C  ENSG00000055609    7  151927023   G      C  SNV       0   
..       ...              ...  ...        ...  ..    ...  ...     ...   
302    TEAD4  ENSG00000197905   12    3104142   C      G  SNV       0   
176    OPRM1  ENSG00000112038    6  154412617   C      A  SNV       0   
186    KCTD7  ENSG00000243335    7   66103439   A  AAGGA  INS       4   
384   CIRH1A  ENSG00000141076   16   69199448   C      A  SNV       0   
489  PL

Columns (0) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
          GeneName         GeneID_y #Chr        Pos    Ref Alt Type  Length  \
60            FMN2  ENSG00000155816    1  240255570  AGCGG   A  DEL       4   
732          LMTK3  ENSG00000142235   19   48997203    AGT   A  DEL       2   
733          LMTK3  ENSG00000142235   19   49014762     AG   A  DEL       1   
713  CTD-3193O13.9  ENSG00000183248   19    7938308      A  AG  INS       1   
277          SRRM3  ENSG00000177679    7   75914936      A  AC  INS       1   
..             ...              ...  ...        ...    ...  ..  ...     ...   
343         PTCHD3  ENSG00000182077   10   27703164      G   T  SNV       0   
843          USP26  ENSG00000134588    X  132160186      T   A  SNV       0   
566        CCDC135  ENSG00000159625   16   57762293      C   A  SNV       0   
290           NOS3  ENSG00000164867    7  150710880      G   C  SNV       0   
2

Columns (0) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr       Pos Ref Alt Type  Length  SIFTval  \
399    BRSK2  ENSG00000174672   11   1466990  AC   A  DEL       1      NaN   
211   SCAMP1  ENSG00000085365    5  77745854   A  AA  INS       1      NaN   
800  PPP1R3F  ENSG00000049769    X  49126412   C   A  SNV       0      NaN   
587    SARM1  ENSG00000004139   17  26699368   A  AC  INS       1      NaN   
277    TRRAP  ENSG00000196367    7  98553777   C   A  SNV       0      NaN   
..       ...              ...  ...       ...  ..  ..  ...     ...      ...   
397    MUC5B  ENSG00000117983   11   1270868   T   A  SNV       0     0.36   
759    POTED  ENSG00000166351   21  14982952   A   G  SNV       0     0.91   
396    MUC5B  ENSG00000117983   11   1267123   A   T  SNV       0     1.00   
431   UBTFL1  ENSG00000255009   11  89819380   A   G  SNV       0      NaN   
794    DDX53

------------------ brain DevDelay5148 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
     GeneName         GeneID_y  #Chr        Pos    Ref Alt Type  Length  \
113    IQSEC3  ENSG00000120645    12     250288      A   C  SNV       0   
98   B4GALNT4  ENSG00000182272    11     376072      A   C  SNV       0   
140     ZBTB4  ENSG00000174282    17    7369539  AGGCC   A  DEL       4   
180   CACNA1I  ENSG00000100346    22   40060742      A   C  SNV       0   
141     ZBTB4  ENSG00000174282    17    7369544    ATG   A  DEL       2   
..        ...              ...   ...        ...    ...  ..  ...     ...   
53      TMA16  ENSG00000198498     4  164440480      G   T  SNV       0   
176     MED15  ENSG00000099917    22   20939156      G   C  SNV       0   
52      TMA16  ENSG00000198498     4  164440475      C   T  SNV       0   
51      TMA16  ENSG00000198498     4  164440474      C   T  SNV  

Columns (0,113) have mixed types.Specify dtype option on import or set low_memory=False.


{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
    GeneName         GeneID_y #Chr        Pos Ref       Alt Type  Length  \
398   IQSEC3  ENSG00000120645   12     250288   A         C  SNV       0   
242     FIG4  ENSG00000112367    6  110053838   A         T  SNV       0   
601      CIC  ENSG00000079432   19   42799294   A        AG  INS       1   
269    CDK13  ENSG00000065883    7   40127724   G         T  SNV       0   
123   PTPN23  ENSG00000076201    3   47454409   A  ACCCCCCC  INS       7   
..       ...              ...  ...        ...  ..       ...  ...     ...   
310  POU5F1B  ENSG00000212993    8  128428322   G         C  SNV       0   
315   IFNA10  ENSG00000186803    9   21206532   C         T  SNV       0   
551    TEX14  ENSG00000121101   17   56676457   T         C  SNV       0   
363    MUC5B  ENSG00000117983   11    1261508   C         G  SNV       0   
653   PIWIL3  ENSG00000184571   22

[array([[-2.30619895e-03, -2.38251477e-03,  1.56023246e-05, ...,
         5.49535190e-06,  2.75377968e-06,  4.46560772e-06],
       [ 2.23034516e-04, -1.44812225e-03,  8.40784614e-05, ...,
         2.44025258e-06,  6.31253468e-06,  1.27797953e-05],
       [-4.54058001e-03,  6.72421699e-03,  4.58167828e-05, ...,
         7.64780810e-06,  6.25127701e-06,  9.29353843e-06],
       ...,
       [-1.59304973e-03, -1.40330409e-03,  3.31055879e-05, ...,
         4.71791921e-06,  3.32915506e-06,  4.37069931e-06],
       [ 3.27195037e-04, -1.87101774e-03,  1.08883587e-05, ...,
         1.06975770e-06,  3.91689807e-06,  6.31555038e-06],
       [ 4.22592045e-04, -2.22857700e-03,  4.55385471e-07, ...,
         8.66457838e-07,  4.50377371e-06, -2.65278905e-05]]), array([[ 2.30619895e-03,  2.38251477e-03, -1.56023246e-05, ...,
        -5.49535190e-06, -2.75377968e-06, -4.46560772e-06],
       [-2.23034516e-04,  1.44812225e-03, -8.40784615e-05, ...,
        -2.44025258e-06, -6.31253468e-06, -1.27797953

--------------------------- Usher16032 ------------------------
Tissues:  brain
------------------ brain Usher16032 ----------------
{'random_state': 1234, 'n_estimators': 400, 'min_samples_split': 10, 'min_samples_leaf': 10, 'max_depth': 40, 'bootstrap': False}
     GeneName         GeneID_y #Chr        Pos         Ref  \
43      GPR98  ENSG00000164199    5   90059126          AT   
19      PRKRA  ENSG00000180228    2  179312231           A   
28       CHL1  ENSG00000134121    3     432737           C   
33   KIAA2018  ENSG00000176542    3  113376117  ATGCTGCTGC   
138     HAUS5  ENSG00000249115   19   36109302           A   
..        ...              ...  ...        ...         ...   
91     FBRSL1  ENSG00000112787   12  133086349           G   
137     HAUS5  ENSG00000249115   19   36109297           A   
139     PSMD8  ENSG00000099341   19   38866785           T   
71   PTCHD3P1  ENSG00000224597   10   29770662           A   
74       SPRN  ENSG00000203772   10  135381814         

TypeError: 'tuple' object is not callable