In [1]:
import pandas as pd
import numpy as np
import sys
import os
sys.path.append('/home/obesity')
from obesity.snp_encoding_traintest_split import get_balanced_encoded_test_data_and_labels
from tensorflow.keras.models import load_model
from obesity.model_evaluation import metric_array
from itertools import chain

In [2]:
import pandas as pd
from sklearn.metrics import accuracy_score,accuracy_score,f1_score,matthews_corrcoef,confusion_matrix,roc_curve,auc
import matplotlib.pyplot as plt
from matplotlib import gridspec

# Evaluate metrics (accuracy, precision, sensitivity, specificity, f1, mcc).
def predict_score_metric_array(predict_score, test_labels):
    """Score rounded predictions against binary ground-truth labels.

    Parameters
    ----------
    predict_score : numpy.ndarray
        Prediction scores. Each score is turned into a predicted class with
        ``.round()`` (presumably probabilities in [0, 1] -- confirm with the
        caller).
    test_labels : array-like
        True class labels, assumed to be encoded as 0 (negative) and
        1 (positive).

    Returns
    -------
    list
        ``[accuracy, precision, sensitivity, specificity, f1, mcc]``.
        A ratio whose denominator is zero is reported as NaN.
    """
    # Round once and reuse the result for every metric.
    predicted_labels = predict_score.round()

    def _ratio(numerator, denominator):
        # 0/0 yields NaN explicitly instead of relying on a numpy RuntimeWarning.
        return numerator / float(denominator) if denominator else float('nan')

    accuracy = accuracy_score(test_labels, predicted_labels)
    # Pin the label order so the matrix is always 2x2, even when only one class
    # occurs in the labels and predictions (otherwise indexing it would fail).
    confusion = confusion_matrix(test_labels, predicted_labels, labels=[0, 1])
    TP = confusion[1, 1]
    TN = confusion[0, 0]
    FP = confusion[0, 1]
    FN = confusion[1, 0]
    precision = _ratio(TP, TP + FP)
    sensitivity = _ratio(TP, FN + TP)
    specificity = _ratio(TN, TN + FP)
    f1 = f1_score(test_labels, predicted_labels)
    mcc = matthews_corrcoef(test_labels, predicted_labels)
    return [accuracy, precision, sensitivity, specificity, f1, mcc]

In [3]:
# Root directories: SNP .tsv inputs, and results (saved models and CSV output).
input_snp_data_root = '/home/obesity/snp_data'
result_root = '/home/obesity/cv_results_10_fold'

# Collect every .tsv file name in sorted order.
snp_data_list = sorted(
    name for name in os.listdir(input_snp_data_root) if name.endswith('.tsv')
)

# Fold consecutive sorted names into rows of two: the first name of each pair
# goes to the 'normal' column, the second to 'obesity'.
df = pd.DataFrame(
    np.asarray(snp_data_list).reshape(-1, 2),
    columns=['normal', 'obesity'],
)

In [4]:
# Display the table of paired file names (columns 'normal' and 'obesity').
df

Unnamed: 0,normal,obesity
0,TWB2_female_3060_bmi2430_exclude_combine0921_a...,TWB2_female_3060_bmi2430_exclude_combine0921_a...
1,TWB2_female_3060_bmi2430_exclude_combine_plus_...,TWB2_female_3060_bmi2430_exclude_combine_plus_...
2,TWB2_female_3060_bmi2430_exclude_combine_plus_...,TWB2_female_3060_bmi2430_exclude_combine_plus_...
3,TWB2_male_3060_bmi2430_exclude_combine0908_acg...,TWB2_male_3060_bmi2430_exclude_combine0908_acg...
4,TWB2_male_3060_bmi2430_exclude_combine_plus_KM...,TWB2_male_3060_bmi2430_exclude_combine_plus_KM...
5,TWB2_male_3060_bmi2430_exclude_combine_plus_KM...,TWB2_male_3060_bmi2430_exclude_combine_plus_KM...
6,TWB2_male_3060_bmi2430_exclude_random7500_1_pe...,TWB2_male_3060_bmi2430_exclude_random7500_1_pe...
7,TWB2_male_3060_bmi2430_exclude_random7500_2_pe...,TWB2_male_3060_bmi2430_exclude_random7500_2_pe...
8,TWB2_male_3060_bmi2430_exclude_random7500_3_pe...,TWB2_male_3060_bmi2430_exclude_random7500_3_pe...


In [4]:
# For every normal/obesity file pair, average the prediction scores of the ten
# saved models and evaluate that mean score. Each dataset becomes one column of
# `ensemble_models_test_result`; the rows follow the order returned by
# `predict_score_metric_array`.
ensemble_models_test_result = pd.DataFrame()
# Read the two columns by label: positional access such as `df.loc[i][0]` on a
# string-labelled Series is deprecated in pandas.
for normal_file, obesity_file in df[['normal', 'obesity']].itertuples(index=False, name=None):
    normal_data_path = os.path.join(input_snp_data_root, normal_file)
    obesity_data_path = os.path.join(input_snp_data_root, obesity_file)
    test_data, test_labels = get_balanced_encoded_test_data_and_labels(obesity_data_path, normal_data_path)

    # The results directory name is the obesity file name cut after its first '_ped'.
    ped_name = os.path.basename(obesity_data_path).split('_ped')[0] + '_ped'
    models_root = os.path.join(result_root, ped_name, 'model')

    predict_score_df = pd.DataFrame()
    for m in range(1, 11):
        model_name = 'model_best_{}'.format(m)
        model = load_model(os.path.join(models_root, model_name + '.h5'))
        # Flatten the model output into one score per test sample.
        predict_score_df[model_name] = np.asarray(model.predict(test_data)).ravel()

    # Ensemble score = row-wise mean over the model columns (computed before
    # the 'Mean' column itself exists, so it is not part of the average).
    predict_score_df['Mean'] = predict_score_df.mean(axis=1)
    metrics = predict_score_metric_array(np.array(predict_score_df['Mean']), test_labels)
    ensemble_models_test_result[ped_name] = metrics

In [5]:
# Write the metric table to 'ensemble_models_test_result.csv' under result_root.
ensemble_models_test_result.to_csv(os.path.join(result_root,'ensemble_models_test_result.csv'))

In [19]:
# Display the metric table: one column per dataset; rows 0-5 are, in order,
# accuracy, precision, sensitivity, specificity, f1, mcc.
ensemble_models_test_result

Unnamed: 0,TWB2_female_3060_bmi2430_exclude_combine0921_acgt_ped,TWB2_female_3060_bmi2430_exclude_combine_plus_KM0921_acgt_ped,TWB2_female_3060_bmi2430_exclude_combine_plus_KM_plus_giant0921_acgt_ped,TWB2_male_3060_bmi2430_exclude_combine0908_acgt_ped,TWB2_male_3060_bmi2430_exclude_combine_plus_KM0908_acgt_ped,TWB2_male_3060_bmi2430_exclude_combine_plus_KM_plus_giant0908_acgt_ped,TWB2_male_3060_bmi2430_exclude_random7500_1_ped,TWB2_male_3060_bmi2430_exclude_random7500_2_ped,TWB2_male_3060_bmi2430_exclude_random7500_3_ped
0,0.947195,0.950495,0.950495,0.949405,0.946429,0.9375,0.497024,0.47619,0.494048
1,0.972222,0.959732,0.972414,0.957576,0.957317,0.94012,0.496774,0.470588,0.493506
2,0.921053,0.940789,0.927632,0.940476,0.934524,0.934524,0.458333,0.380952,0.452381
3,0.97351,0.960265,0.97351,0.958333,0.958333,0.940476,0.535714,0.571429,0.535714
4,0.945946,0.950166,0.949495,0.948949,0.945783,0.937313,0.47678,0.421053,0.47205
5,0.895656,0.901172,0.901967,0.898953,0.89311,0.875016,-0.00597,-0.048507,-0.011946
