In [1]:
import math
import warnings
import datetime
import scipy as sp
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
from pandas_plink import read_plink1_bin

## Configuration: edit the cell below to reproduce the final results for a different simulation configuration

In [2]:
# Root folder of the simulation data; the helper functions below append
# sub-paths such as "GWAS_SummaryStats/", "Phenotypes/", "Genotypes/" and
# "Additional/..." to it by plain string concatenation, so keep the trailing "/".
PATH = './Simul_Analysis_1/'

# One inner list per simulation configuration. Every inner list is passed to
# multiple_evals() as a whole and summarised by a single mean / std entry.
exp_list = [
    [
        '_EurEasAmrAfr_exp1.4.1',
        '_EurEasAmrAfr_exp1.4.2',
        '_EurEasAmrAfr_exp1.4.3',
    ],
]

#Other Examples:
# exp_list = [['_EurOnly_exp1.4.1','_EurOnly_exp1.4.2','_EurOnly_exp1.4.3'],\
#             ['_AfrOnly_exp1.4.1','_AfrOnly_exp1.4.2','_AfrOnly_exp1.4.3'],\
#             ['_EurEas_exp1.4.1','_EurEas_exp1.4.2','_EurEas_exp1.4.3'],\
#             ['_EurEasAmr_exp1.3.1','_EurEasAmr_exp1.3.2','_EurEasAmr_exp1.3.3']]

## Helper Functions

In [3]:
# Where each non-baseline method stores its predicted betas and how to read them.
# Tuple layout:
#   (path stem relative to PATH,
#    True  -> the full exp_num is appended to the stem,
#    False -> only the 9-character dataset suffix is appended,
#    delimiter passed to pd.read_csv,
#    header argument passed to pd.read_csv,
#    beta column (integer position or column name),
#    SNP-id column used to align rows to the summary-statistics SNP order,
#    or None when the file is used row-for-row)
_PRED_BETA_SOURCES = {
    'MPP-GWAS':        ("Additional/Results_MultiPopPred/mppgwas_pred_sas_betas",
                        True, "\t", 'infer', 1, None),
    'MPP-GWAS-TarSS':  ("Additional/Results_MultiPopPred/mppgwastarss_pred_sas_betas",
                        True, "\t", 'infer', 1, None),
    'MPP-GWAS-Admix':  ("Additional/Results_MultiPopPred/mppgwasadmix_pred_sas_betas",
                        True, "\t", 'infer', 1, None),
    'PRSCSX':          ("Additional/Results_SOTAmethods/prscsx_pred_sas_betas",
                        True, "\t", None, 5, None),
    'PROSPER':         ("Additional/Results_SOTAmethods/prosper_pred_sas_betas",
                        True, "\t", 'infer', 3, 'rsid'),
    'LASSOSUM-ExtLD':  ("Additional/Results_SOTAmethods/lassosumExtLD_pred_betas_sas",
                        False, "\t", 'infer', 3, 'rsid'),
    'TLMULTI':         ("Additional/Results_SOTAmethods/tlmulti_pred_sas_betas",
                        True, " ", 'infer', 1, None),
    'MPP-PRS':         ("Additional/Results_MultiPopPred/mppprs_pred_sas_betas",
                        True, "\t", 'infer', 1, 'SNP'),
    'MPP-PRS+':        ("Additional/Results_MultiPopPred/mppprs+_pred_sas_betas",
                        True, "\t", 'infer', 1, None),
    'LASSOSUM-TrueLD': ("Additional/Results_SOTAmethods/lassosumTrueLD_sas_train",
                        False, "\t", 'infer', 'beta', None),
}


def _align_betas_to_snps(beta_table, snp_col, beta_col, snps):
    '''Return one beta per entry of snps; SNPs missing from beta_table get 0.0.'''
    lookup = {}
    # setdefault keeps the first row of a duplicated SNP id, like the original
    # "first matching row" lookup did.
    # NOTE(review): the original tested membership against column 0 but located
    # the row through snp_col; this assumes both are the same column - confirm.
    for snp_id, value in zip(beta_table[snp_col].tolist(),
                             beta_table.iloc[:, beta_col].tolist()):
        lookup.setdefault(snp_id, value)
    return np.asarray([float(lookup.get(snp, 0.0)) for snp in snps], dtype=float)


def _read_pred_betas(pred_method, exp_num, dataset_suffix, snps):
    '''Load the predicted betas of a non-baseline method as a float array.'''
    stem, use_full_id, sep, header, beta_col, snp_col = _PRED_BETA_SOURCES[pred_method]
    file_id = exp_num if use_full_id else dataset_suffix
    beta_table = pd.read_csv(str(PATH) + stem + str(file_id) + ".txt",
                             delimiter=sep, header=header)

    if snp_col is not None:
        return _align_betas_to_snps(beta_table, snp_col, beta_col, snps)

    if isinstance(beta_col, str):
        column = beta_table[beta_col]
    else:
        column = beta_table.iloc[:, beta_col]
    return np.asarray(column.tolist(), dtype=float)


def compute_metrics(exp_num, pred_method):
    '''
    This function computes the R^2 and Correlation Ratio Metric for the test data

    Inputs: exp_num of the form '_AuxPops_DatasetID'
            AuxPops corresponds to the Auxiliary Populations used | Example: EurEasAmrAfr
            DatasetID corresponds to the specific ID for the simulated data | Example: exp1.4.1
            Only the last 9 characters (e.g. '_exp1.4.1') are used to locate the
            target-population files; the full string is used for most result files.

            pred_method indicating which method's results are to be evaluated
            Example: MPP-GWAS
            Supported: 'BASELINE' plus every key of _PRED_BETA_SOURCES.

    Outputs: A Tuple of the form (R^2, Correlation_Ratio)
             R^2 is the squared Pearson correlation between the true test
             phenotypes and (standardised test genotypes x predicted betas).
             Correlation_Ratio divides that correlation by the correlation
             obtained when the true betas are used instead.
             For an unsupported pred_method a message is printed and None is
             returned, before any file is read.
    '''

    # Import the submodule explicitly: "import scipy as sp" alone does not
    # guarantee that sp.stats is loaded on every SciPy version.
    from scipy import stats

    # Reject unknown methods up front instead of after loading all the data.
    if pred_method != 'BASELINE' and pred_method not in _PRED_BETA_SOURCES:
        print("Invalid Pred Method in Input!")
        return None

    dataset_suffix = str(exp_num[-9:])

    #===========================================

    #Load Baseline GWAS Pred Betas (also defines the reference SNP order)
    sumstats = pd.read_csv(str(PATH)+"GWAS_SummaryStats/sas"+dataset_suffix+".sumstat",
                           delimiter=" ", header=None,
                           names=['chr','snp','genetic.dist','pos','effAllele','refAllele',
                                  'beta','se','t','pvalue','n'])
    snps = sumstats['snp'].tolist()

    #Load True Betas
    true_betas = pd.read_csv(str(PATH)+"Phenotypes/beta_sas"+dataset_suffix+".truebetas",
                             delimiter=" ", header=None)
    true_betas = np.asarray(true_betas.iloc[:,0].tolist(), dtype=float)

    #===========================================

    #Load Test Genotype Data
    geno_prefix = str(PATH)+"Genotypes/sas_test"+dataset_suffix+"_CHR22"
    test_geno = read_plink1_bin(geno_prefix+".bed", geno_prefix+".bim", geno_prefix+".fam",
                                verbose = False)
    test_geno = test_geno.values

    # Swap the 0 and 2 codes; every other value is left as is.
    # NOTE(review): presumably done so the dosage counts the same allele as the
    # beta files - confirm against the genotype simulation step.
    test_geno = np.where(test_geno == 2, 0, np.where(test_geno == 0, 2, test_geno))
    test_geno = np.array(test_geno, dtype=float)

    # Standardise every column (columns are multiplied with the beta vectors below).
    # A zero-variance column would divide by zero and turn every prediction
    # into NaN; such columns are centred to all-zero instead.
    geno_std = np.std(test_geno, axis=0)
    safe_std = np.where(geno_std > 0, geno_std, 1.0)
    test_geno = (test_geno - np.mean(test_geno, axis=0))/safe_std

    #Load True Test Phenotypes
    test_true_pheno = pd.read_csv(str(PATH)+"Phenotypes/pheno_sas_test"+dataset_suffix+".truepheno",
                                  delimiter=" ", header=None)
    test_true_pheno = np.asarray(test_true_pheno.iloc[:,0].tolist(), dtype=float)

    #===============Metrics Computation============================

    if pred_method == 'BASELINE':
        pred_betas = np.asarray(sumstats['beta'].tolist(), dtype=float)
    else:
        pred_betas = _read_pred_betas(pred_method, exp_num, dataset_suffix, snps)

    # R(Y_True_Test, Y_Pred_Test)
    test_pred_pheno = np.matmul(test_geno, pred_betas)
    r_y_test = stats.pearsonr(test_true_pheno, test_pred_pheno)[0]

    # [R(Y_True_Test, Y_Pred_Test) / R(Y_True_Test, Y_True_Test - eps)]
    best_possible_pheno = np.matmul(test_geno, true_betas)
    r_y_test_best_possible = stats.pearsonr(test_true_pheno, best_possible_pheno)[0]

    r_y_test_correlation_ratio = r_y_test/r_y_test_best_possible

    return (r_y_test*r_y_test, r_y_test_correlation_ratio)

In [4]:
def multiple_evals(exps):
    '''
    This function computes the performance metrics for multiple
    simulation configurations for all methods at a time.

    Inputs: List of experiment IDs

    Outputs: A 22-tuple of lists, two lists per method in the order
             (R^2 list, Correlation_Ratio list), for the methods
             MPP-GWAS, MPP-GWAS-TarSS, MPP-GWAS-Admix, BASELINE, PRSCSX,
             PROSPER, LASSOSUM-ExtLD, TLMULTI, MPP-PRS, MPP-PRS+,
             LASSOSUM-TrueLD. Every list has one entry per experiment ID.

             MPP-GWAS-Admix is not evaluated for single-population experiments
             (ID contains 'EurOnly', 'EasOnly', 'AmrOnly' or 'AfrOnly') and
             TLMULTI is evaluated only for those; skipped entries are None.
    '''

    single_pop_tags = ('EurOnly', 'EasOnly', 'AmrOnly', 'AfrOnly')

    # (method name, scope) in the order of the returned tuple.
    # scope: 'all'    -> evaluated for every experiment
    #        'multi'  -> only for experiments with several auxiliary populations
    #        'single' -> only for single-population experiments
    method_plan = [
        ('MPP-GWAS', 'all'),
        ('MPP-GWAS-TarSS', 'all'),
        ('MPP-GWAS-Admix', 'multi'),
        ('BASELINE', 'all'),
        ('PRSCSX', 'all'),
        ('PROSPER', 'all'),
        ('LASSOSUM-ExtLD', 'all'),
        ('TLMULTI', 'single'),
        ('MPP-PRS', 'all'),
        ('MPP-PRS+', 'all'),
        ('LASSOSUM-TrueLD', 'all'),
    ]

    r2_by_method = {method: [] for method, _ in method_plan}
    ratio_by_method = {method: [] for method, _ in method_plan}

    for exp_id in exps:
        is_single_pop = any(tag in exp_id for tag in single_pop_tags)

        for method, scope in method_plan:
            if scope == 'all':
                applicable = True
            elif scope == 'single':
                applicable = is_single_pop
            else:
                applicable = not is_single_pop

            if applicable:
                r2_test, r_y_test_correlation_ratio = compute_metrics(exp_id, method)
            else:
                r2_test, r_y_test_correlation_ratio = None, None

            # Both lists always receive an entry so they stay aligned with exps.
            # (The original appended the TLMULTI ratio to an undefined name,
            # which raised NameError for single-population experiments.)
            r2_by_method[method].append(r2_test)
            ratio_by_method[method].append(r_y_test_correlation_ratio)

    flat_results = []
    for method, _ in method_plan:
        flat_results.append(r2_by_method[method])
        flat_results.append(ratio_by_method[method])

    return tuple(flat_results)

In [5]:
# Per-method accumulators for the correlation ratio: every list receives one
# entry (mean or standard deviation across replicates) per group in exp_list.
mppgwas_metric_avg, mppgwas_metric_std = [], []
mppgwastarss_metric_avg, mppgwastarss_metric_std = [], []
mppgwasadmix_metric_avg, mppgwasadmix_metric_std = [], []
baseline_metric_avg, baseline_metric_std = [], []
prscsx_metric_avg, prscsx_metric_std = [], []
prosper_metric_avg, prosper_metric_std = [], []
lassosumextLD_metric_avg, lassosumextLD_metric_std = [], []
tlmulti_metric_avg, tlmulti_metric_std = [], []
mppprs_metric_avg, mppprs_metric_std = [], []
mppprsplus_metric_avg, mppprsplus_metric_std = [], []
lassosumtrueLD_metric_avg, lassosumtrueLD_metric_std = [], []


def _append_mean_std(avg_store, std_store, ratios):
    """Append np.mean(ratios) to avg_store and np.std(ratios) to std_store."""
    avg_store.append(np.mean(ratios))
    std_store.append(np.std(ratios))


for j in range(len(exp_list)):

    (MPPGWAS_r2_test, MPPGWAS_r_y_test_correlation_ratio,
     MPPGWASTarSS_r2_test, MPPGWASTarSS_r_y_test_correlation_ratio,
     MPPGWASAdmix_r2_test, MPPGWASAdmix_r_y_test_correlation_ratio,
     BASELINE_r2_test, BASELINE_r_y_test_correlation_ratio,
     PRSCSX_r2_test, PRSCSX_r_y_test_correlation_ratio,
     PROSPER_r2_test, PROSPER_r_y_test_correlation_ratio,
     LASSOSUMExtLD_r2_test, LASSOSUMExtLD_r_y_test_correlation_ratio,
     TLMULTI_r2_test, TLMULTI_r_y_test_correlation_ratio,
     MPPPRS_r2_test, MPPPRS_r_y_test_correlation_ratio,
     MPPPRSplus_r2_test, MPPPRSplus_r_y_test_correlation_ratio,
     LASSOSUMTrueLD_r2_test, LASSOSUMTrueLD_r_y_test_correlation_ratio) = multiple_evals(exp_list[j])

    _append_mean_std(mppgwas_metric_avg, mppgwas_metric_std,
                     MPPGWAS_r_y_test_correlation_ratio)
    _append_mean_std(mppgwastarss_metric_avg, mppgwastarss_metric_std,
                     MPPGWASTarSS_r_y_test_correlation_ratio)

    # The first experiment ID of the group decides whether the whole group is
    # treated as a single-auxiliary-population configuration.
    first_exp_id = exp_list[j][0]
    single_pop_group = any(tag in first_exp_id
                           for tag in ('EurOnly', 'EasOnly', 'AmrOnly', 'AfrOnly'))

    # MPP-GWAS-Admix has no results for single-population groups.
    if single_pop_group:
        mppgwasadmix_metric_avg.append(None)
        mppgwasadmix_metric_std.append(None)
    else:
        _append_mean_std(mppgwasadmix_metric_avg, mppgwasadmix_metric_std,
                         MPPGWASAdmix_r_y_test_correlation_ratio)

    _append_mean_std(baseline_metric_avg, baseline_metric_std,
                     BASELINE_r_y_test_correlation_ratio)
    _append_mean_std(prscsx_metric_avg, prscsx_metric_std,
                     PRSCSX_r_y_test_correlation_ratio)
    _append_mean_std(prosper_metric_avg, prosper_metric_std,
                     PROSPER_r_y_test_correlation_ratio)
    _append_mean_std(lassosumextLD_metric_avg, lassosumextLD_metric_std,
                     LASSOSUMExtLD_r_y_test_correlation_ratio)

    # TL-Multi has results only for single-population groups.
    if single_pop_group:
        _append_mean_std(tlmulti_metric_avg, tlmulti_metric_std,
                         TLMULTI_r_y_test_correlation_ratio)
    else:
        tlmulti_metric_avg.append(None)
        tlmulti_metric_std.append(None)

    _append_mean_std(mppprs_metric_avg, mppprs_metric_std,
                     MPPPRS_r_y_test_correlation_ratio)
    _append_mean_std(mppprsplus_metric_avg, mppprsplus_metric_std,
                     MPPPRSplus_r_y_test_correlation_ratio)
    _append_mean_std(lassosumtrueLD_metric_avg, lassosumtrueLD_metric_std,
                     LASSOSUMTrueLD_r_y_test_correlation_ratio)


# Display labels, in the order in which the methods are reported.
_METHOD_LABELS = (
    'Baseline GWAS',
    'Lassosum-ExtLD',
    'Lassosum-TrueLD',
    'TL-Multi',
    'PRS-CSx',
    'PROSPER',
    'MPP-GWAS',
    'MPP-GWAS-TarSS',
    'MPP-GWAS-Admixture',
    'MPP-PRS',
    'MPP-PRS+',
)

mean_data = dict(zip(_METHOD_LABELS, (
    baseline_metric_avg,
    lassosumextLD_metric_avg,
    lassosumtrueLD_metric_avg,
    tlmulti_metric_avg,
    prscsx_metric_avg,
    prosper_metric_avg,
    mppgwas_metric_avg,
    mppgwastarss_metric_avg,
    mppgwasadmix_metric_avg,
    mppprs_metric_avg,
    mppprsplus_metric_avg,
)))

std_data = dict(zip(_METHOD_LABELS, (
    baseline_metric_std,
    lassosumextLD_metric_std,
    lassosumtrueLD_metric_std,
    tlmulti_metric_std,
    prscsx_metric_std,
    prosper_metric_std,
    mppgwas_metric_std,
    mppgwastarss_metric_std,
    mppgwasadmix_metric_std,
    mppprs_metric_std,
    mppprsplus_metric_std,
)))

In [6]:
# Mean correlation ratio per method; each list holds one entry per group in exp_list
# (None where the method was not evaluated for that group).
mean_data

{'Baseline GWAS': [0.6286325841173471],
 'Lassosum-ExtLD': [0.5662045904840006],
 'Lassosum-TrueLD': [0.7023172630516709],
 'TL-Multi': [None],
 'PRS-CSx': [0.5891572654704783],
 'PROSPER': [0.7179741982130976],
 'MPP-GWAS': [0.7434834654497393],
 'MPP-GWAS-TarSS': [0.6585907564792468],
 'MPP-GWAS-Admixture': [0.7324160098151065],
 'MPP-PRS': [0.6783073164399984],
 'MPP-PRS+': [0.7420796861754739]}

In [7]:
# Standard deviation (np.std) of the correlation ratio per method; each list holds
# one entry per group in exp_list (None where the method was not evaluated).
std_data

{'Baseline GWAS': [0.016112932808095143],
 'Lassosum-ExtLD': [0.014924566915248343],
 'Lassosum-TrueLD': [0.01986346139804709],
 'TL-Multi': [None],
 'PRS-CSx': [0.009302009635751476],
 'PROSPER': [0.0163591763961227],
 'MPP-GWAS': [0.0223386436302076],
 'MPP-GWAS-TarSS': [0.018145025486233337],
 'MPP-GWAS-Admixture': [0.024081429579366084],
 'MPP-PRS': [0.04331575050957168],
 'MPP-PRS+': [0.012963450504936897]}