In [37]:
import pandas as pd
import math
from collections import defaultdict
from functools import cmp_to_key
import itertools
import statsmodels.stats.multitest as multi

def isNaN(string):
    return string != string


In [38]:

fname = r'/Users/Dasha/work/UMCG/data/MR/results2/AA_T2D/BCAA_rebuttal_Dasha/MR/MR_results_rebuttal4.xlsx'
sh1 = 'pheno-BCAA_rm_BMI_SNPs'
sh2 = 'BCAA-pheno_rm_BMI_SNPs'
#sh1 = "UKB-mb"
#sh2 = "mb-UKB"
df1 = pd.read_excel (fname, sheet_name=sh1)
df2 = pd.read_excel (fname, sheet_name=sh2)
df1['exposure'] = [x for (x,y) in df1['exposure'].str.split(" \|\| ").to_list()]
df2['outcome'] = [x for (x,y) in df2['outcome'].str.split(" \|\| ").to_list()]

df1['tmp'] = df1['id.exposure'] + ":" + df1['outcome']
df2['tmp'] = df2['id.outcome'] + ":" + df2['exposure']

df1new = pd.merge(df1, df2[['tmp', 'pval']], on ='tmp', how ='left')
df1new.rename(columns = {"pval_x" : "pval", "pval_y" : "reverse_MR_pval"}, inplace=True)

df2new = pd.merge(df2, df1[['tmp', 'pval']], on ='tmp', how ='left')
df2new.rename(columns = {"pval_x" : "pval", "pval_y" : "reverse_MR_pval"}, inplace=True)

df1new.drop('tmp', inplace=True, axis=1)
df2new.drop('tmp', inplace=True, axis=1)


In [39]:

# Write all filters that failed into one field
def get_failed_flts(row):
    flt_line = "-"
    if int(row['nsnp']) < 3:
        flt_line += ';Number of SNPs'
    if not isNaN(row['egger_intercept_pval']):
        if float(row['egger_intercept_pval']) < 0.05:
            flt_line += ';Egger intercept p-value'
    if not isNaN(row['weighted_median_pval']):        
        if float(row['weighted_median_pval']) > 0.05:
            flt_line += ';Weighted median p-value'
    if not isNaN(row['mr_presso_global']):        
        if not isNaN(row['mr_presso_outlier_cor_pval']):
            if row['mr_presso_global'] == '<1e-04':
                if float(row['mr_presso_outlier_cor_pval']) > 0.05:
                    flt_line += ';MR PRESSO outlier test'
            elif float(row['mr_presso_global']) < 0.05 and float(row['mr_presso_outlier_cor_pval']) > 0.05:
                flt_line += ';MR PRESSO outlier test'
    if not isNaN(row['mr_presso_pval']):
        if float(row['mr_presso_pval']) > 0.05:
            flt_line += ';MR PRESSO p-value'
    # Leave-one-out updated to filter out only cases with exactly 1 p-value > 0.05
    loo_pvals = row['leave_one_out_pval']
    pval_cnt = 0
    if not isNaN(loo_pvals):
        for pval in map(float, loo_pvals.split(",")):
            if pval > 0.05:
                pval_cnt += 1
        if pval_cnt == 1:
            flt_line += ';Leave-one-out analysis'
        #if not isNaN(row['reverse_MR_pval']):
        #    if float(row['reverse_MR_pval']) < 0.05:
        #        flt_line += ';Reverse MR p-value'
    return(flt_line.replace("-;", "", 1))


In [40]:

df1new['failed_filters'] = df1new.apply (lambda row: get_failed_flts(row), axis=1)
df2new['failed_filters'] = df2new.apply (lambda row: get_failed_flts(row), axis=1)


In [35]:

# Write the intermediate tables with failed filters column but without removing anything
with pd.ExcelWriter(fname, mode='a') as writer:  
    df1new.to_excel(writer, sheet_name=sh1+'_all', na_rep = 'NA')
    df2new.to_excel(writer, sheet_name=sh2+'_all', na_rep = 'NA')


In [41]:

df1new_flt = df1new[(df1new.nsnp > 2)]
df2new_flt = df2new[(df2new.nsnp > 2)]

df1new_flt['BH_qval'] = multi.multipletests(df1new_flt['pval'], method = 'fdr_bh')[1]
df2new_flt['BH_qval'] = multi.multipletests(df2new_flt['pval'], method = 'fdr_bh')[1]

# Write the tables with basic filters and BH correction applied
with pd.ExcelWriter(fname, mode='a') as writer:  
    df1new_flt.to_excel(writer, sheet_name=sh1+'_flt', na_rep = 'NA')
    df2new_flt.to_excel(writer, sheet_name=sh2+'_flt', na_rep = 'NA')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1new_flt['BH_qval'] = multi.multipletests(df1new_flt['pval'], method = 'fdr_bh')[1]
