Read in the exposure and outcome GWAS summary statistics, keep only the exposure SNPs that reach the p-value significance threshold (p < 5e-8), and then join the two datasets on rsID.

In [1]:
%load_ext autoreload
%autoreload 2

import polars as pl

# Abbreviations used in the harmonised column names:
#   exp -> exposure, out -> outcome, ea -> effect allele, oa -> other allele

# Exposure file header -> harmonised column name.
exp_header_dict = {
    "rsID": "rsid",
    "CHROM": "chr_exp",
    "ALT": "ea_exp",
    "REF": "oa_exp",
    "POOLED_ALT_AF": "eaf_exp",
    "EFFECT_SIZE": "beta_exp",
    "SE": "se_exp",
    "pvalue": "pval_exp",
}

# Outcome file header -> harmonised column name.
out_header_dict = {
    "markername": "rsid",
    "chr": "chr_out",
    "bp_hg19": "pos_out",
    "effect_allele": "ea_out",
    "noneffect_allele": "oa_out",
    "effect_allele_freq": "eaf_out",
    "beta": "beta_out",
    "se_dgc": "se_out",
    "p_dgc": "pval_out",
}

# Exposure SNPs must have pval_exp strictly below this value to be kept.
pthresh = 5e-8

# Both files are scanned lazily; nothing is read until .collect() below.
exposure_scan = pl.scan_csv("dataset/ldlc_gwas.txt", separator="\t")
dexp = exposure_scan.rename(exp_header_dict).filter(pl.col("pval_exp") < pthresh)

outcome_scan = pl.scan_csv("dataset/mi_gwas.tsv", separator="\t")
dout = outcome_scan.rename(out_header_dict)

# Allele codes are lower-cased in all four allele columns.
allele_columns = ("ea_exp", "oa_exp", "ea_out", "oa_out")
lowercase_alleles = [pl.col(name).str.to_lowercase() for name in allele_columns]

# Replaces eaf_exp with 1 - eaf_exp (original note: "convert minor allele freq
# to effect allele freq"). Written as mul/add so the column keeps its name.
# NOTE(review): eaf_exp is sourced from POOLED_ALT_AF while ea_exp comes from
# ALT -- confirm that flipping the frequency is really intended here.
flipped_eaf = pl.col("eaf_exp").mul(-1).add(1)

# Join exposure and outcome on the shared rsid, then materialise the result.
joined = dexp.join(dout, on="rsid")
combined = joined.with_columns(*lowercase_alleles, flipped_eaf).collect()

print(combined.shape)

(75089, 26)


We can use the provided harmonize function.

In [2]:
from MR.harmonize import harmonize

# Run the project's harmonize step on the joined exposure/outcome table.
# NOTE(review): the meaning of palindromic_action=1 and
# palindromic_threshold=0.08 is defined in MR.harmonize -- confirm there.
harmonize_options = {'palindromic_action': 1, 'palindromic_threshold': 0.08}
total = harmonize(combined, **harmonize_options)
print(total.shape)

# If de-duplicating on rsid leaves the shape unchanged, every rsid in `total`
# is already unique.
rsid_unique = total.unique(subset=['rsid'])
print(rsid_unique.shape)

(73095, 26)
(73095, 26)


Clump the data based on linkage disequilibrium (LD). This filters out SNPs that are closely correlated with one another, so that the same signal is not effectively counted more than once in the final calculation.

In [3]:
from MR.ld import ld_clump

# ld_clump receives the SNP identifiers together with their exposure p-values.
snp_ids = total['rsid']
exposure_pvals = total['pval_exp']
pruned_rsids = ld_clump(snp_ids, exposure_pvals)

# Keep only the harmonised rows whose rsid is present in the clumping result.
processed_data = total.join(pruned_rsids, on='rsid')

print(processed_data.shape)

(379, 26)


In [4]:
from MR.ld import ld_matrix

# print(ld_matrix(processed_data['rsid']))

Calculating causal effects.

In [5]:
# print('MR Presso')
# result = calculate_effect(processed_data, 'presso')
# print(f'Effect: {result["effect"]}')
# print(f'se: {result["se"]}\n')

from MR.mr_methods.egger_regression import mr_egger_regression
from MR.mr_methods.inverse_variance_weighted import mr_inverse_variance_weighted
from MR.mr_methods.maximum_likelihood import mr_maximum_likelihood
from MR.mr_methods.median import mr_penalised_weighted_median, mr_simple_median, mr_weighted_median
from MR.mr_methods.mode import mr_simple_mode, mr_weighted_mode
from MR.mr_methods.presso import mr_remove_outliers
from MR.mr_methods.wald_ratio import mr_wald_ratio


# Remove outlying SNPs before estimation; the second argument selects the
# criterion used by mr_remove_outliers.
data = mr_remove_outliers(processed_data, 'cooks distance')

# Columns shared by the estimator calls below.
beta_exp, beta_out = data['beta_exp'], data['beta_out']
se_exp, se_out = data['se_exp'], data['se_out']


def _report(label, estimator, *args):
    """Run one MR estimator and print its results.

    Prints ``label``, calls ``estimator(*args)``, then prints the ``effect``,
    ``se`` and ``pval`` entries of the returned mapping (the last one followed
    by a blank line).

    Args:
        label: Heading printed before the estimator is run.
        estimator: Callable returning a mapping with ``effect``, ``se`` and
            ``pval`` keys.
        *args: Positional arguments forwarded to ``estimator`` unchanged.

    Returns:
        The mapping returned by ``estimator``.
    """
    print(label)
    result = estimator(*args)
    print(f'Effect: {result["effect"]}')
    print(f'se: {result["se"]}')
    print(f'pval: {result["pval"]}\n')
    return result


# One entry per analysis: (heading, estimator, positional arguments).
# Entries run in the listed order. Commented-out entries are analyses that
# are currently disabled.
# NOTE(review): the stored output for 'Wald ratio' shows whole Series/arrays
# being printed (one value per SNP); consider summarising those instead.
mr_analyses = [
    ('Inverse Variance Weighted', mr_inverse_variance_weighted, (beta_exp, beta_out, se_out)),
    ('Wald ratio', mr_wald_ratio, (beta_exp, beta_out, se_out)),
    # The trailing 1000 is passed through to mr_simple_median as-is.
    ('Simple Median', mr_simple_median, (beta_exp, beta_out, se_exp, se_out, 1000)),
    ('Weighted Median', mr_weighted_median, (beta_exp, beta_out, se_exp, se_out)),
    # ('Penalised Weighted Median', mr_penalised_weighted_median, (beta_exp, beta_out, se_exp, se_out)),
    ('Egger Regression', mr_egger_regression, (beta_exp, beta_out, se_out)),
    ('Simple Mode', mr_simple_mode, (beta_exp, beta_out, se_exp, se_out)),
    ('Weighted Mode', mr_weighted_mode, (beta_exp, beta_out, se_exp, se_out)),
    # mr_penalised_weighted_mode is not imported in this cell; import it before enabling.
    # ('Penalised Weighted Mode', mr_penalised_weighted_mode, (beta_exp, beta_out, se_exp, se_out)),
    ('Maximum Likelihood', mr_maximum_likelihood, (beta_exp, beta_out, se_exp, se_out)),
]

# `result` ends up holding the output of the last analysis, as before.
for label, estimator, args in mr_analyses:
    result = _report(label, estimator, *args)



Inverse Variance Weighted
Effect: 0.5079116967099213
se: 0.043636846224405335
pval: 0.0

Wald ratio
Effect: shape: (352,)
Series: 'beta_out' [f64]
[
	0.599166
	0.924326
	0.557665
	-0.757966
	-0.417966
	0.638072
	-1.151759
	1.923373
	1.456407
	-0.695233
	-1.150925
	2.173613
	…
	0.262104
	1.883298
	-0.211527
	1.919243
	-1.312231
	-0.126507
	1.448315
	-2.366599
	1.868123
	1.161532
	0.247398
	2.120737
	0.972098
]
se: shape: (352,)
Series: 'se_out' [f64]
[
	0.387151
	1.099366
	0.158785
	0.701223
	0.789399
	1.09859
	0.54861
	1.186723
	1.150618
	1.227839
	0.960803
	0.597147
	…
	0.449429
	0.616606
	1.291699
	1.045216
	0.921131
	0.672134
	1.157983
	0.976417
	0.59601
	1.334014
	0.911858
	0.890098
	0.802859
]
pval: [1.21712222e-01 4.00470840e-01 4.44631578e-04 2.79732944e-01
 5.96476949e-01 5.61368675e-01 3.57804999e-02 1.05072801e-01
 2.05598489e-01 5.71240978e-01 2.30964412e-01 2.72638445e-04
 9.97981035e-01 3.68283422e-01 1.55792870e-01 3.18106738e-05
 5.46675187e-01 1.99626012e-02 2.42382859e