# SMR
- Based on SMR_template.txt
- run updated v8 GTEx on all Summary Statistics (including new BD summary statistics)
- automate swarm creation

In [None]:
import zipfile
import pandas as pd
import os
import glob
import numpy as np
import scipy
from scipy.stats import norm
import sys
import subprocess

In [11]:
def shell_do(command, log=False, return_log=False, make_part=False):
    """Run a command line, optionally printing and/or returning its stdout.

    Parameters
    ----------
    command : str
        Command line to execute. It is echoed to stderr (with whitespace
        collapsed) before it runs.
    log : bool, default False
        If true, print the captured stdout of the command.
    return_log : bool, default False
        If true, return the captured stdout as text; otherwise return None.
    make_part : bool, default False
        If false, the command is split on whitespace and run without a shell.
        If true, the raw string is passed to the shell (``shell=True``), which
        is needed for pipes, redirects or quoted arguments.

    Returns
    -------
    str or None
        Captured stdout when ``return_log`` is true, else None.
    """
    print(f'Executing: {(" ").join(command.split())}', file=sys.stderr)

    if make_part:
        res = subprocess.run(command, shell=True, stdout=subprocess.PIPE)
    else:
        res = subprocess.run(command.split(), stdout=subprocess.PIPE)

    # stdout is captured as bytes; decode leniently so odd bytes cannot raise.
    output = res.stdout.decode('utf-8', errors='replace')

    # Surface failures instead of dropping them silently; this deliberately does
    # not raise, so callers that loop over many submissions keep going.
    if res.returncode != 0:
        print(f'Command exited with status {res.returncode}', file=sys.stderr)

    if log:
        print(output)
    if return_log:
        return output
    return None

In [15]:
def SMR(disease, stats_path, work_dir, out_dir):
    """Write the core SMR swarm file for one disease and submit it with swarm.

    The swarm file holds one ``smr`` command for each single-file reference
    (blood eQTLgen, Brain-eMeta, Brain-mMeta, Bryois chromatin QTLs) followed by
    one command per autosome (1-22) for the blood methylation reference.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in every output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Root under which ``SMR_omics/...`` reference data is looked up.
    out_dir : str
        Directory used for every ``--out`` prefix.
    """
    ref_panel = '.../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel'
    swarm_file = f'.../omicSynth/v8/swarms/{disease}.SMR_swarm'

    # The SMR documentation describes --ld-upper-limit as taking an r-squared
    # threshold (default 0.9). The flag used to be written with no value, directly
    # before --out, so the documented default is now supplied explicitly.
    # NOTE(review): confirm 0.9 is the threshold that was intended.
    ld_limit = '--ld-upper-limit 0.9 '

    def smr_line(besd, out_name, ld_flag=''):
        # Build one smr command; besd is relative to {work_dir}/SMR_omics.
        return (
            f'smr --gwas-summary {stats_path} '
            f'--beqtl-summary {work_dir}/SMR_omics/{besd} '
            f'{ld_flag}'
            f'--out {out_dir}/{disease}_{out_name} '
            f'--bfile {ref_panel} --smr-multi \n'
        )

    lines = [
        'cd .../omicSynth/v8/intermediate_results/ \n',
        'module load SMR \n',
        '\n',
        '\n',
        '# Single file per assay starts here. \n',
        '\n',
        '## expression_blood_eQTLgen ## \n',
        smr_line(
            'eQTLs/eQTLgen/cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense',
            'expression_blood_eQTLgen_SMR_allChrs',
            ld_limit,
        ),
        ' \n',
        '## expression_brain_eMeta ## \n',
        smr_line('eQTLs/Brain-eMeta/Brain-eMeta', 'expression_brain_eMeta_SMR_allChrs', ld_limit),
        ' \n',
        '## methylation_brain_mMeta ## \n',
        smr_line('regQTLs/Brain-mMeta/Brain-mMeta', 'methylation_brain_mMeta_SMR_allChrs', ld_limit),
        '## chromatin_blood_Bryois ## \n',
        smr_line(
            'regQTLs/Bryois_caQTLs/bryois_NatCommun_2018_50kb_cQTLs',
            'chromatin_blood_Bryois_SMR_allChrs',
            ld_limit,
        ),
        ' \n',
        '# Multi chromsome reference data starts here. \n',
        ' \n',
        '## methylation_blood_mcrae ## \n',
    ]
    # The blood methylation reference is split per autosome; no LD limit flag here.
    lines.extend(
        smr_line(
            f'regQTLs/Blood-mMeta/bl_mqtl_lite_chr{chrom}',
            f'methylation_blood_mcrae_SMR_chr{chrom}',
        )
        for chrom in range(1, 23)
    )

    # The with-block closes the file; no explicit close() is needed.
    with open(swarm_file, 'w') as f:
        f.writelines(lines)

    # swarm command
    cmd = f'swarm -b 15 -f {swarm_file} -g 48 -t 12 --logdir .../omicSynth/v8/swarms --module SMR'

    shell_do(cmd)

def SMR_meta(disease, stats_path, work_dir, out_dir):
    """Write the MetaBrain SMR swarm file for one disease and submit it.

    For each of five MetaBrain tissues the swarm file gets a section header, one
    ``smr`` command per autosome (1-22) and a separator line. All commands use
    the ``..._gr38_metaBrain_variant_names`` reference panel.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in every output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Root under which ``SMR_omics/eQTLs/...`` reference data is looked up.
    out_dir : str
        Directory used for every ``--out`` prefix.
    """
    ref_panel = '.../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel_gr38_metaBrain_variant_names'
    swarm_file = f'.../omicSynth/v8/swarms/{disease}_meta.SMR_swarm'

    # (label written in the section header, tissue name used in paths/outputs).
    # The 'spinalchord' header spelling is kept so the emitted file is unchanged.
    tissues = [
        ('basalganglia', 'Basalganglia'),
        ('cerebellum', 'Cerebellum'),
        ('cortex', 'Cortex'),
        ('hippocampus', 'Hippocampus'),
        ('spinalchord', 'Spinalcord'),
    ]

    # The with-block closes the file; no explicit close() is needed.
    with open(swarm_file, 'w') as f:
        # NOTE(review): this cd target has no 'v8' component, unlike the one
        # written by SMR() — confirm that is intended.
        f.write('cd .../omicSynth/intermediate_results/ \n')
        f.write('module load SMR \n')
        f.write('\n')
        f.write('\n')
        f.write('# Multi chromsome reference data starts here. \n')
        f.write(' \n')

        for label, tissue in tissues:
            f.write(f'## expression_{label}_metaBrain ## \n')
            for chrom in range(1, 23):
                f.write(f'smr --gwas-summary {stats_path} --beqtl-summary {work_dir}/SMR_omics/eQTLs/2020-05-26-{tissue}-EUR/2020-05-26-{tissue}-EUR-{chrom}-SMR-besd --out {out_dir}/{disease}_expression_{tissue}_metaBrain_SMR_chr{chrom} --bfile {ref_panel} --smr-multi \n')
            f.write(' \n')

    # swarm command
    cmd = f'swarm -b 15 -f {swarm_file} -g 48 -t 12 --logdir .../omicSynth/v8/swarms --module SMR'

    shell_do(cmd)

def SMR_gtex(disease, stats_path, work_dir, out_dir, gtex_list):
    """Write the GTEx v8 SMR swarm file for one disease and submit it.

    One ``smr`` command is written per entry of ``gtex_list``.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in every output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Accepted for signature parity with the other SMR helpers; not used
        here, because the GTEx reference path is hard-coded.
    out_dir : str
        Directory used for every ``--out`` prefix.
    gtex_list : iterable of str
        BESD base names under ``GTEx_v8/eQTL_besd_lite``; each is also embedded
        in the output prefix.
    """
    swarm_file = f'.../omicSynth/v8/swarms/{disease}_gtex.SMR_swarm'

    # The with-block closes the file; no explicit close() is needed.
    with open(swarm_file, 'w') as f:
        # Was 'cd .../micSynth/...' (missing 'o'), which does not match the
        # project directory used everywhere else in this notebook.
        f.write('cd .../omicSynth/v8/intermediate_results/ \n')
        f.write('module load SMR \n')
        f.write('\n')
        f.write('\n')
        f.write('# expression_GTEx \n')
        f.write(' \n')

        for part in gtex_list:
            f.write(f'smr --gwas-summary {stats_path} --beqtl-summary .../omicSynth/SMR_omics/eQTLs/GTEx_v8/eQTL_besd_lite/{part} --out {out_dir}/{disease}_expression_{part}_GTEx_SMR_allChrs --bfile .../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel  --smr-multi \n')

    # swarm command
    cmd = f'swarm -b 15 -f {swarm_file} -g 48 -t 12 --logdir .../omicSynth/v8/swarms --module SMR'

    shell_do(cmd)

def SMR_psych(disease, stats_path, work_dir, out_dir):
    """Write the PsychENCODE SMR swarm file for one disease and submit it.

    The swarm file holds a single ``smr`` command against the PsychENCODE
    ``DER-08a_hg19_eQTL.significant`` reference.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in the output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Accepted for signature parity with the other SMR helpers; not used
        here, because the reference path is hard-coded.
    out_dir : str
        Directory used for the ``--out`` prefix.
    """
    # One variable for both writing and submitting, so the two cannot drift
    # apart (the submit path used to be spelled '...//omicSynth/...').
    swarm_file = f'.../omicSynth/v8/swarms/{disease}_psych.SMR_swarm'

    with open(swarm_file, 'w') as f:
        f.write('module load SMR \n')
        f.write('\n')
        f.write('\n')
        f.write('# Single file per assay starts here. \n')
        f.write('\n')
        f.write('## expression_psychencode prefrontal cortex ## \n')
        f.write(f'smr --gwas-summary {stats_path} --beqtl-summary .../omicSynth/SMR_omics/eQTLs/PsychEncode/DER-08a_hg19_eQTL.significant --out {out_dir}/{disease}_expression_psychEncode_prefrontal_cortex_SMR_allChrs --bfile .../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel --smr-multi \n')
        f.write(' \n')

    # swarm command
    cmd = f'swarm -b 20 -f {swarm_file} -g 48 --logdir .../omicSynth/v8/swarms --time=12:00:00 --module SMR'

    shell_do(cmd)

def SMR_pQTL(disease, stats_path, work_dir, out_dir):
    """Write the pQTL-atlas SMR swarm file for one disease and submit it.

    The swarm file holds three ``smr`` commands, one each for the CSF, plasma
    and brain ``*_atlas_update`` references.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in every output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Accepted for signature parity with the other SMR helpers; not used
        here, because the reference paths are hard-coded.
    out_dir : str
        Directory used for every ``--out`` prefix.
    """
    # All three commands share this panel. The CSF command used to point at
    # '.../additional_reference_data/...' (no 'omicSynth/' component), unlike
    # the plasma and brain commands.
    ref_panel = '.../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel'
    swarm_file = f'.../omicSynth/v8/swarms/{disease}_atlas.SMR_swarm'

    # (label written in the section header, tissue token used in paths/outputs)
    atlases = [('CSF', 'csf'), ('plasma', 'plasma'), ('brain', 'brain')]

    with open(swarm_file, 'w') as f:
        f.write('module load SMR \n')
        f.write('\n')
        f.write('\n')
        f.write('# Single file per assay starts here. \n')
        f.write('\n')
        for label, tissue in atlases:
            f.write(f'## pQTL {label} ## \n')
            f.write(f'smr --gwas-summary {stats_path} --beqtl-summary .../omicSynth/SMR_omics/pQTLs/atlas/{tissue}_atlas_update --out {out_dir}/{disease}_pQTL_{tissue}_SMR_allChrs --bfile {ref_panel} --smr-multi \n')
            f.write(' \n')

    # swarm command
    cmd = f'swarm -b 20 -f {swarm_file} -g 48 --logdir .../omicSynth/v8/swarms --time=12:00:00 --module SMR'

    shell_do(cmd)

def SMR_eQTL(disease, stats_path, work_dir, out_dir):
    """Write the multi-ancestry eQTL SMR swarm file for one disease and submit it.

    The swarm file holds a single ``smr`` command against the
    ``multiancestry/multi_ancestry_eqtl`` reference.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in the output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Not used by this function; the reference path is hard-coded.
    out_dir : str
        Directory used for the ``--out`` prefix.
    """
    # Full contents of the swarm file, in the order they are written.
    swarm_lines = (
        'module load SMR \n',
        '\n',
        '\n',
        '# Single file per assay starts here. \n',
        '\n',
        '## eQTL multi ancestry ## \n',
        f'smr --gwas-summary {stats_path} --beqtl-summary .../omicSynth/SMR_omics/eQTLs/multiancestry/multi_ancestry_eqtl --out {out_dir}/{disease}_eQTL_multiancestry_SMR_allChrs --bfile .../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel --smr-multi \n',
        ' \n',
    )

    with open(f'.../omicSynth/v8/swarms/{disease}_multiancestry.SMR_swarm', 'w') as handle:
        handle.writelines(swarm_lines)

    # Submit the file that was just written.
    shell_do(f'swarm -b 20 -f .../omicSynth/v8/swarms/{disease}_multiancestry.SMR_swarm -g 48 --logdir .../omicSynth/v8/swarms --time=12:00:00 --module SMR')

def SMR_eqtlGen(disease, stats_path, work_dir, out_dir):
    """Write an eQTLgen-only SMR swarm file for one disease and submit it.

    The swarm file holds a single ``smr`` command against the blood eQTLgen
    reference. Unlike the eQTLgen command written by ``SMR``, this one carries
    no ``--ld-upper-limit`` flag.

    Parameters
    ----------
    disease : str
        Label used in the swarm file name and in the output prefix.
    stats_path : str
        Path passed to ``smr --gwas-summary``.
    work_dir : str
        Root under which ``SMR_omics/eQTLs/eQTLgen`` is looked up.
    out_dir : str
        Directory used for the ``--out`` prefix.
    """
    swarm_file = f'.../omicSynth/v8/swarms/{disease}_eqtlgen.SMR_swarm'

    # The with-block closes the file; no explicit close() is needed.
    with open(swarm_file, 'w') as f:
        f.writelines([
            'cd .../omicSynth/v8/intermediate_results/ \n',
            'module load SMR \n',
            '\n',
            '\n',
            '# Single file per assay starts here. \n',
            '\n',
            '## expression_blood_eQTLgen ## \n',
            f'smr --gwas-summary {stats_path} --beqtl-summary {work_dir}/SMR_omics/eQTLs/eQTLgen/cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense --out {out_dir}/{disease}_expression_blood_eQTLgen_SMR_allChrs --bfile .../omicSynth/additional_reference_data/1KG_and_refFlats/1kgenomes/1kg_eur_1pct_ref_panel --smr-multi \n',
            ' \n',
        ])

    # swarm command
    cmd = f'swarm -b 15 -f {swarm_file} -g 48 -t 12 --logdir .../omicSynth/v8/swarms --module SMR'

    shell_do(cmd)

In [3]:
# Project root; passed to the SMR helpers as work_dir.
work_dir = '.../omicSynth'

# Directory handed to the SMR helpers for their --out prefixes.
out_dir = '.../omicSynth/v8/intermediate_results'

# Directory holding the GTEx v8 BESD reference files (note the trailing slash).
v8_GTEx_path = '.../omicSynth/SMR_omics/eQTLs/GTEx_v8/eQTL_besd_lite/'

## All Diseases

### Data Prep

In [4]:
# List every file in the GTEx v8 reference directory.
gtex_raw = glob.glob(f'{v8_GTEx_path}/*')

# Tissue name = file name with its final extension stripped. More than one file
# can map to the same name, so duplicates are dropped; dict.fromkeys keeps the
# first-seen order.
gtex_short = list(dict.fromkeys(
    gtex_file.split('/')[-1].rsplit('.', 1)[0] for gtex_file in gtex_raw
))

In [5]:
# Paths of every outcome summary-statistics file; used only to derive disease names.
dx_list = glob.glob('.../omicSynth/outcome_summary_stats/*.ma')

# Disease name = file name without its '.ma' extension. Only the final extension
# is stripped, so a name containing a dot survives intact. metaBrain editions
# are skipped here; they are looked up by disease name in a later cell.
dx_clean = sorted(
    path.split('/')[-1].rsplit('.', 1)[0]
    for path in dx_list
    if 'metaBrain' not in path
)

# AD uses different summary statistics, so it is excluded by name. This used to
# be dx_clean[1:], which drops whatever happens to sort first.
dx_sum38 = [dx for dx in dx_clean if dx != 'AD']

# gr37 list: additionally drop LBD (different stats are used) and the old FTD
# summary statistics. Filtering tolerates a missing name; list.remove() raised.
dx_sum = [dx for dx in dx_sum38 if dx not in ('LBD', 'FTDold')]

In [6]:
# gr37 (hg19) summary statistics
# AD, BD and LBD have dedicated files outside outcome_summary_stats.
sumstats_ad = '.../omicSynth/intermediate_results_AD_bellenguez/AD_hg19_smr.txt'
sumstats_bd = '.../omicSynth/intermediate_results_BD_schiz/BDI_hg19_smr.txt'
sumstats_lbd = '.../omicSynth/lbd_smr/LBD_37.ma'

# One path per entry of dx_sum, in the same order, so the two lists can be zipped.
sumstats = [
    glob.glob(f'.../omicSynth/outcome_summary_stats/{dx_name}.ma')[0]
    for dx_name in dx_sum
]

In [7]:
# gr38 summary statistics (MetaBrain editions)
# AD and BD have dedicated files outside outcome_summary_stats.
sumstats_meta_ad = '.../omicSynth/intermediate_results_AD_bellenguez/AD_metabrain_version_smr.txt'
sumstats_meta_bd = '.../omicSynth/intermediate_results_BD_schiz/BDI_hg38_smr.txt'

# One path per entry of dx_sum38, in the same order as dx_sum38.
sumstats_meta = [
    glob.glob(f'.../omicSynth/outcome_summary_stats/{dx_name}_metaBrain_gr38_edition.ma')[0]
    for dx_name in dx_sum38
]

### GR37 Runs

In [8]:


# NOTE(review): this reassigns out_dir (set to '.../omicSynth/v8/intermediate_results'
# in an earlier cell), so the runs below write under 'inter_test' instead —
# confirm that is intended.
out_dir = '.../omicSynth/v8/inter_test'


In [None]:
# AD, BD and LBD are run separately because each has its own summary-statistics path.
for special_dx, special_stats in (
    ('AD', sumstats_ad),
    ('BD', sumstats_bd),
    ('LBD', sumstats_lbd),
):
    SMR(special_dx, special_stats, work_dir, out_dir)

In [None]:
# Core SMR run for the FTD_Pottier37 summary statistics.
SMR(
    disease='FTD_pottier',
    stats_path='.../omicSynth/outcome_summary_stats/FTD_Pottier37.ma',
    work_dir=work_dir,
    out_dir=out_dir,
)

In [None]:
# Core SMR run for every remaining disease; dx_sum and sumstats are parallel lists.
for disease_name, stats_file in zip(dx_sum, sumstats):
    SMR(disease=disease_name, stats_path=stats_file, work_dir=work_dir, out_dir=out_dir)

In [None]:
# AD and BD are run separately because each has its own summary-statistics path.
for special_dx, special_stats in (('AD', sumstats_ad), ('BD', sumstats_bd)):
    SMR_gtex(special_dx, special_stats, work_dir, out_dir, gtex_short)

In [None]:
# GTEx SMR run for every remaining disease; dx_sum and sumstats are parallel lists.
for disease_name, stats_file in zip(dx_sum, sumstats):
    SMR_gtex(
        disease=disease_name,
        stats_path=stats_file,
        work_dir=work_dir,
        out_dir=out_dir,
        gtex_list=gtex_short,
    )

In [None]:
# GTEx SMR run for the FTD_Pottier37 summary statistics.
SMR_gtex(
    disease='FTD_pottier',
    stats_path='.../omicSynth/outcome_summary_stats/FTD_Pottier37.ma',
    work_dir=work_dir,
    out_dir=out_dir,
    gtex_list=gtex_short,
)

In [13]:
# AD and BD are run separately because each has its own summary-statistics path.
for special_dx, special_stats in (('AD', sumstats_ad), ('BD', sumstats_bd)):
    SMR_psych(special_dx, special_stats, work_dir, out_dir)

In [None]:
# PsychENCODE SMR run for LBD, using its dedicated gr37 summary statistics.
SMR_psych(disease='LBD', stats_path=sumstats_lbd, work_dir=work_dir, out_dir=out_dir)

In [None]:
# PsychENCODE SMR run for PSP on its own.
# NOTE(review): sumstats[-4] is assumed to be the PSP file; that depends on the
# sorted order of dx_sum — confirm.
SMR_psych(disease='PSP', stats_path=sumstats[-4], work_dir=work_dir, out_dir=out_dir)

In [None]:
# PsychENCODE SMR run for the FTD_Pottier37 summary statistics.
SMR_psych(
    disease='FTD_pottier',
    stats_path='.../omicSynth/outcome_summary_stats/FTD_Pottier37.ma',
    work_dir=work_dir,
    out_dir=out_dir,
)

In [None]:
# PsychENCODE SMR run for every remaining disease; dx_sum and sumstats are parallel lists.
for disease_name, stats_file in zip(dx_sum, sumstats):
    SMR_psych(disease=disease_name, stats_path=stats_file, work_dir=work_dir, out_dir=out_dir)

In [None]:
# AD, BD and LBD are run separately because each has its own summary-statistics path.
for special_dx, special_stats in (
    ('AD', sumstats_ad),
    ('BD', sumstats_bd),
    ('LBD', sumstats_lbd),
):
    SMR_pQTL(special_dx, special_stats, work_dir, out_dir)

In [None]:
# pQTL SMR run for PSP on its own.
# NOTE(review): sumstats[-4] is assumed to be the PSP file; that depends on the
# sorted order of dx_sum — confirm.
SMR_pQTL('PSP', sumstats[-4], work_dir, out_dir)

# Label aligned with every other FTD_Pottier37.ma run in this notebook
# ('FTD_pottier'). It was 'FT2' here, which names the swarm file and the
# outputs differently from the other assays.
SMR_pQTL('FTD_pottier', '.../omicSynth/outcome_summary_stats/FTD_Pottier37.ma', work_dir, out_dir)

In [None]:
# pQTL SMR run for every remaining disease; dx_sum and sumstats are parallel lists.
for disease_name, stats_file in zip(dx_sum, sumstats):
    SMR_pQTL(disease=disease_name, stats_path=stats_file, work_dir=work_dir, out_dir=out_dir)

In [None]:
# AD, BD and LBD are run separately because each has its own summary-statistics path.
for special_dx, special_stats in (
    ('AD', sumstats_ad),
    ('BD', sumstats_bd),
    ('LBD', sumstats_lbd),
):
    SMR_eQTL(special_dx, special_stats, work_dir, out_dir)

In [None]:
# Multi-ancestry eQTL SMR run for the FTD_Pottier37 summary statistics.
SMR_eQTL(
    disease='FTD_pottier',
    stats_path='.../omicSynth/outcome_summary_stats/FTD_Pottier37.ma',
    work_dir=work_dir,
    out_dir=out_dir,
)

In [None]:
# Multi-ancestry eQTL SMR run for every remaining disease; dx_sum and sumstats are parallel lists.
for disease_name, stats_file in zip(dx_sum, sumstats):
    SMR_eQTL(disease=disease_name, stats_path=stats_file, work_dir=work_dir, out_dir=out_dir)

In [None]:
# AD, BD and LBD are run separately because each has its own summary-statistics path.
for special_dx, special_stats in (
    ('AD', sumstats_ad),
    ('BD', sumstats_bd),
    ('LBD', sumstats_lbd),
):
    SMR_eqtlGen(special_dx, special_stats, work_dir, out_dir)

In [None]:
# eQTLgen-only SMR run for every remaining disease; dx_sum and sumstats are parallel lists.
for disease_name, stats_file in zip(dx_sum, sumstats):
    SMR_eqtlGen(disease=disease_name, stats_path=stats_file, work_dir=work_dir, out_dir=out_dir)

### MetaBrain GR38 Runs

In [None]:
# AD and BD are run separately because each has its own gr38 summary-statistics path.
for special_dx, special_stats in (('AD', sumstats_meta_ad), ('BD', sumstats_meta_bd)):
    SMR_meta(special_dx, special_stats, work_dir, out_dir)

In [None]:
# MetaBrain SMR run for the gr38 edition of the FTD_Pottier37 summary statistics.
SMR_meta(
    disease='FTD_pottier',
    stats_path='.../omicSynth/outcome_summary_stats/FTD_Pottier37_metaBrain_gr38_edition.ma',
    work_dir=work_dir,
    out_dir=out_dir,
)

In [None]:
# gr38 (MetaBrain) runs for the remaining diseases.
# sumstats_meta was built from dx_sum38, so it has to be paired with dx_sum38.
# Zipping it with dx_sum (which has LBD and FTDold removed) would pair every
# disease that sorts after a removed name with the wrong summary-statistics file.
# NOTE(review): this also submits LBD and FTDold for gr38 — confirm FTDold is wanted.
for dx, stat in zip(dx_sum38, sumstats_meta):
    SMR_meta(dx, stat, work_dir, out_dir)