In [None]:
import pandas as pd
import subprocess
import sys
import numpy as np
import os
import shutil
import joblib

In [None]:
def shell_do(command, log=False, return_log=False):
    """Run a command line as a subprocess and optionally print/return its stdout.

    Parameters
    ----------
    command : str
        Command line to execute. It is tokenised with ``shlex.split`` so that
        quoted arguments containing whitespace stay together. No shell is
        involved, so pipes, redirects and globs are not interpreted.
    log : bool, default False
        Print the captured stdout after the command has finished.
    return_log : bool, default False
        Return the captured stdout as a string.

    Returns
    -------
    str or None
        The UTF-8 decoded stdout when ``return_log`` is true, otherwise ``None``.
        Only stdout is captured; stderr goes wherever the parent's stderr goes.
    """
    # local import so this cell does not depend on an edit to the import cell
    import shlex

    print(f'Executing: {(" ").join(command.split())}', file=sys.stderr)

    # shlex.split (unlike str.split) honours shell-style quoting
    res = subprocess.run(shlex.split(command), stdout=subprocess.PIPE)

    # surface failures instead of letting them pass unnoticed
    if res.returncode != 0:
        print(f'Command exited with status {res.returncode}', file=sys.stderr)

    # decode lazily, and only once, when the output is actually requested
    if log or return_log:
        output = res.stdout.decode('utf-8')
        if log:
            print(output)
        if return_log:
            return output

In [None]:
def reformat_bim(bim_path):
    """Rewrite the variant IDs of a .bim file as chr:basepair:ref:alt.

    The file at ``bim_path`` is read, its second column is replaced by the new
    ID and the table is written back to the same path (tab-separated, no
    header, no index), overwriting the original file. The first rows and the
    shape are printed before the change, and the first rows again after it.

    Parameters
    ----------
    bim_path : str
        Path to a whitespace-delimited, header-less file with six columns,
        named here CHR, ID, LOC, BP, ALT and REF in that order.
    """
    # read bim file, name columns (raw string: '\s' is not a valid str escape)
    bim = pd.read_csv(bim_path, sep=r'\s+', header=None)
    bim.columns = ['CHR','ID','LOC','BP','ALT','REF']
    print(bim.head())
    print(bim.shape)

    # change to chr:basepair:ref:alt format and write to file.
    # The allele columns are cast to str too: if pandas parses them as numbers
    # (e.g. numerically coded alleles) plain concatenation raises a TypeError.
    bim['ID'] = (
        bim['CHR'].astype(str) + ':' + bim['BP'].astype(str)
        + ':' + bim['REF'].astype(str) + ':' + bim['ALT'].astype(str)
    )
    print(bim.head())
    bim.to_csv(bim_path, sep='\t', index=False, header=False)

In [None]:
def liftover_genotypes(path_dict, bash_path, swarm_path='liftover.swarm'):
    """Write one liftover command per cohort to a swarm file and submit it.

    Parameters
    ----------
    path_dict : dict
        Maps cohort name -> dict with the keys 'geno', 'out_dir' and 'out',
        which are passed to the script as -i, -d and -o respectively.
    bash_path : str
        Path of the bash script that is run once per cohort.
    swarm_path : str, default 'liftover.swarm'
        File the commands are written to (overwritten if it already exists).
    """
    # write swarm file; the with-block closes it, no explicit close() is needed
    with open(swarm_path, 'w') as f:
        for paths in path_dict.values():
            f.write(f"bash {bash_path} -i {paths['geno']} -d {paths['out_dir']} -o {paths['out']}\n")

    # queue swarm job
    shell_do(f'swarm -f {swarm_path} -g 500 -t 32 --partition largemem --time 10-00:00:00 --module python/3.7,plink/1.9')

In [None]:
def liftover_stats(path_dict, bash_path, swarm_path='liftover_stats.swarm'):
    """Write one summary-statistics liftover command per entry and submit it.

    Parameters
    ----------
    path_dict : dict
        Maps a label -> dict with the keys 'stats_path', 'chrom', 'pos' and
        'out_path', which are passed to the script as -i, -c, -p and -o.
    bash_path : str
        Path of the bash script that is run once per entry.
    swarm_path : str, default 'liftover_stats.swarm'
        File the commands are written to (overwritten if it already exists).
    """
    # NOTE: need to set up a per-user R library and download bigsnpr beforehand
    # the with-block closes the file, no explicit close() is needed
    with open(swarm_path, 'w') as f:
        for paths in path_dict.values():
            f.write(f"bash {bash_path} -i {paths['stats_path']} -c {paths['chrom']} -p {paths['pos']} -o {paths['out_path']}\n")

    # queue swarm job
    shell_do(f'swarm -f {swarm_path} -g 50 --module R')

In [None]:
# root of the project; replace the placeholder with the real location
wd = 'insert_path'

# bash wrappers: one lifts over genotypes, the other summary statistics
liftover_bash_path = f'{wd}/processing/liftover/liftover.sh'
liftover_stats_bash_path = f'{wd}/processing/liftover/liftover_stats.sh'

# per-cohort path settings for the genotype liftover (filled in below)
liftover_path_dict = {cohort: {} for cohort in ('JG',)}

# per-disease path settings for the summary-stats liftover (filled in below)
liftover_stats_path_dict = {disease: {} for disease in ('AD', 'PD', 'ALS', 'FTD')}

In [None]:
# JG cohort: input genotype prefix, output directory and output prefix
jg_geno_path = (
    f'{wd}/ROSMAPMayoRNAseqMSBB/joint_genotyping/qc/'
    'jointGenotypingROSMAPMayoRNAseqMSBB_pheno_qc'
)
jg_out_dir = f'{wd}/ROSMAPMayoRNAseqMSBB/joint_genotyping/lifted'
# create the output directory up front; no error if it already exists
os.makedirs(name=jg_out_dir, exist_ok=True)
jg_out_path = (
    f'{jg_out_dir}/'
    'jointGenotypingROSMAPMayoRNAseqMSBB_pheno_qc_lifted'
)

In [None]:
# register the JG paths under the keys liftover_genotypes() reads
liftover_path_dict['JG'].update(
    geno=jg_geno_path,
    out_dir=jg_out_dir,
    out=jg_out_path,
)

In [None]:
# write the swarm file for every cohort and queue the job
liftover_genotypes(path_dict=liftover_path_dict, bash_path=liftover_bash_path)

In [None]:
# rewrite the variant IDs of the lifted .bim in place;
# run this only once the liftover job has produced the lifted files
reformat_bim(bim_path=f'{jg_out_path}.bim')

In [None]:
# location of the R script that lifts over summary statistics
liftover_path = (
    f'{wd}/processing/liftover/'
    'liftover_summary_stats.R'
)

In [None]:
# read in PD stats (raw string: '\s' is not a valid str escape sequence)
pd_stats_path_hg19 = 'insert_pd_stats_path'
pd_stats = pd.read_csv(pd_stats_path_hg19, sep=r'\s+')

# create chr and pos columns from SNP; the column is split on ':' only once.
# assumes SNP looks like 'chr<chrom>:<pos>' -- TODO confirm against the input file
snp_fields = pd_stats['SNP'].str.split(':')
# strip a leading 'chr' (any case); IDs without the prefix are kept as they are
# instead of turning into NaN
pd_stats['chr'] = snp_fields.str[0].str.replace(r'^chr', '', case=False, regex=True)
pd_stats['pos'] = snp_fields.str[1]

# write to file, creating the output directory first if it does not exist yet
pd_stats_path = f'{wd}/processing/sum_stats/nallsEtAl2019.txt'
os.makedirs(os.path.dirname(pd_stats_path), exist_ok=True)
pd_stats.to_csv(pd_stats_path, sep='\t', index=False)

In [None]:
# For each disease: input summary stats, lifted (hg38) output file, and the
# names of the chromosome / position columns in the input.

# AD -- Schwartzentruber weights
ad_stats_path = 'insert_ad_stats_path'
ad_stats_out_path = f'{wd}/processing/sum_stats/Schwartzentruber_2021_lifted_hg38.txt'
ad_chrom, ad_pos = 'chromosome', 'base_pair_location'

# PD -- the input is the reformatted file written in the PD cell
pd_stats_out_path = f'{wd}/processing/sum_stats/nallsEtAl2019_lifted_hg38.txt'
pd_chrom, pd_pos = 'chr', 'pos'

# ALS
als_stats_path = 'insert_als_stats_path'
als_stats_out_path = f'{wd}/processing/sum_stats/alsMetaSummaryStats_lifted_hg38.txt'
als_chrom, als_pos = 'CHR', 'BP'

# FTD
ftd_stats_path = 'insert_ftd_stats_path'
ftd_stats_out_path = f'{wd}/processing/sum_stats/Meta-analysis.Matched.AllResults_lifted_hg38.txt'
ftd_chrom, ftd_pos = 'Chr', 'Bp'

In [None]:
# fill in, per disease, the four keys liftover_stats() reads
liftover_stats_path_dict['AD'].update(
    stats_path=ad_stats_path,
    out_path=ad_stats_out_path,
    chrom=ad_chrom,
    pos=ad_pos,
)
liftover_stats_path_dict['PD'].update(
    stats_path=pd_stats_path,
    out_path=pd_stats_out_path,
    chrom=pd_chrom,
    pos=pd_pos,
)
liftover_stats_path_dict['ALS'].update(
    stats_path=als_stats_path,
    out_path=als_stats_out_path,
    chrom=als_chrom,
    pos=als_pos,
)
liftover_stats_path_dict['FTD'].update(
    stats_path=ftd_stats_path,
    out_path=ftd_stats_out_path,
    chrom=ftd_chrom,
    pos=ftd_pos,
)

In [None]:
# write the swarm file for every set of summary stats and queue the job
liftover_stats(path_dict=liftover_stats_path_dict, bash_path=liftover_stats_bash_path)