Purpose: run S-PrediXcan and FUSION TWAS on the externalizing1.0 data.

# S-PrediXcan

In [None]:
#!/bin/bash
#SBATCH --job-name predixcan_ext
#SBATCH --partition condo
#SBATCH --qos condo
#SBATCH --nodes 1
#SBATCH -a 1-13
#SBATCH -c 4
#SBATCH -t 2:00:00
#SBATCH --mem-per-cpu 8G
#SBATCH -o /tscc/nfs/home/bsleger/bsl/SUD_cross_species/job_run_out/predixcan_ext-%j.o
#SBATCH -e /tscc/nfs/home/bsleger/bsl/SUD_cross_species/job_run_out/predixcan_ext-%j.e
#SBATCH --mail-type END,FAIL
#SBATCH --mail-user bsleger@ucsd.edu
#SBATCH --account csd795

# S-PrediXcan array job: each array task runs software/SPrediXcan.py for one
# model database (*.db) found in GTEx/brain_models/ and writes one CSV under
# results/. The array range above (-a 1-13) has to match the number of *.db
# files in that directory.

METAXCAN_DIR=/tscc/projects/ps-palmer/brittany/MetaXcan
GWAS_FILE=/tscc/projects/ps-palmer/brittany/SUD_cross_species/ext_sumstat_2019/FINAL.EXT_COMMON_FACTOR.EXTERNALIZING.20191014.PREPARED.wFREQ.A1.txt.gz

cd "$METAXCAN_DIR" || { echo "cannot cd to $METAXCAN_DIR" >&2; exit 1; }

# Build the model list with a glob anchored at the MetaXcan checkout, so the
# list no longer depends on the directory the job was submitted from.
# nullglob makes "no models found" an empty array instead of a literal pattern.
shopt -s nullglob
db_list=(GTEx/brain_models/*.db) #len=13 change back for rerun all
shopt -u nullglob

#failed_rerun=( en_Brain_Caudate_basal_ganglia.db en_Brain_Cerebellar_Hemisphere.db en_Brain_Cerebellum.db en_Brain_Cortex.db en_Brain_Hypothalamus.db en_Brain_Putamen_basal_ganglia.db )

source activate imlabtools

# Refuse to run outside an array job or with an index that has no model;
# otherwise SPrediXcan would be started with an empty model name.
task_id=${SLURM_ARRAY_TASK_ID:?must be run as a SLURM array task}
if (( task_id < 1 || task_id > ${#db_list[@]} )); then
    echo "array task ${task_id} has no model (found ${#db_list[@]} *.db files)" >&2
    exit 1
fi

# Array tasks are 1-based, bash arrays 0-based; keep only the file name.
m=${db_list[task_id-1]##*/}
#m=${failed_rerun[$SLURM_ARRAY_TASK_ID-1]}
echo "$m"

software/SPrediXcan.py \
    --model_db_path "GTEx/brain_models/${m}" \
    --covariance GTEx/gtex_v8_expression_elastic_net_snp_smultixcan_covariance.txt.gz \
    --gwas_file "$GWAS_FILE" \
    --snp_column SNP \
    --effect_allele_column A1 \
    --non_effect_allele_column A2 \
    --beta_column BETA.A1 \
    --pvalue_column P \
    --output_file "results/predixcan_externalizing2019_${m##*/}.csv"


# FUSION association

In [None]:
#decompress all of the GTEx models
# FUS_PATH is not assigned anywhere in this cell; when the session has not set
# it, fall back to the FUSION checkout path used by the FUSION job script.
FUS_PATH=${FUS_PATH:-/tscc/nfs/home/bsleger/bsl/fusion_twas-master/}
# ${FUS_PATH%/} avoids a doubled slash whether or not FUS_PATH ends in "/".
# Only extract when the cd succeeded, so tar never runs in the wrong directory.
if cd "${FUS_PATH%/}/WEIGHTS"; then
    for f in *.tar.bz2; do
        # An unmatched glob stays literal; skip it instead of handing it to tar.
        [[ -e "$f" ]] || continue
        tar xjf "$f"
    done
fi

The primary input is genome-wide summary statistics in LD-score format. At minimum, this is a flat file with a header row containing the following fields:

SNP – SNP identifier (rsID)
A1 – first allele (effect allele)
A2 – second allele (other allele)
Z – Z-scores, sign with respect to A1.

## Run FUSION (job script)

In [None]:
#!/bin/bash
#SBATCH --job-name ext_FUSION
#SBATCH --partition condo
#SBATCH --qos condo
#SBATCH -a 1-9
#SBATCH --time 2:00:00
#SBATCH --nodes 1
#SBATCH --cpus-per-task 4
#SBATCH --mem-per-cpu 4G
#SBATCH -o /tscc/nfs/home/bsleger/bsl/SUD_cross_species/ext_FUSION-%j.o
#SBATCH -e /tscc/nfs/home/bsleger/bsl/SUD_cross_species/ext_FUSION-%j.e
#SBATCH --mail-type END,FAIL
#SBATCH --mail-user bsleger@ucsd.edu
#SBATCH --account csd795

# FUSION array job: each array task picks one WEIGHTS/GTEx.Brain*.pos file and
# runs FUSION.assoc_test.R on it for chromosomes 1-22, writing
# ${OUT_PATH}${OUT_PREF}_<pos file>_<chr>.dat for every chromosome.

FUS_PATH='/tscc/nfs/home/bsleger/bsl/fusion_twas-master/'
SUD_PATH='/tscc/projects/ps-palmer/brittany/SUD_cross_species/'
DATA_FILE='ext_sumstat_2019/FINAL.EXT_COMMON_FACTOR.EXTERNALIZING.20191014.PREPARED.wFREQ.A1.txt'
OUT_PATH="${SUD_PATH}ext_FUSION/"
OUT_PREF="ext2019"

cd "$FUS_PATH" || { echo "cannot cd to $FUS_PATH" >&2; exit 1; }

# Glob the .pos files instead of parsing `ls`; nullglob turns "nothing found"
# into an empty array. The directory part is stripped so TISSUE is a bare file
# name, which is what the output names and --weights below are built from.
shopt -s nullglob
db_list=(WEIGHTS/GTEx.Brain*.pos)
shopt -u nullglob
db_list=("${db_list[@]##*/}")
# echo ${#db_list[*]}
#db list ls 9 long - make job array that's 1-9

source activate lzenv

# Refuse to run outside an array job or with an index that has no .pos file.
task_id=${SLURM_ARRAY_TASK_ID:?must be run as a SLURM array task}
if (( task_id < 1 || task_id > ${#db_list[@]} )); then
    echo "array task ${task_id} has no weights file (found ${#db_list[@]} .pos files)" >&2
    exit 1
fi

# Fail once up front rather than once per chromosome.
if [[ ! -r "${SUD_PATH}${DATA_FILE}" ]]; then
    echo "cannot read summary statistics: ${SUD_PATH}${DATA_FILE}" >&2
    exit 1
fi

# The output directory is not created anywhere else in this script.
mkdir -p "$OUT_PATH" || exit 1

# Array tasks are 1-based, bash arrays 0-based.
TISSUE=${db_list[task_id-1]}
#m=${db_list[1]}
echo "$TISSUE"

for CHR in {1..22}; do
    echo "$CHR"
    OUT="${OUT_PATH}${OUT_PREF}_${TISSUE}_${CHR}.dat"
    echo "$OUT"
    Rscript FUSION.assoc_test.R \
        --sumstats "${SUD_PATH}${DATA_FILE}" \
        --weights "./WEIGHTS/${TISSUE}" \
        --weights_dir ./WEIGHTS/ \
        --ref_ld_chr ./LDREF/1000G.EUR. \
        --chr "$CHR" \
        --out "$OUT"
done

## concat results together

### Python- get set of prefixes 

In [1]:
import pandas as pd
import os

In [2]:
# Work from the directory that holds the per-chromosome FUSION output files.
fusion_dir = "/tscc/projects/ps-palmer/brittany/SUD_cross_species/ext_FUSION"
os.chdir(fusion_dir)

In [3]:
# Names of every entry in the current working directory.
files = os.listdir(".")

In [8]:
# Drop the FUSION_concat entry from the listing, if it is there.
files
try:
    files.remove('FUSION_concat')
except ValueError:
    # FUSION_concat was not in the listing; nothing to remove.
    pass

In [13]:
# Reduce every file name to the text before its second "." (for example
# "ext2019_GTEx.Brain_Cortex.pos_1.dat" -> "ext2019_GTEx.Brain_Cortex") and
# collect the distinct results.
prefixes = set()
for name in files:
    # Hidden entries such as '.ipynb_checkpoints' are not result files.
    if name.startswith('.'):
        continue
    parts = name.split('.')
    # A name without a "." has no second component; skip it instead of
    # raising IndexError.
    if len(parts) < 2:
        continue
    prefixes.add(parts[0] + '.' + parts[1])

In [15]:
# Print the prefixes as a bash array literal, e.g. ( 'a' 'b' ), ready to paste
# into the bash concat cell. The text is kept in its own variable rather than
# rebinding the builtin `str`, and the prefixes are sorted so the printed line
# is the same on every run.
quoted = ["'" + p + "'" for p in sorted(prefixes)]
if quoted:
    bash_array = '( ' + ' '.join(quoted) + ' )'
else:
    bash_array = '( )'
print(bash_array)

( 'ext2019_GTEx.Brain_Caudate_basal_ganglia' 'ext2019_GTEx.Brain_Cortex' 'ext2019_GTEx.Brain_Putamen_basal_ganglia' 'ext2019_GTEx.Brain_Hippocampus' 'ext2019_GTEx.Brain_Nucleus_accumbens_basal_ganglia' 'ext2019_GTEx.Brain_Hypothalamus' 'ext2019_GTEx.Brain_Cerebellum' 'ext2019_GTEx.Brain_Frontal_Cortex_BA9' 'ext2019_GTEx.Brain_Cerebellar_Hemisphere' )


### bash concat files together

In [1]:
#bash
#run to concat all the files

#######################################
# Concatenate per-chromosome FUSION result tables into one table per prefix.
# For every prefix, reads <dir>/<prefix>.pos_<chr>.dat for chr 1..22, plus
# <prefix>.pos_6.dat.MHC right after chromosome 6, and writes
# <dir>/FUSION_concat/<prefix>.dat. The first input that is present is copied
# whole (keeping its header line); the first line of every later input is
# dropped. The output is rebuilt from scratch on every call.
# Arguments:
#   $1   - directory holding the per-chromosome files
#   $2.. - prefixes to concatenate
# Outputs:
#   each input file name on stdout; a warning on stderr for every input that
#   is missing or empty (it is skipped)
# Returns:
#   0 on success, 1 if the directory is missing or an output cannot be created
#######################################
concat_fusion_results() {
    local dir=$1
    shift
    local p chr file out have_header
    local -a inputs

    if [[ ! -d "$dir" ]]; then
        echo "concat_fusion_results: no such directory: $dir" >&2
        return 1
    fi
    #make directory for concated files
    mkdir -p "$dir/FUSION_concat" || return 1

    for p in "$@"; do
        out="$dir/FUSION_concat/${p}.dat"
        # Start from an empty file so a rerun never appends to stale output.
        : > "$out" || return 1
        have_header=0
        for chr in {1..22}; do
            inputs=("${p}.pos_${chr}.dat")
            # Chromosome 6 has an extra .MHC table that goes right after it.
            if (( chr == 6 )); then
                inputs+=("${p}.pos_${chr}.dat.MHC")
            fi
            for file in "${inputs[@]}"; do
                echo "$file"
                if [[ ! -s "$dir/$file" ]]; then
                    echo "warning: $file is missing or empty, skipped" >&2
                    continue
                fi
                if (( have_header )); then
                    awk 'FNR>1' "$dir/$file" >> "$out"
                else
                    cat "$dir/$file" >> "$out"
                    have_header=1
                fi
            done
        done
    done
    return 0
}

prefixes=( 'ext2019_GTEx.Brain_Caudate_basal_ganglia' 'ext2019_GTEx.Brain_Cortex' 'ext2019_GTEx.Brain_Putamen_basal_ganglia' 'ext2019_GTEx.Brain_Hippocampus' 'ext2019_GTEx.Brain_Nucleus_accumbens_basal_ganglia' 'ext2019_GTEx.Brain_Hypothalamus' 'ext2019_GTEx.Brain_Cerebellum' 'ext2019_GTEx.Brain_Frontal_Cortex_BA9' 'ext2019_GTEx.Brain_Cerebellar_Hemisphere' )

# cd first so the session still ends up in the results directory; nothing is
# concatenated when the directory cannot be entered.
cd /tscc/projects/ps-palmer/brittany/SUD_cross_species/ext_FUSION \
    && concat_fusion_results . "${prefixes[@]}"

# makes files /tscc/projects/ps-palmer/brittany/SUD_cross_species/ext_FUSION/FUSION_concat/<prefix>.dat

ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_1.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_2.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_3.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_4.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_5.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_6.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_6.dat.MHC
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_7.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_8.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_9.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_10.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_11.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_12.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_13.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_14.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_15.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_16.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_17.dat
ext2019_GTEx.Brain_Caudate_basal_ganglia.pos_18.dat
ext2019_GTEx.Brain

The PrediXcan and FUSION results are compared in a different notebook :)

SUD_cross_species/scripts/TWAS_FUSION_predixcan_comparison_human_rat.ipynb