In [None]:
import os
# Set the MKL, NumExpr and OpenMP thread-count variables to '1' for this
# process. This cell runs before the numeric libraries are imported below;
# presumably the goal is to keep them single-threaded -- TODO confirm.
os.environ.update(
    dict.fromkeys(
        ('MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS', 'OMP_NUM_THREADS'),
        '1',
    )
)

In [None]:
import scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import roc_auc_score

In [None]:
# PRS-CS function
def prs_cs(src='PRScs/PRScs.py',
            ref_dir='../reference_data/1kgp/ldblk_1kg_eas',
            bim_prefix='../target_data/target.data.impute',
            sst_file=None,
            n_gwas=79550,
            chrom='1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22',
            out_dir=None,
            phi=1e-2,
            seed=68):
    """Run the PRS-CS script as a child process and wait for it to finish.

    Each argument is forwarded to the script as ``--<name>=<value>``.

    Parameters
    ----------
    src : str
        Path of the PRS-CS script, executed with ``python``.
    ref_dir : str
        Value for ``--ref_dir`` (LD reference panel directory).
    bim_prefix : str
        Value for ``--bim_prefix`` (target data prefix, without ``.bim``).
    sst_file : str
        Value for ``--sst_file`` (GWAS summary statistics). Required.
    n_gwas : int
        Value for ``--n_gwas``.
    chrom : str
        Comma-separated chromosome list for ``--chrom``.
    out_dir : str
        Value for ``--out_dir``. Required. The calls in this notebook pass a
        directory plus a file-name prefix, and ``concat_prscs_output`` later
        looks for files that start with that prefix.
    phi : float or None
        Value for ``--phi``; ``None`` omits the flag entirely.
    seed : int or None
        Value for ``--seed``; ``None`` omits the flag entirely.

    Raises
    ------
    ValueError
        If ``sst_file`` or ``out_dir`` is ``None``.
    subprocess.CalledProcessError
        If the script exits with a non-zero status.
    """
    # Function-scope import so this cell does not depend on the import cell.
    import subprocess

    # Without this check the literal text "None" would be handed to the script.
    if sst_file is None:
        raise ValueError('sst_file must be provided')
    if out_dir is None:
        raise ValueError('out_dir must be provided')

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids embedding continuation whitespace.
    command = [
        'python', str(src),
        f'--ref_dir={ref_dir}',
        f'--bim_prefix={bim_prefix}',
        f'--sst_file={sst_file}',
        f'--n_gwas={n_gwas}',
        f'--chrom={chrom}',
    ]
    if phi is not None:
        command.append(f'--phi={phi}')
    if seed is not None:
        command.append(f'--seed={seed}')
    command.append(f'--out_dir={out_dir}')

    # check=True surfaces a failed run instead of silently continuing.
    subprocess.run(command, check=True)

def concat_prscs_output(out_dir=None,
                        out_name=None):
    """Combine PRS-CS posterior-effect files into one sorted table.

    Every file in ``out_dir`` whose name starts with ``out_name`` and contains
    ``'pst_eff'`` is read as a header-less, tab-separated table with the
    columns ``CHROM, SNP, POS, A1, A2, pst_eff``.

    Parameters
    ----------
    out_dir : str
        Directory to scan. Required.
    out_name : str
        File-name prefix to match. Required.

    Returns
    -------
    pandas.DataFrame
        All matching rows sorted by ``CHROM`` then ``POS`` with a fresh
        0..n-1 index. An empty frame with the six columns is returned when no
        file matches.

    Raises
    ------
    ValueError
        If ``out_dir`` or ``out_name`` is ``None``.
    """
    # os.listdir(None) would silently scan the current working directory.
    if out_dir is None or out_name is None:
        raise ValueError('out_dir and out_name must both be provided')

    columns = ['CHROM', 'SNP', 'POS', 'A1', 'A2', 'pst_eff']

    # Collect the pieces and concatenate once: repeated pd.concat in a loop is
    # quadratic, and seeding with an empty object-dtype frame degrades dtypes.
    frames = []
    for name in sorted(os.listdir(out_dir)):
        if name.startswith(out_name) and ('pst_eff' in name):
            piece = pd.read_table(os.path.join(out_dir, name), header=None)
            piece.columns = columns  # raises if the file is not 6 columns wide
            frames.append(piece)

    if not frames:
        return pd.DataFrame([], columns=columns)

    res = pd.concat(frames, axis=0)
    return res.sort_values(by=['CHROM', 'POS'], ignore_index=True)

def prscs_score(target_prefix='../target_data/target.data.impute',
                out_dir='../results/breast_cancer/',
                out_name=None):
    """Merge the PRS-CS weights and score the target genotypes with PLINK.

    Steps:
      1. ``concat_prscs_output`` gathers the posterior-effect files found in
         ``out_dir`` under the ``out_name`` prefix.
      2. The merged table is written without header to
         ``<out_dir>/<out_name>.concat.txt`` (tab-separated).
      3. ``plink --score`` is run on that file using columns 2, 4 and 6, which
         are the ``SNP``, ``A1`` and ``pst_eff`` columns of the merged table.

    Parameters
    ----------
    target_prefix : str
        Value for PLINK ``--bfile``.
    out_dir : str
        Directory holding the PRS-CS output; PLINK results are written here too.
    out_name : str
        Prefix shared by the PRS-CS files and by the PLINK outputs. Required.

    Raises
    ------
    ValueError
        If ``out_name`` is ``None``.
    FileNotFoundError
        If no posterior-effect rows are found for ``out_name`` in ``out_dir``.
    subprocess.CalledProcessError
        If PLINK exits with a non-zero status.
    """
    # Function-scope import so this cell does not depend on the import cell.
    import subprocess

    if out_name is None:
        raise ValueError('out_name must be provided')

    # Build the shared prefix once instead of slicing '.concat.txt' back off
    # the weights path with a hard-coded length.
    out_prefix = os.path.join(out_dir, out_name)
    file_path = out_prefix + '.concat.txt'

    prscs_output = concat_prscs_output(out_dir=out_dir, out_name=out_name)
    if prscs_output.empty:
        raise FileNotFoundError(
            f'No PRS-CS posterior effect rows found for prefix {out_prefix}')
    prscs_output.to_csv(file_path, sep='\t', header=False, index=False)

    # Argument list (no shell) keeps paths with spaces intact; check=True means
    # the success message below is printed only when PLINK actually succeeded.
    subprocess.run(
        ['plink',
         '--bfile', str(target_prefix),
         '--allow-no-sex',
         '--score', file_path, '2', '4', '6',
         '--out', out_prefix],
        check=True,
    )
    print(f'Output file saved to {out_prefix}.profile \nPRS-CS done!')

## 1. Breast cancer

In [None]:
# Fit PRS-CS on the breast cancer summary statistics; `chrom` is left at the
# function default.
prs_cs(
    src='PRScs/PRScs.py',
    ref_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/reference_data/1kgp/ldblk_1kg_eas'),
    bim_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                '/target_data/target.data.impute'),
    sst_file=('/home/sangnv/Desktop/VGP_10_diseases'
              '/sum_stats_data/breast_cancer/hum0197.v3.BBJ.BC.v1/gwas.PRSCS.txt'),
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/breast_cancer/prscs.hum0197.v3.BBJ'),
    n_gwas=79_550,
    phi=0.01,
    seed=68,
)



--ref_dir=/home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas
--bim_prefix=/home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute
--sst_file=/home/sangnv/Desktop/VGP_10_diseases/sum_stats_data/breast_cancer/hum0197.v3.BBJ.BC.v1/gwas.PRSCS.txt
--a=1
--b=0.5
--phi=0.01
--n_gwas=79550
--n_iter=1000
--n_burnin=500
--thin=5
--out_dir=/home/sangnv/Desktop/VGP_10_diseases/results/breast_cancer/prscs.hum0197.v3.BBJ
--chrom=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22']
--beta_std=FALSE
--write_psi=FALSE
--write_pst=FALSE
--seed=68


##### process chromosome 1 #####
... parse reference file: /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... 85604 SNPs on chromosome 1 read from /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... parse bim file: /home/sangnv/Desktop/VGP_10_diseases/target_data/target.

In [3]:
# Merge the breast cancer PRS-CS weights and score the target cohort with PLINK.
prscs_score(
    out_name='prscs.hum0197.v3.BBJ',
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/breast_cancer/'),
    target_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                   '/target_data/target.data.impute'),
)

PLINK v1.90b6.21 64-bit (19 Oct 2020)          www.cog-genomics.org/plink/1.9/
(C) 2005-2020 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/sangnv/Desktop/VGP_10_diseases/results/breast_cancer/prscs.hum0197.v3.BBJ.log.
Options in effect:
  --allow-no-sex
  --bfile /home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute
  --out /home/sangnv/Desktop/VGP_10_diseases/results/breast_cancer/prscs.hum0197.v3.BBJ
  --score /home/sangnv/Desktop/VGP_10_diseases/results/breast_cancer/prscs.hum0197.v3.BBJ.concat.txt 2 4 6

64186 MB RAM detected; reserving 32093 MB for main workspace.
5605066 variants loaded from .bim file.
1608 people (0 males, 0 females, 1608 ambiguous) loaded from .fam.
Ambiguous sex IDs written to
/home/sangnv/Desktop/VGP_10_diseases/results/breast_cancer/prscs.hum0197.v3.BBJ.nosex
.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 1608 founders and 0 nonfounders present.
Calculating allele frequ

## 2. Colorectal cancer

In [None]:
# Fit PRS-CS on the colorectal cancer summary statistics for chromosomes 1-22.
prs_cs(
    src='PRScs/PRScs.py',
    ref_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/reference_data/1kgp/ldblk_1kg_eas'),
    bim_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                '/target_data/target.data.impute'),
    sst_file=('/home/sangnv/Desktop/VGP_10_diseases'
              '/sum_stats_data/colorectal_cancer/hum0197.v3.BBJ.CC.v1/gwas.PRSCS.txt'),
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/colorectal_cancer/prscs.hum0197.v3.BBJ'),
    chrom=','.join(map(str, range(1, 23))),  # '1,2,...,22'
    n_gwas=167_691,
    phi=0.01,
    seed=68,
)



--ref_dir=/home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas
--bim_prefix=/home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute
--sst_file=/home/sangnv/Desktop/VGP_10_diseases/sum_stats_data/colorectal_cancer/hum0197.v3.BBJ.CC.v1/gwas.PRSCS.txt
--a=1
--b=0.5
--phi=0.01
--n_gwas=167691
--n_iter=1000
--n_burnin=500
--thin=5
--out_dir=/home/sangnv/Desktop/VGP_10_diseases/results/colorectal_cancer/prscs.hum0197.v3.BBJ
--chrom=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22']
--beta_std=FALSE
--write_psi=FALSE
--write_pst=FALSE
--seed=68


##### process chromosome 1 #####
... parse reference file: /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... 85604 SNPs on chromosome 1 read from /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... parse bim file: /home/sangnv/Desktop/VGP_10_diseases/target_dat

In [4]:
# Merge the colorectal cancer PRS-CS weights and score the target cohort with PLINK.
prscs_score(
    out_name='prscs.hum0197.v3.BBJ',
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/colorectal_cancer/'),
    target_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                   '/target_data/target.data.impute'),
)

PLINK v1.90b6.21 64-bit (19 Oct 2020)          www.cog-genomics.org/plink/1.9/
(C) 2005-2020 Shaun Purcell, Christopher Chang   GNU General Public License v3
Logging to /home/sangnv/Desktop/VGP_10_diseases/results/colorectal_cancer/prscs.hum0197.v3.BBJ.log.
Options in effect:
  --allow-no-sex
  --bfile /home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute
  --out /home/sangnv/Desktop/VGP_10_diseases/results/colorectal_cancer/prscs.hum0197.v3.BBJ
  --score /home/sangnv/Desktop/VGP_10_diseases/results/colorectal_cancer/prscs.hum0197.v3.BBJ.concat.txt 2 4 6

64186 MB RAM detected; reserving 32093 MB for main workspace.
5605066 variants loaded from .bim file.
1608 people (0 males, 0 females, 1608 ambiguous) loaded from .fam.
Ambiguous sex IDs written to
/home/sangnv/Desktop/VGP_10_diseases/results/colorectal_cancer/prscs.hum0197.v3.BBJ.nosex
.
Using 1 thread (no multithreaded calculations invoked).
Before main variant filters, 1608 founders and 0 nonfounders present.
Calculat

## 3. Gastric cancer

In [None]:
# Fit PRS-CS on the gastric cancer summary statistics for chromosomes 1-22.
prs_cs(
    src='PRScs/PRScs.py',
    ref_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/reference_data/1kgp/ldblk_1kg_eas'),
    bim_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                '/target_data/target.data.impute'),
    sst_file=('/home/sangnv/Desktop/VGP_10_diseases'
              '/sum_stats_data/gastric_cancer/hum0197.v3.BBJ.GC.v1/gwas.PRSCS.txt'),
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/gastric_cancer/prscs.hum0197.v3.BBJ'),
    chrom=','.join(map(str, range(1, 23))),  # '1,2,...,22'
    n_gwas=167_122,
    phi=0.01,
    seed=68,
)



--ref_dir=/home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas
--bim_prefix=/home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute
--sst_file=/home/sangnv/Desktop/VGP_10_diseases/sum_stats_data/gastric_cancer/hum0197.v3.BBJ.GC.v1/gwas.PRSCS.txt
--a=1
--b=0.5
--phi=0.01
--n_gwas=167122
--n_iter=1000
--n_burnin=500
--thin=5
--out_dir=/home/sangnv/Desktop/VGP_10_diseases/results/gastric_cancer/prscs.hum0197.v3.BBJ
--chrom=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22']
--beta_std=FALSE
--write_psi=FALSE
--write_pst=FALSE
--seed=68


##### process chromosome 1 #####
... parse reference file: /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... 85604 SNPs on chromosome 1 read from /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... parse bim file: /home/sangnv/Desktop/VGP_10_diseases/target_data/targ

## 4. Parkinson's disease (PD)

In [None]:
# Fit PRS-CS on the Parkinson's disease summary statistics; `chrom` is left at
# the function default.
prs_cs(
    src='PRScs/PRScs.py',
    ref_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/reference_data/1kgp/ldblk_1kg_eas'),
    bim_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                '/target_data/target.data.impute'),
    sst_file=('/home/sangnv/Desktop/VGP_10_diseases'
              '/sum_stats_data/pd/hum0197.v3.BBJ.PD.v1/gwas.PRSCS.txt'),
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/pd/prscs.hum0197.v3.BBJ'),
    n_gwas=176_128,
    phi=0.01,
    seed=68,
)



--ref_dir=/home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas
--bim_prefix=/home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute
--sst_file=/home/sangnv/Desktop/VGP_10_diseases/sum_stats_data/pd/hum0197.v3.BBJ.PD.v1/gwas.PRSCS.txt
--a=1
--b=0.5
--phi=0.01
--n_gwas=176128
--n_iter=1000
--n_burnin=500
--thin=5
--out_dir=/home/sangnv/Desktop/VGP_10_diseases/results/pd/prscs.hum0197.v3.BBJ
--chrom=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22']
--beta_std=FALSE
--write_psi=FALSE
--write_pst=FALSE
--seed=68


##### process chromosome 1 #####
... parse reference file: /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... 85604 SNPs on chromosome 1 read from /home/sangnv/Desktop/VGP_10_diseases/reference_data/1kgp/ldblk_1kg_eas/snpinfo_1kg_hm3 ...
... parse bim file: /home/sangnv/Desktop/VGP_10_diseases/target_data/target.data.impute.bim ...
.

## 5. Chronic kidney disease (CKD)

In [None]:
# Fit PRS-CS on the CKD summary statistics; `chrom` is left at the function
# default.
prs_cs(
    src='PRScs/PRScs.py',
    ref_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/reference_data/1kgp/ldblk_1kg_eas'),
    bim_prefix=('/home/sangnv/Desktop/VGP_10_diseases'
                '/target_data/target.data.impute'),
    sst_file=('/home/sangnv/Desktop/VGP_10_diseases'
              '/sum_stats_data/ckd/hum0197.v3.BBJ.CRF.v1/gwas.PRSCS.txt'),
    out_dir=('/home/sangnv/Desktop/VGP_10_diseases'
             '/results/ckd/prscs.hum0197.v3.BBJ'),
    n_gwas=176_462,
    phi=0.01,
    seed=68,
)