-
Notifications
You must be signed in to change notification settings - Fork 0
/
03-gwas.sh
executable file
·129 lines (111 loc) · 3.92 KB
/
03-gwas.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash
# strict stop if there are any errors
set -e
# get environmental variables
source config.env
# create results directory
mkdir -p ${results_dir}/03
# log everything from this script to a logfile in the results director
exec &> >(tee ${results_dir}/03/logfile${1})
# Inputs:
# - sparse GRM - 01
# - PCs - 01
# - genotype data - 00
# - phenotypes - 02
# - covariates - 00
# Processes:
# - FastGWA per phen x age x ancestry
# Output:
# - GWAS summary stats per phen x age x ancestry
# - results/03/phen_<phencode>_<ancestry>_<age>.*
# Allow specific analysis to be run
# Can take any number between 1:ngwas where ngwas is the number of rows in ${phenotype_processed_dir}/phenolist
index=$1
nphen=`cat ${phenotype_processed_dir}/phenolist | wc -l`
if [ -z $index ]
then
echo "Running all $nphen GWASs"
else
re='^[0-9]+$'
if ! [[ $index =~ $re ]] ; then
echo "error: Index variable is not a number"
echo "Usage: ${0} [index number]"
exit 1
fi
if [ "$index" -gt "$nphen" ] ; then
echo "error: Index is larger than number of phenotypes"
echo "Usage: ${0} [index number]"
exit 1
fi
echo "Running $index of $nphen GWASs"
fi
## TODO
# copy bim files over to results/03
# Do GWAS for each phenotype
i=1
phenolist=( $(cat ${phenotype_processed_dir}/phenolist) )
for phen in ${phenolist[@]}
do
if [ -z $index ] || [ "$index" -eq "$i" ] ; then
filename=$(basename -- ${phen})
filename="${filename%.*}"
echo $filename
covs=$(echo $phen | sed 's/.phen$/.covs/1')
echo $covs
echo "0" > ${phen}.flag
if [ "$env_family_data" == "true" ]
then
echo "family"
( ./bin/gcta-1.94.1 \
--mbfile ${genotype_processed_dir}/geno_chrs.txt \
--fastGWA-mlm \
--grm-sparse ${genotype_processed_dir}/${bfile_prefix} \
--exclude ${genotype_processed_dir}/bfiles/vremove \
--pheno ${phen} \
--qcovar ${covs} \
--thread-num ${env_threads} \
--maf 0 \
--geno 1 \
--out ${results_dir}/03/${filename} ) \
|| ( echo "1" > ${phen}.flag )
flag=`cat ${phen}.flag`
echo $flag
if [ "$flag" -eq "1" ] ; then
echo "LMM failed. Trying linear model using unrelateds only"
./bin/gcta-1.94.1 \
--mbfile ${genotype_processed_dir}/geno_chrs.txt \
--fastGWA-lr \
--exclude ${genotype_processed_dir}/bfiles/vremove \
--keep ${genotype_processed_dir}/scratch/kingunrelated.txt \
--pheno ${phen} \
--qcovar ${covs} \
--thread-num ${env_threads} \
--maf 0 \
--geno 1 \
--out ${results_dir}/03/${filename}
fi
else
echo "not family"
./bin/gcta-1.94.1 \
--mbfile ${genotype_processed_dir}/geno_chrs.txt \
--fastGWA-lr \
--pheno ${phen} \
--exclude ${genotype_processed_dir}/bfiles/vremove \
--keep ${genotype_processed_dir}/scratch/kingunrelated.txt \
--qcovar ${covs} \
--thread-num ${env_threads} \
--maf 0 \
--geno 1 \
--out ${results_dir}/03/${filename}
fi
# compress GWAS
# keep only b, se, af, n because all other info is constant across GWASs
# if space is a real issue could sacrifice af and n
# add variantid
echo "Compressing output..."
Rscript resources/genotypes/compress_gwas.r ${results_dir}/03/${filename}.fastGWA
rm ${results_dir}/03/${filename}.fastGWA
fi
i=$((i+1))
done
echo "Successfully performed GWAS of all phenotypes!"