# Run GxSex

In [None]:
#!/bin/bash
# Run the nextflow interaction-association pipeline once per chromosome (1-23)
# for the osmotic phenotype, writing one .stats file per chromosome.
#
# The interpreter line names bash because the chromosome loop below uses the
# bash-only arithmetic for-loop; a POSIX sh such as dash rejects that syntax.

working_dir=/share/storage/REDS-III/GWA_G_by_Sex/rbc_osmotic
# The trailing slash is kept so the path strings handed to the pipeline are
# unchanged (they contain "//", which resolves like a single "/").
imputation_root=/share/storage/REDS-III/RBCOmics/data/imputation/v1/imputations_all_maf0.01/
phenotype_root=${working_dir}

method=palinear

# Same for every chromosome, so assigned once outside the loops.
phenotype_file=pheno_osmotic.txt
geno_prefix=rbc.ALL.1000G_p3.chr

# NOTE(review): $pheno is not referenced by any command below; the one-value
# loop is kept in case later cells read $pheno -- confirm before removing.
for pheno in SCRN.pink_pct_hemol.adj; do
    for (( chr=1; chr<24; chr++ )); do
        out_file="rbc.1000G_p3.chr${chr}.osmotic~gender+age+donfreq+evs+SNPgender.stats"

        # -resume is passed so a re-run can pick up from earlier pipeline work.
        /share/storage/REDS-III/common/software/nextflow/nextflow-0.25.1-all \
        /share/storage/REDS-III/common/software/pipelines/_pipeline.association.interaction.out_stats_files.v0.1.nf \
            --final_chunks "${imputation_root}/chunks/final_chunks.chr${chr}" \
            --input_pheno "${phenotype_root}/${phenotype_file}" \
            --imputation_dir "${imputation_root}/chr${chr}" \
            --example_mldose "${imputation_root}/chr${chr}/rbc.ALL.1000G_p3.chr${chr}.0.mach.mldose.gz" \
            --geno_prefix "${geno_prefix}" \
            --working_dirs "${working_dir}" \
            --out "${working_dir}/${out_file}" \
            --method "${method}" \
            --interaction_col 1 \
            --interaction_covar GenderNum \
            -resume
    done
done


In [None]:
# For each chromosome 1-23, copy the rows of the pipeline .stats file that pass
# the numeric filter into final/<prefix>.chr<N>.<suffix>.maf_gt_0.01, with the
# chromosome number prepended as a new first column.
results_dir=/share/storage/REDS-III/GWA_G_by_Sex/rbc_osmotic
file_prefix=rbc.1000G_p3
file_suffix=osmotic~gender+age+donfreq+evs+SNPgender

# Column names written at the top of every filtered file; "chrom" is the column
# added by the perl filter below.
header="chrom name position A1 A2 Freq1 MAF Quality Rsq n Mean_predictor_allele beta_SNP_add sebeta_SNP_add beta_SNP_Gender sebeta_SNP_Gender cov_SNP_int_SNP_Gender loglik chi_2df p_2df"

for (( chr=1; chr<24; chr++ )); do
    stats_path="${results_dir}/${file_prefix}.chr${chr}.${file_suffix}.stats"
    filtered_path="${results_dir}/final/${file_prefix}.chr${chr}.${file_suffix}.maf_gt_0.01"
    echo "Processing ${stats_path}"
    {
        echo "${header}"
        # Every input line is tested (nothing is skipped up front). A row is
        # kept when field 6 >= 0.01 and field 8 >= 0.8 -- presumably MAF and
        # Rsq, if the input columns match the header above minus "chrom".
        # A text header row, if present, fails the test because non-numeric
        # text compares as 0 in perl.
        perl -slane 'if ($F[5] >= 0.01 && $F[7] >= 0.8) { print "$chr $_"; }' -- -chr="$chr" \
            < "${stats_path}"
    } > "${filtered_path}"
done


## Calculate p-values for SNP-only (marginal SNP) and SNPxGender (joint SNP)

In [None]:
# For each chromosome 1-23, call qsub_job.sh to run the R statistics script on
# the filtered results file; the output path is the input path plus ".p".
results_dir=/share/storage/REDS-III/GWA_G_by_Sex/rbc_osmotic
file_prefix=rbc.1000G_p3
file_suffix=osmotic~gender+age+donfreq+evs+SNPgender.maf_gt_0.01

for chr in {1..23}; do
    # Shared stem of this chromosome's input and output files.
    chr_stem="${results_dir}/final/${file_prefix}.chr${chr}.${file_suffix}"

    # Option order is kept exactly as before; presumably everything after
    # --program is handed to the R script -- confirm against qsub_job.sh.
    job_args=(
        --job_name "calc_p_${chr}"
        --script_prefix "${results_dir}/processing/calc_p_${chr}"
        --mem 10
        --cpu 1
        --priority 0
        --program /share/storage/REDS-III/GWA_G_by_Sex/calculate_stats_for_probabel_results_v2.R
        --in_file "${chr_stem}"
        --out_file "${chr_stem}.p"
        --remove_missing_p
    )
    /share/storage/REDS-III/scripts/qsub_job.sh "${job_args[@]}"
done



## Generate plots for marginal SNP p-values and joint SNP p-values

In [None]:
### START Generate plots for p_2df, p_SNP and p_INT ###
results_dir=/share/storage/REDS-III/GWA_G_by_Sex/rbc_osmotic
file_prefix=rbc.1000G_p3
file_suffix=osmotic~gender+age+donfreq+evs+SNPgender.maf_gt_0.01.p

#######################################
# Print one plotting row per data line of a per-chromosome results file.
# Each row is "<col2> <col1> <col3> <p-value column> <type>", space-separated,
# where type is "snp" when both allele columns (4 and 5) are a single
# A/C/G/T base and "indel" otherwise. The first line (header) is skipped.
#
# The previous awk code compared alleles with `eq`, which is a Perl operator.
# awk read `$4 eq "A"` as a string concatenation with an unset variable named
# eq, which is always non-empty (true), so every row was labelled "snp".
#
# Arguments: $1 - 1-based index of the column holding the p-value
#            $2 - path of the input file
# Outputs:   rows on stdout
#######################################
extract_plot_rows() {
    local p_col=$1
    local in_path=$2
    awk -v p_col="$p_col" '
        NR > 1 {
            if ($4 ~ /^[ACGT]$/ && $5 ~ /^[ACGT]$/) {
                type = "snp"
            } else {
                type = "indel"
            }
            print $2, $1, $3, $p_col, type
        }' "$in_path"
}

#######################################
# Build processing/<prefix>.<suffix>_<label>.table from chromosomes 1-23.
# Globals:   results_dir, file_prefix, file_suffix (read)
# Arguments: $1 - label used in the table file name (2df, SNP, INT)
#            $2 - 1-based index of the p-value column in the input files
#            $3 - name written for the p-value column in the table header
# NOTE(review): the header line is tab-separated while the data rows are
# space-separated, as before; confirm the plotting script splits on any
# whitespace.
#######################################
build_plot_table() {
    local label=$1
    local p_col=$2
    local p_name=$3
    local table_path="${results_dir}/processing/${file_prefix}.${file_suffix}_${label}.table"
    local chr chr_path

    printf 'VARIANT_ID\tCHR\tPOSITION\t%s\tTYPE\n' "$p_name" > "$table_path"
    for chr in {1..23}; do
        chr_path="${results_dir}/final/${file_prefix}.chr${chr}.${file_suffix}"
        echo "Processing ${chr_path}"
        extract_plot_rows "$p_col" "$chr_path" >> "$table_path"
    done
}

#######################################
# Call qsub_job.sh to run the plotting R script on one table.
# Globals:   results_dir, file_prefix, file_suffix (read)
# Arguments: $1 - label used in the table / output file names
#            $2 - name of the p-value column in the table header
# NOTE(review): all three submissions reuse the job name gwas_plots_snp, as
# before; confirm the scheduler and log naming tolerate duplicates.
#######################################
submit_plot_job() {
    local label=$1
    local p_name=$2
    /share/storage/REDS-III/scripts/qsub_job.sh \
        --job_name gwas_plots_snp \
        --script_prefix "${results_dir}/processing/${file_prefix}.${file_suffix}_${label}.plots" \
        --mem 20 \
        --cpu 1 \
        --priority 0 \
        --program /share/storage/REDS-III/common/software/R/generate_gwas_plots.v6.R \
        --in "${results_dir}/processing/${file_prefix}.${file_suffix}_${label}.table" \
        --in_chromosomes autosomal_nonPAR \
        --in_header \
        --out "${results_dir}/final/${file_prefix}.${file_suffix}_${label}" \
        --col_id VARIANT_ID \
        --col_chromosome CHR \
        --col_position POSITION \
        --col_p "$p_name" \
        --col_variant_type TYPE \
        --generate_snp_indel_manhattan_plot \
        --manhattan_odd_chr_color red \
        --manhattan_even_chr_color blue \
        --manhattan_points_cex 1.5 \
        --generate_snp_indel_qq_plot \
        --qq_lines \
        --qq_points_bg black \
        --qq_lambda
}

# plot p_2df (column 19 is p_2df in the header written by the filtering step)
build_plot_table 2df 19 P
submit_plot_job 2df P

# plot p for SNP itself (column 21; presumably added by the p-value
# calculation script -- confirm against its output header)
build_plot_table SNP 21 P_SNP
submit_plot_job SNP P_SNP

# plot p for the interaction term (column 23; same caveat as above)
build_plot_table INT 23 P_INT
submit_plot_job INT P_INT

### END Generate plots ###
