# Script for building the conda environment for the Tractor - Mix model.

Author: Jose Jaime Martinez-Magana

Day: 25 February 2023

This script was developed to build the conda environment for the Tractor - Mix model on the Yale HPC - Grace cluster.

In [None]:
# if your cluster uses slurm as job handler, use the next command to request resources
# this opens an interactive bash session with 8G of memory on the "interactive" partition
srun --pty --mem=8G -p interactive bash

# we recommend creating a directory for the analysis; here we created one called tractor_mix in the following path
# -p creates any missing parent directory and does not fail if the directory already exists
mkdir -p /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix
# move to the created directory and, only if that worked, create the recommended sub-directories inside it
# (chaining with && keeps the sub-directories from being created somewhere else when cd fails)
cd /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix &&
  mkdir -p databases phenofile programs sample_lists scripts

# make conda available; this applies when your server manages software through "module"
module load miniconda

# set up a dedicated conda environment, named tractor_mix, to hold all the required tools
conda create -n tractor_mix
# switch into the new environment so the installs that follow land in it
conda activate tractor_mix

# install the required tools into the active environment, one conda call per package
# every entry is written as channel:package and the entries are installed in the listed order
conda_specs=(
  # htslib
  bioconda:htslib
  # shapeit4
  bioconda:shapeit4
  # shapeit from the dranew channel (the original notes describe it as shapeit version 2)
  # NOTE(review): the original notes describe this install as optional ("if you uncomment the next line"),
  # but it is active here; remove this entry if only shapeit4 is wanted
  dranew:shapeit
  # rfmix
  bioconda:rfmix
  # GENESIS
  bioconda:bioconductor-genesis
  # admixture
  bioconda:admixture
  # optparse (this library was added on 29 March 2023)
  bioconda:r-optparse
)
for spec in "${conda_specs[@]}"; do
  # text before the colon is the channel, text after it is the package name
  conda install -c "${spec%%:*}" "${spec#*:}"
done


# install GMMAT after this step
# GMMAT is installed from CRAN through a small R script stored in scripts/environment/
# create a directory inside scripts named environment/ (-p: no error if it already exists)
mkdir -p /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix/scripts/environment/
# inside that directory create a file named install_packages.Rscript
# the quoted 'EOF' keeps the shell from expanding anything inside the R code
cat > /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix/scripts/environment/install_packages.Rscript <<'EOF'
#!/usr/bin/env Rscript
# install required R packages, for tractor_mix
required_packages=c("GMMAT")
install.packages(required_packages, repos="https://cloud.r-project.org")
EOF
# run the Rscript, now that the file exists
# NOTE(review): the original command reached this script through
# /vast/palmer/scratch/montalvo-ortiz/jjm262/..., presumably the same directory under another mount point - confirm
Rscript /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix/scripts/environment/install_packages.Rscript

# build dependencies for tractor
conda install -c anaconda pandas
conda install -c anaconda numpy #already installed

# move to the environment directory, create another directory inside it called extdata/ and move into it
# (the steps are chained with && so nothing is created or downloaded in the wrong place if one of them fails)
cd /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix/scripts/environment/ &&
  mkdir -p extdata/ &&
  cd extdata/
# download tractor scripts
git clone https://github.com/Atkinson-Lab/Tractor.git

# Download plink to a path of your preference (here it is saved in the current directory)
# note: this URL is in the plink1-assets bucket, i.e. it is a PLINK 1.9 build (16 Jan 2023), not PLINK 2
wget -O plink_linux_x86_64_20230116.zip https://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20230116.zip
# unzip plink
unzip plink_linux_x86_64_20230116.zip


# installing bcftools from source
# this step is not necessary if a functional bcftools is already installed on your server
# we got an error when installing bcftools from conda;
# these steps worked for us to get a functional bcftools for the environment
# create a bcftools/ directory in the current directory and move into it
mkdir bcftools
cd bcftools
# clone htslib (including its submodules) and bcftools side by side
git clone --recurse-submodules https://github.com/samtools/htslib.git
git clone https://github.com/samtools/bcftools.git
# move into the bcftools clone, then generate and run configure with GSL and perl-filter support enabled
cd bcftools
autoheader && autoconf && ./configure --enable-libgsl --enable-perl-filters
# compile; make is on its own line, so it runs even if the configure chain above fails
make
# a functional bcftools can be found in the path where you cloned the bcftools github repository
# a functional htslib is also downloaded; it is up to you whether you use it or another one already installed on your system
# /vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/scripts/environment/extdata/bcftools/bcftools/bcftools
# the previous path changes based on where you decide to install your bcftools
# cheat sheet of bcftools, https://gist.github.com/elowy01/93922762e131d7abd3c7e8e166a74a0b

# move to the directory databases/ and create a directory named shapeit/ holding
# two directories named genetic_maps and haplotypes
# then move to genetic_maps, download the genetic maps for genome build hg19 and extract them
# the steps are chained with && so a failed step stops the rest instead of downloading into the wrong directory
# wget needs the capital letter -O (output file); the digit zero is not a valid option
cd /vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/databases &&
  mkdir -p shapeit/genetic_maps shapeit/haplotypes &&
  cd shapeit/genetic_maps &&
  wget -O genetic_map_HapMapII_GRCh37.tar.gz https://ftp.ncbi.nlm.nih.gov/hapmap/recombination/2011-01_phaseII_B37/genetic_map_HapMapII_GRCh37.tar.gz &&
  tar -xvf genetic_map_HapMapII_GRCh37.tar.gz

# fetch the 1000 Genomes Phase 3 haplotypes
# go back to the shapeit directory first
cd /vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/databases/shapeit
# step into haplotypes, save the archive there and unpack it (file names are listed while extracting)
cd haplotypes
wget --output-document=1000GP_Phase3.tgz https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.tgz
tar --extract --verbose --file=1000GP_Phase3.tgz

# go to the environment directory
cd /home/jjm262/palmer_scratch/genomics/yalepenn/tractor_mix/scripts/environment/
# write the description of the active conda environment to tractor_mix.yml
conda env export --file tractor_mix.yml