In [None]:
#######################################
Function: 
Decompress the gzipped New Zealand genome into a new FASTA file to use as the reference genome.
#######################################

In [None]:
# Stream the decompressed genome to stdout and redirect it into the working
# FASTA file; the original .gz archive is left in place.
gzip -dc \
    /powerplant/workspace/1/genome_analysis/fish/Pseudocaranx/genome/caranx_ignoblis.v1.dna.superscaffold.fa.gz \
    > /powerplant/workspace/hramzr/blasting/nz_trevally.fasta

In [None]:
#######################################
Function: 
1. Load NCBI blast module v2.2.25. 
2. Make the 'blastresults' folder to store output in.
3. Make a 'blastlogs' folder that will store output and error logs.
#######################################

In [None]:
# Make the NCBI BLAST tools (v2.2.25) available in this shell session.
module load ncbi-blast/2.2.25

# Create the result and log folders. -p makes this cell safe to re-run:
# plain mkdir fails with "File exists" once the folders have been created.
# NOTE(review): these paths are relative to the current working directory,
# while the path declarations further down expect the folders under
# /powerplant/workspace/hramzr/blasting/ - run this cell from that directory.
mkdir -p blastresults blastlogs

In [None]:
#######################################
Function: 
1. Declare the path to the REFERENCE genome FASTA file.
2. Declare the query file consisting of candidate sex genes to run
against the reference genome.
3. Declare the output directory for the blastresults.
4. Declare the log directory for the output and error logs.
#######################################

In [None]:
# Reference genome FASTA that the BLAST database is built from.
db1="/powerplant/workspace/hramzr/blasting/nz_trevally.fasta"
# Candidate gene sequences used as the BLAST query.
query1="/powerplant/workspace/hramzr/candidate_genes_nuc2.fasta"
# Folder for BLAST results (the trailing slash is relied on when file names are appended).
outdir="/powerplant/workspace/hramzr/blasting/blastresults/"
# Folder for the cluster job's output and error logs.
logdir="/powerplant/workspace/hramzr/blasting/blastlogs/"

In [None]:
#######################################
Input: 
Reference sequence

Function: 
Make a blast database for blast to use as input.

Output:
Output database
#######################################

In [None]:
# Build a nucleotide BLAST database named 'nz_trevally' from the reference
# FASTA held in $db1.
makeblastdb \
    -dbtype nucl \
    -in $db1 \
    -out nz_trevally

In [None]:
#######################################
Input: 
1. query1, these are the candidate genes.
2. nz_trevally, this is the database with the reference genome to map against.

Function: 
Blast the candidate genes against the reference genome to see where the candidate genes map on the genome.

Output:
Mapping file.
#######################################

In [None]:
# Defining the location where log files will go.
LOG=${logdir}

# Defining the JOB name for the cluster as 'blasting'.
JOB=blasting

# Defining how many CPUs are required for the process,
# which is 1 here as blast doesn't need much.
CPU=1

# Defining the queue variable as normal, as the process needs regular resources.
QUEUE=normal

# Defining the blastn command, with word size set to 11, ${query1} as query,
# the 'nz_trevally' database as the database the query runs against,
# max_target_seqs set to 5, the output written into ${outdir} and
# the output format being format 6.
# Inside double quotes a backslash-newline is removed, so COMMAND ends up
# as a single line.
COMMAND="blastn -word_size 11 -query $query1 \
        -db nz_trevally \
        -max_target_seqs 5 \
        -out ${outdir}nz_trevallyfmt6.out2 \
        -outfmt 6"

# The command being printed to see whether there are any oddities.
echo ${COMMAND}

# Submitting all of the variables to the cluster.
#   -J  job name
#   -q  queue to submit to
#   -n  number of CPUs requested
#   -o  output log, named after the job and its job ID (%J)
#   -e  error log, named after the job and its job ID (%J)
# The option lines must follow each other directly: a blank line or comment
# between them ends the backslash continuation, so bsub would run with no
# arguments and ${COMMAND} would execute locally instead of on the cluster.
# ${LOG%/} drops the trailing slash of the log directory to avoid '//'.
# ${COMMAND} is left unquoted on purpose so it splits into blastn's arguments.
bsub \
    -J "${JOB}" \
    -q "${QUEUE}" \
    -n "${CPU}" \
    -o "${LOG%/}/${JOB}_%J.out" \
    -e "${LOG%/}/${JOB}_%J.err" \
    ${COMMAND}

In [None]:
#######################################
Input: 
Reference BAM file.

Function: 
Index the New Zealand trevally BAM file with samtools.

Output:
Indexed reference BAM file.
#######################################

In [None]:
# Make samtools 1.9 available in this shell session.
module load samtools/1.9

# Path of the New Zealand trevally BAM file inside the results folder.
# NOTE(review): no cell shown above creates this BAM - confirm it exists
# before running.
bam=${outdir}nz_trevally.bam

# Build the index for the BAM file.
samtools index ${bam}