In [None]:
# Install the working environments (conda + mamba).

# Conda environment setup: update conda itself from the conda-forge channel
conda install -c conda-forge conda
# for bioconda: the package name has to be given explicitly. The previous
# command (`conda install -c bioconda-utils`) only named a channel and no
# package, so conda had nothing to install.
conda install -c conda-forge -c bioconda bioconda-utils

# mamba environment setup: install mamba into the base environment so it is
# available from every environment
conda install mamba -n base -c conda-forge
# Alternative (run only if the command above is not suitable): install mamba
# into the currently active environment instead of base.
# conda install -c conda-forge mamba

In [None]:
# install prokka into a dedicated environment
# `conda install` would put prokka into the currently active environment and
# never create prokka_env, so the activation below would fail; create the
# environment and install prokka into it in one step instead.
conda create -n prokka_env -c conda-forge -c bioconda -c defaults prokka
# activate prokka_env
conda activate prokka_env

In [None]:
# install plasmid finder with its database

# install dependencies first: INSTALL.py below needs the kma_index executable
mamba install -c conda-forge -c bioconda cgecore
mamba install -c conda-forge -c bioconda tabulate

# install kma (provides kma and kma_index)
# NOTE(review): these notes originally targeted kma 2.8.1; mamba installs the
# latest build unless a version is pinned (kma=<version>) - confirm which
# release is required. Building from source is the alternative:
#   git clone https://bitbucket.org/genomicepidemiology/kma
mamba install -c conda-forge -c bioconda kma

# install blast latest version
# (binaries are also available from
#  ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/ - that URL is a
#  download location, not a command, so it must stay in a comment)
mamba install -c conda-forge -c bioconda blast

# go to the directory to store plasmidfinder and its database
cd /mnt/DATAPOOL/mmibstudentnew/

# clone the PlasmidFinder program repository
git clone https://bitbucket.org/genomicepidemiology/plasmidfinder.git
cd plasmidfinder

# clone the database repository inside the program directory, so the database
# ends up in /mnt/DATAPOOL/mmibstudentnew/plasmidfinder/plasmidfinder_db/
# (the path passed to plasmidfinder.py -p when running the tool)
git clone https://bitbucket.org/genomicepidemiology/plasmidfinder_db.git
cd plasmidfinder_db
PLASMID_DB=$(pwd)
# index the plasmidfinder database with the kma_index executable
python3 INSTALL.py kma_index

In [None]:
# install panaroo
mamba install -c conda-forge -c bioconda -c defaults panaroo

# add cd-hit and MAFFT for building multiple sequence alignments
mamba install -c conda-forge -c bioconda cd-hit
mamba install -c conda-forge -c bioconda mafft

# install required dependencies
# (the progress-bar package is spelled "tqdm"; "tdqm" does not resolve)
mamba install -c conda-forge -c bioconda biopython numpy networkx gffutils edlib joblib tqdm cd-hit
# install optional dependencies
# NOTE(review): "clustal" is not a conda package name as far as I can tell;
# Clustal Omega is packaged as "clustalo" on bioconda - confirm this is the
# aligner intended here.
mamba install -c conda-forge -c bioconda prank mafft clustalo mash

In [None]:
# install ABRicate
# Channels are given explicitly (same as the prokka/panaroo installs) so the
# command does not depend on bioconda being pre-configured in ~/.condarc.
mamba install -c conda-forge -c bioconda -c defaults abricate
# verify that abricate's dependencies are installed
abricate --check
# list the databases abricate currently has available
abricate --list
# download latest ABRicate database version (here: the ncbi database)
abricate-get_db --db ncbi --force

In [None]:
# install rgi with its database
mamba install -c conda-forge -c bioconda -c defaults rgi

# download latest AMR reference data from CARD
wget https://card.mcmaster.ca/latest/data
# unarchive only card.json from the downloaded file
# (this command had been pasted twice onto a single line, which made tar look
#  for a member called "./card.jsontar" and fail)
tar -xvf data ./card.json

# load AMR data in working directory > "/mnt/DATAPOOL/mmibstudentnew/dummy/"
rgi load --card_json /mnt/DATAPOOL/mmibstudentnew/dummy/card.json --local
# load AMR data system wide
rgi load --card_json /mnt/DATAPOOL/mmibstudentnew/dummy/card.json

# check database version
# check local
rgi database --version --local
# check system wide
rgi database --version

# clean previous versions
# clean rgi local
rgi clean --local
# clean system wide
rgi clean

# download wildcard data
wget -O wildcard_data.tar.bz2 https://card.mcmaster.ca/latest/variants
mkdir -p wildcard
tar -xjf wildcard_data.tar.bz2 -C wildcard
gunzip wildcard/*.gz

# Release number of the downloaded WildCARD (Resistomes & Variants) data.
# It is needed by `rgi wildcard_annotation -v` and `rgi load --wildcard_version`
# and is part of the generated annotation file names. It cannot be derived
# here, so the cell stops with a clear message when it has not been set.
WILDCARD_VERSION="${WILDCARD_VERSION:?set WILDCARD_VERSION to the downloaded WildCARD release number}"

# create annotation files using CARD data
rgi card_annotation -i /mnt/DATAPOOL/mmibstudentnew/dummy/card.json > card_annotation.log 2>&1
# create annotation files using WILDCARD data
rgi wildcard_annotation -i wildcard --card_json /mnt/DATAPOOL/mmibstudentnew/dummy/card.json \
  -v "$WILDCARD_VERSION" > wildcard_annotation.log 2>&1

# load data into RGI
# - a backslash must be the very last character on a line to continue it
#   (trailing blanks after it had broken this command into separate commands)
# - --card_json needs a space before its path
# - the WildCARD files are read from ./wildcard, where they were extracted above
# NOTE(review): the card_database_v3.2.6*.fasta names assume the downloaded
# card.json is CARD 3.2.6 - confirm against the files card_annotation produced.
rgi load --card_json /mnt/DATAPOOL/mmibstudentnew/dummy/card.json \
  --debug --local \
  --card_annotation card_database_v3.2.6.fasta \
  --card_annotation_all_models card_database_v3.2.6_all.fasta \
  --wildcard_annotation "wildcard_database_v${WILDCARD_VERSION}.fasta" \
  --wildcard_annotation_all_models "wildcard_database_v${WILDCARD_VERSION}_all.fasta" \
  --wildcard_index wildcard/index-for-model-sequences.txt \
  --wildcard_version "$WILDCARD_VERSION" \
  --amr_kmers wildcard/all_amr_61mers.txt \
  --kmer_database wildcard/61_kmer_db.json \
  --kmer_size 61

In [None]:
# run plasmidfinder on 261 ctx-M-15 dummy plasmids
# One output directory is created per input file. The command is continued
# with trailing backslashes; without them every option line was executed as a
# separate (failing) command and -p was left without its database path.
OUT_ROOT=/mnt/DATAPOOL/mmibstudentnew/dummy/output_files/test
PLASMID_DB=/mnt/DATAPOOL/mmibstudentnew/plasmidfinder/plasmidfinder_db/

for file in *; do
    # skip sub-directories and an unmatched glob; only regular files are inputs
    [ -f "$file" ] || continue
    # -p: do not fail when the directory is left over from a previous run
    mkdir -p "$OUT_ROOT/$file/"
    plasmidfinder.py -i "$file" \
        -o "$OUT_ROOT/$file/" \
        -t 0.60 -x \
        -p "$PLASMID_DB"
done

In [None]:
# run prokka on 261 ctx-M-15 dummy plasmids
# The command is continued with trailing backslashes; previously the second
# line ran as its own command, so prokka never received --outdir, --prefix or
# the input file.
# NOTE(review): --outdir is kept as "<file><file>" (e.g. a.fastaa.fasta) exactly
# as written originally; this looks like a lost separator - confirm the
# intended directory name.
for file in *.fasta; do
    # skip the literal pattern when no .fasta file is present
    [ -e "$file" ] || continue
    prokka --kingdom Bacteria \
        --outdir "${file}${file}" --prefix "$file" \
        --force "$file"
done

In [None]:
# collect all gff files: copy the .gff from every sub-directory into the
# current directory as <sub-directory name>.gff
# (the glob characters had been lost: `for subdir in ;` looped over nothing and
#  `$subdir/.gff` pointed at a hidden file instead of matching *.gff)
# NOTE(review): if a sub-directory holds several .gff files they all map to the
# same destination and the last one copied wins.
for subdir in */; do
    subdir="${subdir%/}"            # drop the trailing slash added by the */ glob
    for gff in "$subdir"/*.gff; do
        # skip sub-directories that contain no .gff file
        [ -e "$gff" ] || continue
        cp "$gff" "$subdir.gff"
    done
done