### __MOB-suite:__
__Software tools for clustering, reconstruction and typing of plasmids from draft assemblies__

Docs:

https://github.com/phac-nml/mob-suite

In [1]:
import polars as pl

mobtyper = "/home/npilquinao/BEM-ISP/group_storage/MOBtyper"

#### __MOB typer__

__Using MOB-typer to perform replicon and relaxase typing of complete plasmids and predict mobility__

"You can perform plasmid typing using a fasta formated file containing a single plasmid represented by one or more contigs or it can treat all of the sequences in the fasta file as independent. The default behaviour is to treat all sequences in a file as from one plasmid, so do not include multiple unrelated plasmids in the file without specifying --multi as they will be treated as a single plasmid."

__Tool options__

In [12]:
!mob_typer --help

  from scipy.stats import chi2
usage: mob_typer [-h] -i INFILE -o OUT_FILE
                 [--biomarker_report_file BIOMARKER_REPORT_FILE]
                 [-g MGE_REPORT_FILE] [-a ANALYSIS_DIR] [-n NUM_THREADS]
                 [-s SAMPLE_ID] [-x] [--min_rep_evalue MIN_REP_EVALUE]
                 [--min_mob_evalue MIN_MOB_EVALUE]
                 [--min_con_evalue MIN_CON_EVALUE] [--min_length MIN_LENGTH]
                 [--min_rep_ident MIN_REP_IDENT]
                 [--min_mob_ident MIN_MOB_IDENT]
                 [--min_con_ident MIN_CON_IDENT]
                 [--min_rpp_ident MIN_RPP_IDENT] [--min_rep_cov MIN_REP_COV]
                 [--min_mob_cov MIN_MOB_COV] [--min_con_cov MIN_CON_COV]
                 [--min_rpp_cov MIN_RPP_COV] [--min_rpp_evalue MIN_RPP_EVALUE]
                 [--min_overlap MIN_OVERLAP] [-k] [--debug]
                 [--plasmid_mash_db PLASMID_MASH_DB] [-m PLASMID_META]
                 [--plasmid_db_type PLASMID_DB_TYPE]
                 [--plasmid_

__Test__

In [29]:
%%bash

seq=/home/npilquinao/BEM-ISP/group_storage/hybracter/assembly_output/FINAL_OUTPUT/complete
out=/home/npilquinao/BEM-ISP/group_storage/MOBtyper/

mob_typer \
    --infile $seq/VA692_plasmid.fasta \
    --sample_id "VA692" \
    --multi \
    --num_threads 65 \
    --out_file $out/VA692_mobtyper_TEST.tsv

  from scipy.stats import chi2
2024-08-14 04:51:49,449 mob_suite.mob_typer INFO: Running Mob-typer version 3.1.9 [in /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/lib/python3.11/site-packages/mob_suite/mob_typer.py:172]
2024-08-14 04:51:49,449 mob_suite.mob_typer INFO: Processing fasta file /home/npilquinao/BEM-ISP/group_storage/hybracter/assembly_output/FINAL_OUTPUT/complete/VA692_plasmid.fasta [in /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/lib/python3.11/site-packages/mob_suite/mob_typer.py:174]
2024-08-14 04:51:49,451 mob_suite.mob_typer INFO: Creating analysis directory /tmp/tmpkxamaft1 [in /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/lib/python3.11/site-packages/mob_suite/mob_typer.py:187]
2024-08-14 04:51:49,452 mob_suite.mob_typer INFO: SUCCESS: Found program blastn at /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/bin/blastn [in /srv/storage/oceania

__All samples__

In [13]:
%%bash

seq=/home/npilquinao/BEM-ISP/group_storage/hybracter/assembly_output/FINAL_OUTPUT/complete
out=/home/npilquinao/BEM-ISP/group_storage/MOBtyper/

for sample in $seq/*_plasmid.fasta; do

    sample_name=$(basename "$sample" _plasmid.fasta)

    mob_typer \
        --infile $sample \
        --sample_id $sample_name \
        --multi \
        --num_threads 65 \
        --out_file "$out/${sample_name}_mobtyper.tsv"

done

  from scipy.stats import chi2
2024-08-14 04:35:00,076 mob_suite.mob_typer INFO: Running Mob-typer version 3.1.9 [in /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/lib/python3.11/site-packages/mob_suite/mob_typer.py:172]
2024-08-14 04:35:00,076 mob_suite.mob_typer INFO: Processing fasta file /home/npilquinao/BEM-ISP/group_storage/hybracter/assembly_output/FINAL_OUTPUT/complete/VA1046_plasmid.fasta [in /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/lib/python3.11/site-packages/mob_suite/mob_typer.py:174]
2024-08-14 04:35:00,076 mob_suite.mob_typer INFO: Creating analysis directory /tmp/tmp8me69qp5 [in /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/lib/python3.11/site-packages/mob_suite/mob_typer.py:187]
2024-08-14 04:35:00,077 mob_suite.mob_typer INFO: SUCCESS: Found program blastn at /srv/storage/oceania@storage2.rennes.grid5000.fr/npilquinao/BEM/conda/BEM_ISP/bin/blastn [in /srv/storage/oceani

__View results__

In [32]:
import polars as pl

sample = "VA61"

T = pl.read_csv(f"{mobtyper}/{sample}_mobtyper.tsv", separator="\t")
#T = pl.read_csv(mobtyper + "/VA692_mobtyper_TEST.tsv", separator="\t")

display(T.head())

print(f"N° plasmids: {T.shape[0]}\n")

for sample_id in T["sample_id"].unique():
    print(sample_id)

sample_id,num_contigs,size,gc,md5,rep_type(s),rep_type_accession(s),relaxase_type(s),relaxase_type_accession(s),mpf_type,mpf_type_accession(s),orit_type(s),orit_accession(s),predicted_mobility,mash_nearest_neighbor,mash_neighbor_distance,mash_neighbor_identification,primary_cluster_id,secondary_cluster_id,predicted_host_range_overall_rank,predicted_host_range_overall_name,observed_host_range_ncbi_rank,observed_host_range_ncbi_name,reported_host_range_lit_rank,reported_host_range_lit_name,associated_pmid(s)
str,i64,i64,f64,str,str,str,str,str,str,str,str,str,str,str,f64,str,str,str,str,str,str,str,str,str,str
"""plasmid00001 length=270547 pla…",1,270547,0.464367,"""8be2b053d194329cdd385f268ae71b…","""IncHI2A,rep_cluster_1088""","""BX664015,000340__KU353730""","""MOBH""","""NC_012555_00231""","""MPF_F""","""NC_005211_00005,NC_009838_0000…","""MOBH""","""NC_017500""","""conjugative""","""CP041734""",0.003578,"""Enterobacter hormaechei subsp.…","""AA739""","""AJ058""","""order""","""Enterobacterales""","""order""","""Enterobacterales""","""order""","""Enterobacterales""","""20851899"""
"""plasmid00002 length=45073 plas…",1,45073,0.46895,"""d4f92e4fa686e778b9540d022ef372…","""IncX3""","""000221__NC_025130_00012""","""MOBP""","""NC_020552_00059""","""MPF_T""","""NC_017624_00009,NC_019083_0003…","""-""","""-""","""conjugative""","""MH286949""",0.000144,"""Escherichia coli""","""AA038""","""AH615""","""order""","""Enterobacterales""","""order""","""Enterobacterales""","""family""","""Enterobacteriaceae""","""26842776; 22470007"""
"""plasmid00003 length=4853 plasm…",1,4853,0.524624,"""fd7c9f346d0e51fe63910af63c63cf…","""rep_cluster_2335""","""KU302809_00001""","""MOBP,MOBP""","""NC_011404_00002,NC_001848_0000…","""-""","""-""","""MOBP""","""KJ009324""","""mobilizable""","""CP042491""",0.000168,"""Enterobacter hormaechei""","""AA517""","""AI685""","""multi-phylla""","""Actinomycetota,Bacillota,Pseud…","""multi-phylla""","""Actinomycetota,Bacillota,Pseud…","""order""","""Enterobacterales""","""20008783"""
"""plasmid00004 length=2494 plasm…",1,2494,0.515237,"""a4021558222d39e46ff088cf2f1c7b…","""rep_cluster_2335""","""KU302809_00001""","""-""","""-""","""-""","""-""","""MOB_unknown""","""NC_017722""","""mobilizable""","""CP042492""",0.000781,"""Enterobacter hormaechei""","""AB040""","""AJ580""","""multi-phylla""","""Actinomycetota,Bacillota,Pseud…","""multi-phylla""","""Actinomycetota,Bacillota,Pseud…","""order""","""Enterobacterales""","""20008783"""


N° plasmids: 4

plasmid00003 length=4853 plasmid_copy_number_short=4.23x plasmid_copy_number_long=0.02x circular=true
plasmid00002 length=45073 plasmid_copy_number_short=0.87x plasmid_copy_number_long=2.03x circular=true
plasmid00001 length=270547 plasmid_copy_number_short=0.74x plasmid_copy_number_long=0.38x circular=true
plasmid00004 length=2494 plasmid_copy_number_short=11.85x plasmid_copy_number_long=0.0x circular=true
