In [None]:
import iseq_prof_analysis as analysis
import itertools
import plotly.express as px
from fasta_reader import read_fasta
from pathlib import Path
from sam_io import read_sam
import iseq_prof
import gff_io
from tqdm.notebook import tqdm
import hmmer
from dna_features_viewer import GraphicFeature, GraphicRecord

In [None]:
# Hits whose E-value is above this cutoff are skipped when the iSeq plots are
# built. The identifier's spelling is kept because other cells reference it.
EVALUE_THRSHOLD = 1e-10

# Presumably populates `analysis.config`, which the next cell reads paths from.
analysis.load_config()

# Depth 49, Hybrid consensus

In [None]:
# Resolve the Chlamydia dataset locations from the loaded configuration.
chlamydia_cfg = analysis.config.chlamydia
root = chlamydia_cfg.root_dir
hybrid_consensus = chlamydia_cfg.hybrid_consensus

# Default result directory; the Prokka+HMMER3 cell reads from it and the
# iSeq cells reassign it.
output_dir = root / "output0.01"

# Materialise all FASTA records so they can be counted and indexed.
hybrid = [*read_fasta(root / hybrid_consensus)]

# Short summary; index 1 (the second record) is the sequence whose length is
# used for every plot further down.
print(f"# {hybrid_consensus}")
print(f"Number of targets: {len(hybrid)}")
print(f"Target 2: >{hybrid[1].defline}")

## Prokka+HMMER3

In [None]:
# Index the Prokka gene records by their GFF `ID` attribute so each HMMER
# domain hit (looked up by its query name) can be placed on the assembly.
# NOTE: `output_dir` is reassigned by the iSeq cells; on a top-to-bottom run
# it still points at "output0.01" when this cell executes.
assembly_gffs = {
    gene.attributes_asdict()["ID"]: gene
    for gene in gff_io.read_gff(output_dir / "prokka" / "assembly.gff")
}

# GFF strand symbol -> GraphicFeature strand. Any other symbol ("." or "?")
# maps to 0 (no direction) instead of raising inside int().
strand_sign = {"+": 1, "-": -1}

features = []
# `colors` and `profile_colors` are reused by the iSeq cells so that a given
# profile keeps the same colour in every plot.
colors = itertools.cycle(px.colors.qualitative.Plotly)
profile_colors = {}
# NOTE(review): hits are not filtered by sequence ID; presumably the assembly
# only covers hybrid[1] -- confirm.
for domtbl_row in hmmer.read_domtbl(output_dir / "assembly" / "domtblout.txt"):
    assembly_gff = assembly_gffs[domtbl_row.query.name]
    strand = strand_sign.get(assembly_gff.strand, 0)

    # Gene span on the assembly as a 0-based half-open interval.
    gene_start = int(assembly_gff.start) - 1
    gene_end = int(assembly_gff.end)

    # Alignment span in nucleotides, measured from the gene's 5' end
    # (amino-acid coordinates times three).
    alifrom = (domtbl_row.ali_coord.start - 1) * 3
    alito = domtbl_row.ali_coord.stop * 3

    # [start, end)
    if strand < 0:
        # On the reverse strand the first residue sits at the gene's GFF end,
        # so the offsets are counted backwards from there.
        start = gene_end - alito
        end = gene_end - alifrom
    else:
        start = gene_start + alifrom
        end = gene_start + alito

    profile_name = domtbl_row.target.name
    if profile_name not in profile_colors:
        profile_colors[profile_name] = next(colors)

    feature = GraphicFeature(start=start, end=end, strand=strand,
                             color=profile_colors[profile_name],
                             label=profile_name)
    features.append(feature)

record = GraphicRecord(sequence_length=len(hybrid[1].sequence), features=features)
# To save the figure instead of only displaying it:
# ax = record.plot(figure_width=20)[0]
# ax.figure.savefig('prokka_on_hybrid_depth49_consensus.png', bbox_inches='tight')
record.plot(figure_width=20);

## iSeq (epsilon=0.1)

In [None]:
output_dir = root / "output0.1"

# GFF strand symbol -> GraphicFeature strand. Any other symbol ("." or "?")
# maps to 0 (no direction) instead of raising inside int().
strand_sign = {"+": 1, "-": -1}

features = []
for item in gff_io.read_gff(output_dir / "assembly" / "output.gff"):

    atts = item.attributes_asdict()
    # Keep only hits at or below the E-value cutoff.
    if float(atts["E-value"]) > EVALUE_THRSHOLD:
        continue

    # Reuse the colour already assigned to this profile, if any.
    profile_name = atts["Profile_name"]
    if profile_name not in profile_colors:
        profile_colors[profile_name] = next(colors)

    # GFF coordinates are 1-based and inclusive; shift the start so the
    # feature uses the same 0-based [start, end) convention as the
    # Prokka+HMMER3 plot.
    start = int(item.start) - 1
    end = int(item.end)
    strand = strand_sign.get(item.strand, 0)
    feature = GraphicFeature(start=start, end=end, strand=strand,
                             color=profile_colors[profile_name],
                             label=profile_name)
    features.append(feature)

record = GraphicRecord(sequence_length=len(hybrid[1].sequence), features=features)
# To save the figure instead of only displaying it:
# ax = record.plot(figure_width=20)[0]
# ax.figure.savefig('iseq_eps0.1_on_hybrid_depth49_consensus.png', bbox_inches='tight')
record.plot(figure_width=20);

## iSeq (epsilon=0.01)

In [None]:
output_dir = root / "output0.01"

# GFF strand symbol -> GraphicFeature strand. Any other symbol ("." or "?")
# maps to 0 (no direction) instead of raising inside int().
strand_sign = {"+": 1, "-": -1}

features = []
for item in gff_io.read_gff(output_dir / "assembly" / "output.gff"):

    atts = item.attributes_asdict()
    # Keep only hits at or below the E-value cutoff.
    if float(atts["E-value"]) > EVALUE_THRSHOLD:
        continue

    # Reuse the colour already assigned to this profile, if any.
    profile_name = atts["Profile_name"]
    if profile_name not in profile_colors:
        profile_colors[profile_name] = next(colors)

    # GFF coordinates are 1-based and inclusive; shift the start so the
    # feature uses the same 0-based [start, end) convention as the
    # Prokka+HMMER3 plot.
    start = int(item.start) - 1
    end = int(item.end)
    strand = strand_sign.get(item.strand, 0)
    feature = GraphicFeature(start=start, end=end, strand=strand,
                             color=profile_colors[profile_name],
                             label=profile_name)
    features.append(feature)

record = GraphicRecord(sequence_length=len(hybrid[1].sequence), features=features)
# To save the figure instead of only displaying it (file name is specific to
# this epsilon so it does not overwrite the other iSeq plots):
# ax = record.plot(figure_width=20)[0]
# ax.figure.savefig('iseq_eps0.01_on_hybrid_depth49_consensus.png', bbox_inches='tight')
record.plot(figure_width=20);

## iSeq (epsilon=0.001)

In [None]:
output_dir = root / "output0.001"

# GFF strand symbol -> GraphicFeature strand. Any other symbol ("." or "?")
# maps to 0 (no direction) instead of raising inside int().
strand_sign = {"+": 1, "-": -1}

features = []
for item in gff_io.read_gff(output_dir / "assembly" / "output.gff"):

    atts = item.attributes_asdict()
    # Keep only hits at or below the E-value cutoff.
    if float(atts["E-value"]) > EVALUE_THRSHOLD:
        continue

    # Reuse the colour already assigned to this profile, if any.
    profile_name = atts["Profile_name"]
    if profile_name not in profile_colors:
        profile_colors[profile_name] = next(colors)

    # GFF coordinates are 1-based and inclusive; shift the start so the
    # feature uses the same 0-based [start, end) convention as the
    # Prokka+HMMER3 plot.
    start = int(item.start) - 1
    end = int(item.end)
    strand = strand_sign.get(item.strand, 0)
    feature = GraphicFeature(start=start, end=end, strand=strand,
                             color=profile_colors[profile_name],
                             label=profile_name)
    features.append(feature)

record = GraphicRecord(sequence_length=len(hybrid[1].sequence), features=features)
# To save the figure instead of only displaying it (file name is specific to
# this epsilon so it does not overwrite the other iSeq plots):
# ax = record.plot(figure_width=20)[0]
# ax.figure.savefig('iseq_eps0.001_on_hybrid_depth49_consensus.png', bbox_inches='tight')
record.plot(figure_width=20);