In [1]:
import os

from pyEED.core import ProteinInfo
from pyEED.alignment.pairwise import multi_pairwise_alignment
from pyEED.network import pairwise_network
from pyEED.ncbi.utils import load_accessions

# Visualize networks of pairwise sequence alignments

In the following example, a protein BLAST search is conducted for the TEM-1 and TEM-109 variants of beta-lactamase. After pairwise alignment of all sequences, a network is constructed based on the alignment scores and then visualized.

## PBLAST search for seed sequences

Each result from a BLAST search is renamed to the name of the query protein variant it was found with.

In [2]:
# Seed sequences: the TEM-1 and TEM-109 variants, fetched from NCBI by accession.
tem1 = ProteinInfo.from_ncbi("QGC48744.1")
tem109 = ProteinInfo.from_ncbi("AAT46413.1")

# Never commit credentials: the NCBI API key is read from the NCBI_API_KEY
# environment variable. A key that was previously hardcoded in a shared
# notebook should be treated as leaked and revoked.
# NOTE(review): assumes `api_key` is an optional argument of `pblast`, so it is
# simply omitted when the variable is unset — confirm against pyEED.
pblast_options = {"e_value": 0.05, "n_hits": 15}
ncbi_api_key = os.environ.get("NCBI_API_KEY")
if ncbi_api_key:
    pblast_options["api_key"] = ncbi_api_key

# Collect the hits of both searches, plus the seeds themselves, in one list.
blast_results = []
for tem in [tem1, tem109]:
    sequences = tem.pblast(**pblast_options)

    # Tag every hit with the name of the seed it was found with; the network
    # plot later colors nodes by this `name` attribute.
    for sequence in sequences:
        sequence.name = tem.name

    blast_results.extend(sequences)
    blast_results.append(tem)

🏃🏼‍♀️ Running PBLAST
╭── protein name: TEM family beta-lactamase
├── accession: QGC48744.1
├── organism: Escherichia coli
├── e-value: 0.05
╰── max hits: 15


⬇️ Fetching protein sequences: 100%|██████████| 15/15 [00:11<00:00,  1.30it/s]


🎉 Done

🏃🏼‍♀️ Running PBLAST
╭── protein name: beta-lactamase TEM-109
├── accession: AAT46413.1
├── organism: Escherichia coli
├── e-value: 0.05
╰── max hits: 15


⬇️ Fetching protein sequences: 100%|██████████| 15/15 [00:13<00:00,  1.13it/s]

🎉 Done






## Pairwise alignment of unique sequence pairs

In [3]:
# Scoring parameters, forwarded unchanged to the aligner.
scoring = {
    "match": 1,
    "mismatch": -1,
    "gap_open": -1,
    "gap_extend": 0,
}

# Align the collected sequences in "global" mode.
# NOTE(review): n_jobs=8 is presumably the number of parallel workers — confirm.
alignments = multi_pairwise_alignment(
    blast_results, mode="global", n_jobs=8, **scoring
)

⛓️ Aligning sequences: 100%|██████████| 496/496 [00:01<00:00, 342.87it/s]


## Visualize the alignment network

In [4]:
# Which alignment/sequence attributes drive edge weight, node color and node label.
network_style = {"weight": "identity", "color": "name", "label": "organism"}

# NOTE(review): cutoff=0.996 presumably thresholds edges on the chosen weight
# (identity) — confirm the exact semantics in pyEED.
# The call stays the last expression of the cell so its output is still displayed.
pairwise_network(alignments=alignments, cutoff=0.996, **network_style)

{'TEM family beta-lactamase': 'blue', 'beta-lactamase TEM-109': 'red'}
