In [3]:
import os

from pyEED.alignment.pairwise import multi_pairwise_alignment
from pyEED.core import ProteinInfo
from pyEED.ncbi.utils import load_accessions
from pyEED.network import pairwise_network

# Visualize networks of pairwise sequence alignments

In the following example, a protein BLAST search is conducted for the TEM-1 and TEM-109 variants of beta-lactamase. After pairwise alignment of all resulting sequences, a network is constructed from the alignment scores and then visualized.

## PBLAST search for seed sequences

The results of each BLAST search are renamed to the name of the query protein variant.

In [4]:
# Read the NCBI API key from the environment instead of embedding it in the
# notebook, so the secret is never committed or shared with the rendered file.
# NOTE(review): the key that used to be hardcoded in this cell should be
# treated as leaked and revoked in the NCBI account settings.
ncbi_api_key = os.environ.get("NCBI_API_KEY")
if not ncbi_api_key:
    raise RuntimeError(
        "NCBI_API_KEY is not set; export your NCBI API key before running this cell."
    )

# Seed sequences used as BLAST queries, fetched from NCBI by accession.
tem1 = ProteinInfo.from_ncbi("QGC48744.1")
tem109 = ProteinInfo.from_ncbi("AAT46413.1")

# Collect the hits of every query plus the query itself in one flat list.
blast_results = []
for tem in [tem1, tem109]:
    sequences = tem.pblast(e_value=0.05, n_hits=50, api_key=ncbi_api_key)

    # Label each hit with the name of the query that produced it; the network
    # cell later colours nodes by this `name` attribute.
    for sequence in sequences:
        sequence.name = tem.name

    blast_results.extend(sequences)
    blast_results.append(tem)

🏃🏼‍♀️ Running PBLAST
╭── protein name: TEM family beta-lactamase
├── accession: QGC48744.1
├── organism: Escherichia coli
├── e-value: 0.05
╰── max hits: 50


⬇️ Fetching protein sequences: 100%|██████████| 50/50 [02:21<00:00,  2.83s/it]

🎉 Done






## Pairwise alignment of unique sequence pairs

In [9]:
# Settings for the pairwise alignment run, gathered in one place so they are
# easy to read and tune: alignment mode, scoring values and worker count.
alignment_settings = {
    "mode": "global",
    "match": 1,
    "mismatch": -1,
    "gap_open": -1,
    "gap_extend": 0,
    "n_jobs": 8,
}

# Align the collected BLAST results with the settings above.
alignments = multi_pairwise_alignment(blast_results, **alignment_settings)

⛓️ Aligning sequences:   0%|          | 0/1275 [00:00<?, ?it/s]

⛓️ Aligning sequences: 100%|██████████| 1275/1275 [00:00<00:00, 5052.69it/s]


## Visualize the alignment network

In [8]:
# Which alignment/sequence attributes drive the edge weight, the node colour
# and the node label of the network plot.
network_options = {
    "weight": "identity",
    "color": "name",
    "label": "organism",
}

# Kept as the last expression of the cell so its result is still displayed.
pairwise_network(alignments=alignments, **network_options)

{'TEM family beta-lactamase': 'blue'}
