# Network Visualization

In [1]:
%reload_ext autoreload
%autoreload 2
import os

from pyeed.core import ProteinInfo, Alignment
from pyeed.aligners import PairwiseAligner
from pyeed.network import SequenceNetwork

In [2]:
# Accession of the protein this notebook is built around.
TARGET_ACCESSION = "UCS38941.1"

# Look up the ProteinInfo entry for that accession; the bare name on the last
# line makes the cell display the fetched object.
metTK = ProteinInfo.get_id(TARGET_ACCESSION)
metTK

ProteinInfo(id='proteininfo0', source_id='UCS38941.1', name='TEM1', sequence='MSASEMRAASERVGEERNSLPSVRNQVDIQVGLIGDAQVGKTSLMVKYVQNIFDEEYTQTLGVNFLKRKVSIRSTDIVFSLMDLGGQREFINMLPIATLGSSVIILLFDLTRPETLNSIKEWYRQALGLNDSAIPILVGTKYDLFIDLEEEYQEKVSKTSMKYAQVMDAPLIFCSTAKSINVQKIFKVALAKIFDLTLTIPEINEIGDPLLIYKELGSKKNKSKNSSKPRRRSPVDNENKELVSQPLNYGHTSE', organism=Organism(id='organism1', name='Nakaseomyces glabratus', taxonomy_id='5478', domain='Eukaryota', kingdom='Fungi', phylum='Ascomycota', tax_class='Saccharomycetes', order='Saccharomycetales', family='Saccharomycetaceae', genus='Nakaseomyces', species='Nakaseomyces glabratus'), citation=Citation(id='citation0', doi=None, pubmed_id=None, medline_id=None, year=None, authors=[]), family_name=None, regions=[ProteinRegion(id='proteinregion0', name='Spg1', spans=[Span(id='span0', start=28, end=210)], note='Septum-promoting GTPase (Spg1); cd04128', cross_reference='CDD:206701', type=None)], sites=[Site(id='site0', name='other', type='unannotated', positions=

In [3]:
# Run a local blastp search with `metTK` as the query.
#
# The BLAST database location is machine-specific, so it can be overridden by
# setting the BLAST_DB_PATH environment variable before starting the kernel.
# When the variable is unset, the original local path is used as the fallback.
BLAST_DB_PATH = os.environ.get(
    "BLAST_DB_PATH",
    "/Users/max/Documents/GitHub/blast-db/data/source",
)

# Number of hits requested from blastp (passed through as `n_hits`).
N_BLAST_HITS = 200

blast_results = metTK.blastp(
    db_path=BLAST_DB_PATH,
    n_hits=N_BLAST_HITS,
)

🏃 Running BLAST
⬇️ Fetching 200 protein entries from NCBI...
⬇️ Fetching 194 taxonomy entries from NCBI...


## Multi Pairwise Alignment

In [4]:
# Build an Alignment from the BLAST results, using PairwiseAligner as the aligner.
# In the recorded run this step reported 20100 pairwise alignments.
multi_parwise_alignments = Alignment.from_sequences(
    blast_results, aligner=PairwiseAligner
)

⛓️ Running pairwise alignments: 100%|██████████| 20100/20100 [00:03<00:00, 5759.82it/s]


In [14]:
# Build a SequenceNetwork from the BLAST results and their pairwise alignments,
# register `metTK` as the target, and render the network.
# NOTE(review): threshold=0.76 together with weight="identity" presumably keeps
# only edges whose identity exceeds 0.76 — confirm against SequenceNetwork.
n = SequenceNetwork(
    sequences=blast_results,
    pairwise_alignments=multi_parwise_alignments,
    threshold=0.76,
    weight="identity",
    dimensions=3,
    color="species",
)
n.add_target(metTK)
n.visualize()

## Clustering

In [35]:
%reload_ext autoreload
%autoreload 2
from pyeed.containers.mmseqs2 import MMSeqs2

In [36]:
# Instantiate the MMSeqs2 container wrapper and run it.
# NOTE(review): the recorded output of this cell is a TypeError ("descriptor
# '_tempdir_path' for 'AbstractContainer' objects doesn't apply to a 'MMSeqs2'
# object"), so the call did not complete in the saved run. The cause is not
# visible in this notebook — TODO: investigate in pyeed.containers.
c = MMSeqs2().run_container()

TypeError: descriptor '_tempdir_path' for 'AbstractContainer' objects doesn't apply to a 'MMSeqs2' object

In [None]:
# Second network over the same BLAST results and alignments, this time with
# threshold=0.9 and color="taxonomy_id". Rebinds `n`, so any previously built
# network held under that name is no longer reachable through it.
n = SequenceNetwork(
    sequences=blast_results,
    pairwise_alignments=multi_parwise_alignments,
    threshold=0.9,
    weight="identity",
    dimensions=3,
    color="taxonomy_id",
)
n.add_target(metTK)
n.visualize()

In [None]:
from rich import print
from rich.progress import Progress

In [None]:
import numpy as np
import time


# 3x3 integer grid holding 0..8, used as dummy work items for the progress demo.
li = np.reshape(np.arange(9), (3, 3))

In [None]:
# Total number of elements in `li`; the same value is used as the
# progress-bar total in the progress demo cell.
li.size

9

In [None]:
def run_progress_pass(items, delay=0.5, description="Working"):
    """Advance a transient rich progress bar once per element of ``items``.

    Args:
        items: Two-level iterable exposing ``.size`` (here the NumPy array
            ``li``); ``.size`` is used as the bar total and every element of
            every row advances the bar by one step.
        delay: Seconds to sleep after each step, standing in for real work.
        description: Label passed to ``Progress.add_task``.
    """
    # transient=True is passed through unchanged from the original cell.
    with Progress(transient=True) as progress:
        task = progress.add_task(description, total=items.size)
        for row in items:
            # The element values are irrelevant; only the count matters.
            for _ in row:
                progress.update(task, advance=1)
                time.sleep(delay)


# Number of consecutive progress bars to run (the original cell had three
# copy-pasted, identical blocks).
N_PASSES = 3

print("⬇️ Downloading...")

for _ in range(N_PASSES):
    run_progress_pass(li)

print("🎉 Done!")

Output()

Output()

Output()

KeyboardInterrupt: 

In [None]:
from pyeed.core import ProteinInfo

# Construct a ProteinInfo directly from a sequence string.
EXAMPLE_SEQUENCE = "MTEITAAMVKELREDKAVQLLREKGLGK"
protein = ProteinInfo(sequence=EXAMPLE_SEQUENCE)

In [None]:
# Fetch the entry for the given accession and export it with `to_fasta`,
# passing "protein.fasta" as the destination.
accession = "UCS38941.1"
fasta_path = "protein.fasta"

protein = ProteinInfo.get_id(accession)
protein.to_fasta(fasta_path)