### TEM Beta Lactamase Analysis 001

This is the main working notebook; it will contain the complete analysis of the TEM beta-lactamase.

In [1]:
%reload_ext autoreload
%autoreload 2
import os
import sys
import py4cytoscape as p4c
from pyeed.core import ProteinInfo, Alignment
from pyeed.aligners import PairwiseAligner
from pyeed.network import SequenceNetwork
import networkx as nx


##### starting point for analysis

The starting point for the beta-lactamase analysis is a TEM protein, possibly TEM-1, although this is not yet certain.

In [2]:
# Reference protein; its source_id is what the data folder names below are built from.
starting_protein_tem = ProteinInfo.get_id("QGC48744.1")

# One query protein per TEM variant, fetched in the order listed.
# The first accession is the same one used for `starting_protein_tem`.
(
    starting_protein_tem_1_1,
    starting_protein_tem_1_2,
    starting_protein_tem_33,
    starting_protein_tem_109,
) = (
    ProteinInfo.get_id(accession)
    for accession in ("QGC48744.1", "QDG00224.1", "QWY17510.1", "AAT46413.1")
)

# Parent directory of the current working directory; every data path is built relative to it.
current_path = os.path.dirname(os.getcwd())

##### simple search & save & alignment

Here we run a BLAST search against the NCBI database and create the alignments.

In [3]:
# BLAST search settings shared by all queries below:
#   n_hits  - maximum number of hits requested per query protein
#   e_value - e-value cutoff passed to the search
n_hits, e_value = 200, 0.05

In [4]:
# Run one NCBI BLASTP search per TEM reference protein, all with the shared
# n_hits / e_value settings. Each call goes out to NCBI, so this cell can be
# slow and may fail on transient network errors.
blast_results_starting_protein_1_1 = starting_protein_tem_1_1.ncbi_blastp(n_hits=n_hits, e_value=e_value)
blast_results_starting_protein_1_2 = starting_protein_tem_1_2.ncbi_blastp(n_hits=n_hits, e_value=e_value)
blast_results_starting_protein_33 = starting_protein_tem_33.ncbi_blastp(n_hits=n_hits, e_value=e_value)
blast_results_starting_protein_109 = starting_protein_tem_109.ncbi_blastp(n_hits=n_hits, e_value=e_value)

# Merge the four result sets into ONE FLAT collection of hits.
# Appending a whole result list would nest it as a single element, and the
# cells that consume this collection read `hit.id` / `hit.json()` from every
# element. Building a new list also leaves the per-query results untouched,
# so re-running this part does not keep growing the first result list.
blast_results_starting_protein = [
    hit
    for blast_results in (
        blast_results_starting_protein_1_1,
        blast_results_starting_protein_1_2,
        blast_results_starting_protein_33,
        blast_results_starting_protein_109,
    )
    for hit in blast_results
]

🏃🏼‍♀️ Running PBLAST
╭── protein name: TEM family beta-lactamase
├── accession: QGC48744.1
├── organism: Escherichia coli
├── e-value: 0.05
╰── max hits: 200
⬇️ Fetching 200 protein entries from NCBI...


Attempt 1 of 3 failed: Error fetching data from NCBI: IncompleteRead(0 bytes read)


Attempt 1 of 3 failed: Error fetching data from NCBI: IncompleteRead(0 bytes read)


KeyboardInterrupt: 

In [None]:
# Write every BLAST hit to its own JSON file.
# The folder name encodes the reference accession (dots removed), the number
# of requested hits and the e-value cutoff scaled by 1000.
blast_folder_name = "data_blast_search_big_{}_{}_{}".format(
    starting_protein_tem.source_id.replace('.', ''),
    n_hits,
    int(e_value*1000),
)
output_folder_blast_search = os.path.join(current_path, "TEM-lactamase", "data", blast_folder_name)
os.makedirs(output_folder_blast_search, exist_ok=True)

for hit in blast_results_starting_protein:
    hit_file = output_folder_blast_search + "/{}.json".format(hit.id)
    with open(hit_file, "w") as f:
        f.write(hit.json())



##### network building & visualization

In [7]:
%reload_ext autoreload
%autoreload 2
# read in the saved fetched results
n_hits = 30
e_value = 0.05
name = starting_protein_tem.source_id.replace('.', '')
read_blast_folder = os.path.join(current_path, "TEM-lactamase", "data", "data_blast_search_{}_{}_{}".format(name, n_hits, int(e_value*1000)))
protein_list = []
for file in os.listdir(read_blast_folder):
    with open(os.path.join(read_blast_folder, file), "r") as f:
        protein_list.append(ProteinInfo.from_json(f))


In [8]:
# Build the pairwise alignments of the loaded proteins.
mode = "global"
alignment_blast_search = Alignment.from_sequences(protein_list, aligner=PairwiseAligner, mode=mode)

# Store each alignment as its own JSON file. The folder name encodes the
# alignment mode, the reference accession (dots removed), n_hits and the
# e-value cutoff scaled by 1000.
alignment_folder_name = "data_alignments_pairwise_{}_{}_{}_{}".format(
    mode,
    starting_protein_tem.source_id.replace('.', ''),
    n_hits,
    int(e_value*1000),
)
output_folder_alignments = os.path.join(current_path, "TEM-lactamase", "data", alignment_folder_name)
os.makedirs(output_folder_alignments, exist_ok=True)

for alignment in alignment_blast_search:
    alignment_file = output_folder_alignments + "/{}.json".format(alignment.id)
    with open(alignment_file, "w") as f:
        f.write(alignment.json())

⛓️ Running pairwise alignments:   0%|          | 0/465 [00:00<?, ?it/s]

⛓️ Running pairwise alignments: 100%|██████████| 465/465 [00:03<00:00, 126.23it/s]


In [9]:
# Spot-check a single alignment (position 100 in the result list) by dumping it as JSON.
example_alignment = alignment_blast_search[100]
print(example_alignment.json())

{
  "id": "pairwisealignment100",
  "method": "global",
  "input_sequences": [
    {
      "id": "sequence3",
      "source_id": "ANG22502.1",
      "sequence": "MGIQRSRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW"
    },
    {
      "id": "sequence17",
      "source_id": "ANG17639.1",
      "sequence": "MSAQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW"
    }
  ],
  "aligned_sequences": [
    {
      "id": "sequence231",
      "source_id": "ANG22502.1",
      "sequence": "MGIQRS----RVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIE

In [7]:
# Check that a running Cytoscape instance is reachable from this kernel.
p4c.cytoscape_ping()
# Kept as the final bare expression so the notebook displays the returned version info.
p4c.cytoscape_version_info()

You are connected to Cytoscape!


{'apiVersion': 'v1',
 'cytoscapeVersion': '3.10.2',
 'automationAPIVersion': '1.9.0',
 'py4cytoscapeVersion': '1.9.0'}