Import all required modules, covering every step from parsing to visualization.

In [None]:
import os
from Bio.PDB import PDBParser
import networkx as nx 
import matplotlib.pyplot as plt 
from itertools import combinations
import numpy as np 

# py3Dmol is an optional dependency: record whether it can be imported so the
# rest of the notebook can still run when it is not available.
try:
    import py3Dmol
    py3Dmol_installed = True
except ImportError:
    # Catch only a missing package. A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated errors raised on import.
    py3Dmol_installed = False

print("Packages loaded and py3Dmol installed:", py3Dmol_installed)

Parsing a .pdb file.

In [None]:
# PDB file to analyse; the relative path is resolved against the notebook's
# working directory.
pdb_file = "1CRN.pdb"

# QUIET=True is passed to the parser (per the Bio.PDB docs this suppresses the
# warnings it would otherwise emit while building the structure).
parser = PDBParser(QUIET=True)

# "protein" is the identifier attached to the parsed structure object.
structure = parser.get_structure(id="protein", file=pdb_file)

Extracting the residues and their 3D coordinates from the PDB structure.

In [None]:
# Collect one (chain id, residue number, C-alpha coordinate) tuple per residue.
residues = []

# Use only the first model. Multi-model files (e.g. NMR ensembles) repeat every
# residue once per model, which would inflate the residue count and yield
# duplicate (chain id, residue number) keys for the graph built later.
first_model = next(iter(structure), None)

for chain in (first_model if first_model is not None else ()):
    for residue in chain:
        # Residues without an atom named "CA" cannot be placed in the network.
        if 'CA' not in residue:
            continue
        ca = residue['CA']
        # Calcium ions are stored under the same atom name ("CA") as alpha
        # carbons; drop them so an ion never becomes a residue node.
        if ca.element == 'CA':
            continue
        # NOTE(review): only residue.id[1] (the residue number) is kept, so
        # residues that differ solely by insertion code share a key - confirm
        # this is acceptable for the structures being analysed.
        residues.append((chain.id, residue.id[1], ca.get_coord()))

print(f"loaded {len(residues)} residues. ")

Build a residue interaction network: add one node per residue, connect pairs whose Cα atoms are within 5 Å of each other, and store the distances as edge weights.

In [None]:
cutoff = 5.0  # In Angstroms
G = nx.Graph()

# One node per residue, keyed by (chain id, residue number); the coordinate is
# kept as a node attribute.
for chain_id, res_id, coord in residues:
    G.add_node((chain_id, res_id), coord=coord)

# Snapshot the coordinates once so the pair loop does plain dict lookups.
# The dict preserves the graph's node order, so pairs are visited in the same
# order as iterating over G.nodes directly.
coords = {node: attrs['coord'] for node, attrs in G.nodes(data=True)}

# Link every pair of residues whose stored coordinates lie within the cutoff
# and record the Euclidean distance as the edge weight.
for n1, n2 in combinations(coords, 2):
    distance = np.linalg.norm(coords[n1] - coords[n2])
    if distance <= cutoff:
        G.add_edge(n1, n2, weight=distance)

print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges.")


Compute node degree and betweenness centrality, then display the top 10 residues for each metric.

In [None]:
def _top_ranked(scores, n=10):
    """Return the ``n`` highest-scoring ``(node, score)`` pairs, best first."""
    return sorted(scores.items(), key=lambda pair: pair[1], reverse=True)[:n]


# Degree: number of edges attached to each node.
degree_dict = dict(G.degree())
# Betweenness is computed without a `weight` argument, i.e. the stored
# distances are not used for this metric.
betweenness_dict = nx.betweenness_centrality(G)

top_degree = _top_ranked(degree_dict)
print("Top 10 residues by degree: ", top_degree)

# The name `top_betweeness` (sic) is kept because later cells read it.
top_betweeness = _top_ranked(betweenness_dict)
print("Top 10 residues by betweenness: ", top_betweeness)

Extracting just the node identifiers for the top residues by degree and betweenness centrality.


In [None]:
# Drop the scores and keep only the node keys, preserving the ranked order.
top_degree_nodes = [entry[0] for entry in top_degree]
top_betweenness_nodes = [entry[0] for entry in top_betweeness]

Assigning colors and sizes to nodes: orange for top-degree hubs, red for top-betweenness bottlenecks, skyblue for others.

In [None]:
def _node_style(node):
    """Return the ``(color, size)`` pair used to draw ``node``.

    Hub membership is checked first, so a node that is in both top lists is
    drawn as a hub.
    """
    if node in top_degree_nodes:
        return 'orange', 500   # hubs
    if node in top_betweenness_nodes:
        return 'red', 450      # bottlenecks
    return 'skyblue', 300      # normal residues


# Build both lists in graph node order so they line up with the nodes when
# passed to the drawing call.
node_styles = [_node_style(node) for node in G.nodes()]
node_colors = [color for color, _ in node_styles]
node_sizes = [size for _, size in node_styles]


Visualizing the protein residue interaction network in 2D, highlighting hubs and bottlenecks with custom colors and sizes.

In [None]:
# Create the axes explicitly and hand them to networkx. When the current figure
# has no axes, nx.draw adds one that fills the entire canvas, which leaves no
# room for the title (it can be clipped outside the inline backend).
fig, ax = plt.subplots(figsize=(12, 10))

# Fixed seed so the force-directed layout is the same on every run.
pos = nx.spring_layout(G, seed=42)

nx.draw(
    G,
    pos,
    ax=ax,
    with_labels=True,
    labels={node: node[1] for node in G.nodes()},  # residue numbers
    node_color=node_colors,
    node_size=node_sizes,
    font_size=10,
    width=1.5
)

ax.set_title("Protein Residue Interaction Network (Top Residues Highlighted)")
ax.set_axis_off()
plt.show()


Visualizing the 3D protein structure with Py3Dmol, coloring top-degree residues orange and top-betweenness residues red.

In [None]:
# Render the 3D view only when py3Dmol was imported successfully in the first
# cell; an unconditional re-import here would crash the notebook when the
# optional package is missing.
if py3Dmol_installed:
    # Display the same file that was parsed and analysed above, so the
    # highlighted residues always refer to the structure behind the network
    # (and no network access is needed).
    with open(pdb_file) as pdb_handle:
        pdb_text = pdb_handle.read()

    view = py3Dmol.view()
    view.addModel(pdb_text, 'pdb')
    view.setStyle({'cartoon': {'color':'lightgrey'}})

    # Highlighting by degree
    for node in top_degree_nodes:
        chain, res_id = node
        view.addStyle({'chain': chain, 'resi': res_id}, {'stick': {'color':'orange'}})

    # Highlighting by betweenness
    for node in top_betweenness_nodes:
        # Residues already highlighted as hubs keep their orange color, which
        # matches the hub-first priority used for the 2D network plot.
        if node in top_degree_nodes:
            continue
        chain, res_id = node
        view.addStyle({'chain': chain, 'resi': res_id}, {'stick': {'color':'red'}})

    view.zoomTo()
    view.show()
else:
    print("py3Dmol is not installed; skipping the 3D structure view.")


Printing the total number of nodes and edges in the residue interaction network.

In [None]:
# Report the size of the residue interaction network, one line per quantity.
for label, count in (
    ("Number of nodes:", G.number_of_nodes()),
    ("Number of edges:", G.number_of_edges()),
):
    print(label, count)
