In [None]:
from Bio.PDB import PDBParser
import pymol
from pymol import cmd
import matplotlib.pyplot as plt
import os


In [None]:
def calculate_distance(pdb_file, chain_id, res1, res2, atom_name="CA"):
    """Calculate the distance between two residues in a protein structure.

    The distance is measured between one named atom in each residue (the
    alpha carbon by default), looked up in the first model (index 0) of the
    parsed structure.

    Args:
        pdb_file: Path to the PDB file to parse.
        chain_id: Identifier of the chain that holds both residues.
        res1: Identifier of the first residue within the chain.
        res2: Identifier of the second residue within the chain.
        atom_name: Name of the atom to measure from in each residue.
            Defaults to "CA" (alpha carbon).

    Returns:
        The value of ``atom1 - atom2``; Biopython defines subtraction of two
        atoms as the distance between them.

    Raises:
        KeyError: If the first model, the chain, either residue, or the
            requested atom cannot be found. The message names the file,
            chain, residue and atom involved.
    """
    parser = PDBParser()
    structure = parser.get_structure("Protein", pdb_file)

    try:
        chain = structure[0][chain_id]
    except KeyError as exc:
        raise KeyError(
            f"Chain {chain_id!r} not found in model 0 of {pdb_file}"
        ) from exc

    # Resolve the atoms in argument order so the first missing one is reported.
    atoms = []
    for residue_id in (res1, res2):
        try:
            atoms.append(chain[residue_id][atom_name])
        except KeyError as exc:
            raise KeyError(
                f"Atom {atom_name!r} of residue {residue_id!r} not found "
                f"in chain {chain_id!r} of {pdb_file}"
            ) from exc

    atom1, atom2 = atoms
    return atom1 - atom2


In [None]:
def visualize_in_pymol(pdb_file, residues_to_highlight,
                       output_path="processed_results/protein_visualization.png"):
    """Load a structure in PyMOL, highlight residues, and save a PNG image.

    The structure is drawn as a cyan cartoon; each requested residue is given
    a named selection ``highlight_<resi>``, shown as sticks and coloured
    yellow. The image is then written to ``output_path``.

    Args:
        pdb_file: Path to the structure file to load.
        residues_to_highlight: Iterable of residue numbers to highlight.
        output_path: Destination of the PNG image. Defaults to
            "processed_results/protein_visualization.png". Missing parent
            directories are created.

    Returns:
        None. The function's effects are the PyMOL session state, the image
        file, and a confirmation line printed to stdout.
    """
    cmd.reinitialize()  # Reset PyMOL session
    cmd.load(pdb_file)
    cmd.show("cartoon")
    cmd.color("cyan", "all")

    # NOTE(review): "resi N" is not restricted to a chain, so presumably the
    # same residue number is highlighted in every chain of the loaded
    # structure -- confirm this is intended.
    for resi in residues_to_highlight:
        selection = f"resi {resi}"
        cmd.select(f"highlight_{resi}", selection)
        cmd.show("sticks", selection)
        cmd.color("yellow", selection)

    # Create the target directory first; otherwise the image cannot be
    # written on a fresh checkout where it does not exist yet.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    cmd.png(output_path)
    # Report the path actually written, not just the bare file name.
    print(f"Visualization saved as {output_path}")

In [None]:
def analyze_protein(file_path, chain_id, residues_of_interest):
    """Measure the distance between each consecutive pair of residues.

    Walks through ``residues_of_interest`` in order, pairing every residue
    with the one that follows it, and prints each measured distance.

    Args:
        file_path: Path to the structure file handed to ``calculate_distance``.
        chain_id: Chain identifier handed to ``calculate_distance``.
        residues_of_interest: Ordered sequence of residue identifiers.

    Returns:
        A list with one distance per consecutive pair; empty when fewer than
        two residues are given.
    """
    print(f"Analyzing protein structure: {file_path}")

    # Pair each residue with its successor: (r0, r1), (r1, r2), ...
    neighbours = zip(residues_of_interest, residues_of_interest[1:])

    measured = []
    for start_res, end_res in neighbours:
        gap = calculate_distance(file_path, chain_id, start_res, end_res)
        measured.append(gap)
        print(f"Distance between residue {start_res} and {end_res}: {gap:.2f} Å")
    return measured

In [None]:
def visualize_distances(distances, residues):
    """Draw a bar chart with one bar per consecutive residue pair.

    Args:
        distances: Sequence of distances, one per bar.
        residues: Sequence of residue identifiers; bar ``i`` is labelled
            ``"<residues[i]>-<residues[i+1]>"``, so it needs at least
            ``len(distances) + 1`` entries.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    positions = range(len(distances))
    ax.bar(positions, distances, color="skyblue")

    # Label every bar with the pair of residues it spans.
    pair_labels = [f"{residues[idx]}-{residues[idx + 1]}" for idx in positions]
    ax.set_xticks(positions)
    ax.set_xticklabels(pair_labels, rotation=45)

    ax.set_ylabel("Distance (Å)")
    ax.set_xlabel("Residue Pairs")
    ax.set_title("Distances Between Residues")

    fig.tight_layout()
    plt.show()

In [None]:

# Input configuration. Example protein: Beta-Galactosidase (1JZC)
protein_dir = "data/protein_models"
pdb_file = os.path.join(protein_dir, "1JZC.pdb")
chain_id = "A"
residues_of_interest = [200, 205, 210, 215]

# Step 1: measure the distance between each consecutive pair of residues.
distances = analyze_protein(
    file_path=pdb_file,
    chain_id=chain_id,
    residues_of_interest=residues_of_interest,
)

# Step 2: plot the measured distances as a bar chart.
visualize_distances(distances=distances, residues=residues_of_interest)

# Step 3: render the structure in PyMOL with the residues highlighted.
visualize_in_pymol(pdb_file=pdb_file, residues_to_highlight=residues_of_interest)

print("Analysis and visualization complete.")