<a href="https://colab.research.google.com/github/DrFrank25/Syndecan_4-Ag73/blob/main/Normal_Mode_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Normal Mode Analysis (NMA)**

In [None]:
# ============================================
# MD -> Representative PDB -> Normal Mode Analysis (ANM) Pipeline
# ============================================
# Steps:
#   1) Upload MD simulation outputs (.tpr + .xtc) for each replicate
#   2) Generate representative PDBs containing only Cα atoms:
#        - Option A: Average structure (last FRACTION_EQUIL part of trajectory)
#        - Option B: Snapshot of the last frame
#   3) Perform ANM (ProDy) on each PDB
#   4) Save & download:
#        - High-res (1200 DPI) plots: Scree + Collectivity
#        - CSV file of eigenvalues
# ============================================

In [None]:
# --- Install dependencies (MDAnalysis, ProDy, tqdm, Biopython, pandas) ---
!pip -q install MDAnalysis prody tqdm biopython pandas


In [1]:
# --- Import libraries ---
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from google.colab import files

import MDAnalysis as mda
from MDAnalysis.analysis import align
from prody import parsePDB, ANM, calcCollectivity

In [None]:
# ---------- Parameters ----------
# User-tunable settings read by the cells below.
USE_AVERAGE       = True     # True = average PDB from last fraction; False = last-frame snapshot
FRACTION_EQUIL    = 0.20     # Fraction of the trajectory (taken from its end) used for averaging
N_MODES            = 20      # Number of ANM modes to compute
DPI_SAVE           = 1200    # Resolution (DPI) for saved figures
CA_SELECTION       = "protein and name CA"   # MDAnalysis selection string used to pick the Cα atoms
CSV_OUTPUT         = "ANM_results.csv"       # File name of the CSV that receives the eigenvalues

In [None]:
# ---------- Upload files ----------
# Ask the user for the MD outputs; the keys of the returned mapping are used
# as the uploaded file names.
print("Upload .tpr and .xtc pairs for each replicate:")
uploaded = files.upload()
fnames = sorted(uploaded.keys())
print("Uploaded files:", fnames)

tpr_files = sorted(f for f in fnames if f.lower().endswith(".tpr"))
xtc_files = sorted(f for f in fnames if f.lower().endswith(".xtc"))

if not tpr_files or not xtc_files:
    raise SystemExit("At least one (.tpr + .xtc) pair is required.")

# Replicates are paired by position in the sorted lists, so the k-th .tpr is
# matched with the k-th .xtc. When the counts differ the surplus files are
# ignored; say so explicitly, because a missing file can also shift the pairing.
n_reps = min(len(tpr_files), len(xtc_files))
if len(tpr_files) != len(xtc_files):
    ignored = tpr_files[n_reps:] + xtc_files[n_reps:]
    print(
        f"WARNING: {len(tpr_files)} .tpr vs {len(xtc_files)} .xtc file(s); "
        f"ignoring unpaired file(s): {ignored}. "
        "Check that the pairs listed below are the intended ones."
    )
pairs = list(zip(tpr_files[:n_reps], xtc_files[:n_reps]))

print(f"\nDetected {n_reps} replicate(s):")
for i, (tpr, xtc) in enumerate(pairs, start=1):
    print(f"  Rep {i}: {tpr} + {xtc}")

In [None]:
# ---------- Helper functions ----------
def write_CA_snapshot(universe, outfile_ca_pdb):
    """Save the Cα atoms of the final trajectory frame to a PDB file.

    Args:
        universe: MDAnalysis Universe holding the topology and trajectory.
        outfile_ca_pdb: Path of the PDB file to write.
    """
    calpha_atoms = universe.select_atoms(CA_SELECTION)
    # Indexing the trajectory moves the universe to that frame; the selected
    # atom group then reports the coordinates of the last frame.
    trajectory = universe.trajectory
    trajectory[-1]
    calpha_atoms.write(outfile_ca_pdb)

def write_CA_average(universe, outfile_ca_pdb, frac=0.2):
    """Align the trajectory and save the average Cα structure of its last part.

    The trajectory is aligned on the Cα selection (``in_memory=True``, so the
    aligned coordinates are held in memory on ``universe``), the Cα positions
    of the last ``frac`` portion of the frames are averaged, and the result
    is written as a PDB file through ProDy.

    Args:
        universe: MDAnalysis Universe holding the topology and trajectory.
        outfile_ca_pdb: Path of the PDB file to write.
        frac: Fraction of the trajectory, counted from the end, to average.
            Must satisfy ``0 < frac <= 1``.

    Raises:
        ValueError: If ``frac`` is outside ``(0, 1]`` or the trajectory has
            no frames.
        RuntimeError: If ProDy's Cα selection on the intermediate PDB is
            empty or does not match the number of averaged atoms.
    """
    # Validate first so a bad fraction fails fast, before the costly alignment.
    # frac <= 0 would average zero frames; frac > 1 would produce a negative
    # slice start and silently pick the wrong frames.
    if not 0.0 < frac <= 1.0:
        raise ValueError(f"frac must be in the interval (0, 1], got {frac!r}")

    # ProDy exposes PDB writing as a module-level function.
    from prody import writePDB

    align.AlignTraj(universe, universe, select=CA_SELECTION, in_memory=True).run()
    ca = universe.select_atoms(CA_SELECTION)

    n_frames = len(universe.trajectory)
    if n_frames == 0:
        raise ValueError("Trajectory contains no frames to average.")
    # Clamp so that at least the final frame is always included.
    start = min(int((1.0 - frac) * n_frames), n_frames - 1)

    # Accumulate in float64 to limit round-off when summing many frames.
    coords_sum = np.zeros_like(ca.positions, dtype=np.float64)
    count = 0
    for _ in universe.trajectory[start:]:
        coords_sum += ca.positions
        count += 1
    avg_coords = coords_sum / count

    # Save average Cα structure using ProDy: write a template PDB, load it,
    # overwrite its coordinates with the average, and write the final file.
    tmp_name = "_tmp_ca_snapshot.pdb"
    try:
        ca.write(tmp_name)
        strc = parsePDB(tmp_name)
        calphas = None if strc is None else strc.select('protein and name CA')
        if calphas is None or calphas.numAtoms() != len(avg_coords):
            n_found = 0 if calphas is None else calphas.numAtoms()
            raise RuntimeError(
                f"ProDy selected {n_found} Cα atoms but {len(avg_coords)} "
                f"were averaged; cannot write {outfile_ca_pdb}"
            )
        calphas.setCoords(avg_coords)
        writePDB(outfile_ca_pdb, calphas)
    finally:
        # Always clean up the intermediate file, even when a step above fails.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)

def run_anm_on_ca_pdb(ca_pdb_path, tag, n_modes=20, dpi=1200):
    """Run ANM on a Cα PDB and save scree and collectivity plots.

    Args:
        ca_pdb_path: Path of the PDB file to analyse.
        tag: Label used in the ANM model name, plot titles and file names.
        n_modes: Number of ANM modes requested from ProDy.
        dpi: Resolution used when saving the PNG figures.

    Returns:
        Tuple ``(eigvals, scree_name, coll_name)``: the eigenvalues returned
        by ``anm.getEigvals()`` and the file names of the two saved figures.

    Raises:
        RuntimeError: If the PDB yields no Cα selection or fewer than 3 atoms.
    """
    structure = parsePDB(ca_pdb_path)
    calphas = None if structure is None else structure.select('protein and name CA')
    if calphas is None or calphas.numAtoms() < 3:
        raise RuntimeError(f"Invalid Cα selection in {ca_pdb_path}")

    anm = ANM(f"ANM_{tag}")
    anm.buildHessian(calphas)
    anm.calcModes(n_modes=n_modes)
    eigvals = anm.getEigvals()

    # Scree plot
    scree_name = f"ANM_scree_{tag}.png"
    fig, ax = plt.subplots(figsize=(7, 5))
    try:
        ax.plot(range(1, len(eigvals) + 1), eigvals, "o-", linewidth=1.8)
        ax.set_xlabel("Mode Number", fontsize=12)
        # For ANM the variance of a mode is the inverse of its eigenvalue,
        # so the eigenvalue itself must not be labelled as variance-like.
        ax.set_ylabel("Eigenvalue (a.u.; inverse of variance)", fontsize=12)
        ax.set_title(f"ANM Scree Plot - {tag}", fontsize=13)
        ax.grid(True, alpha=0.4)
        fig.tight_layout()
        fig.savefig(scree_name, dpi=dpi, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

    # Collectivity
    m = min(10, len(anm))
    # The second positional parameter of calcCollectivity is `masses`, not the
    # atoms, so only the mode is passed (uniform masses). Calling it per mode
    # and flattening copes with a scalar or a one-element sequence result.
    collectivities = [
        float(np.atleast_1d(calcCollectivity(anm[k]))[0]) for k in range(m)
    ]
    coll_name = f"ANM_collectivity_{tag}.png"
    fig, ax = plt.subplots(figsize=(7, 5))
    try:
        ax.bar(range(1, m + 1), collectivities)
        ax.set_xlabel("Mode", fontsize=12)
        ax.set_ylabel("Collectivity", fontsize=12)
        ax.set_title(f"ANM Collectivity (first {m} modes) - {tag}", fontsize=13)
        fig.tight_layout()
        fig.savefig(coll_name, dpi=dpi, bbox_inches='tight')
    finally:
        plt.close(fig)

    return eigvals, scree_name, coll_name

In [None]:
# ---------- Main loop ----------
# For each replicate: build a Cα-only PDB (average or last frame), run ANM on
# it, and collect the eigenvalues and plot file names in `results`.
results = []
for i, (tpr, xtc) in enumerate(pairs, start=1):
    tag = f"rep{i}"
    print(f"\n==== Processing {tag}: {tpr} + {xtc} ====")

    u = mda.Universe(tpr, xtc)

    if USE_AVERAGE:
        ca_pdb = f"CA_avg_{tag}.pdb"
        print(f"Generating average Cα-PDB (last {int(FRACTION_EQUIL*100)}%) -> {ca_pdb}")
        write_CA_average(u, ca_pdb, frac=FRACTION_EQUIL)
    else:
        ca_pdb = f"CA_last_{tag}.pdb"
        print(f"Generating last-frame Cα-PDB -> {ca_pdb}")
        write_CA_snapshot(u, ca_pdb)

    # The ANM tag carries the same avg/last marker as the PDB file name.
    anm_tag = f"{tag}_avg" if USE_AVERAGE else f"{tag}_last"
    eigvals, scree_png, coll_png = run_anm_on_ca_pdb(
        ca_pdb,
        tag=anm_tag,
        n_modes=N_MODES,
        dpi=DPI_SAVE,
    )

    record = {
        "replicate": tag,
        "pdb": ca_pdb,
        "eigenvalues": eigvals.tolist(),
        "scree_plot": scree_png,
        "collectivity_plot": coll_png,
    }
    results.append(record)


In [None]:
# ---------- Save CSV with eigenvalues ----------
# One row per replicate; the k-th eigenvalue goes into column "eig_k" (1-based).
rows = [
    {
        "replicate": r["replicate"],
        **{f"eig_{k}": value for k, value in enumerate(r["eigenvalues"], start=1)},
    }
    for r in results
]
df = pd.DataFrame(rows)

df.to_csv(CSV_OUTPUT, index=False)
print(f"\nEigenvalues saved to {CSV_OUTPUT}")

In [None]:
# ---------- Auto-download ----------
# Send the CSV first, then each replicate's scree and collectivity plots.
files.download(CSV_OUTPUT)
for r in results:
    for plot_key in ("scree_plot", "collectivity_plot"):
        files.download(r[plot_key])

print("\nPipeline completed successfully!")