<a href="https://colab.research.google.com/github/DrFrank25/Syndecan_4-Ag73/blob/main/PCA_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Script for PCA analysis with Python**


In [1]:
# --- Intall MDAnalysis
!pip install MDAnalysis tqdm

Collecting MDAnalysis
  Downloading mdanalysis-2.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (108 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/108.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.7/108.7 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Collecting GridDataFormats>=0.4.0 (from MDAnalysis)
  Downloading GridDataFormats-1.0.2-py3-none-any.whl.metadata (4.9 kB)
Collecting mmtf-python>=1.0.0 (from MDAnalysis)
  Downloading mmtf_python-1.1.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting mda-xdrlib (from MDAnalysis)
  Downloading mda_xdrlib-0.2.0-py3-none-any.whl.metadata (19 kB)
Collecting mrcfile (from GridDataFormats>=0.4.0->MDAnalysis)
  Downloading mrcfile-1.5.4-py2.py3-none-any.whl.metadata (7.0 kB)
Downloading mdanalysis-2.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

In [None]:
# --- Import libraries ---
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
from tqdm import tqdm
import MDAnalysis as mda
from MDAnalysis.analysis import align

In [None]:
# === Upload GROMACS files ===
# Opens the Colab upload dialog and keeps its return value in `uploaded`.
# NOTE(review): the loading cell opens "md.tpr" and "traj.xtc" by those exact
# names, so the uploaded files presumably need to be called that — confirm.
print("Please upload md.tpr and traj.xtc")
uploaded = files.upload()

In [None]:
# Load trajectory (must have md.tpr and traj.xtc)
# Both paths are relative, so the files are read from the current working
# directory. The first argument is the topology, the second the coordinates.
u = mda.Universe("md.tpr", "traj.xtc")

In [None]:
# Select C-alpha atoms
# `ca` is an AtomGroup of the atoms named CA that belong to the protein.
ca = u.select_atoms("protein and name CA")

In [None]:
# Align trajectory to the first frame to remove overall translation/rotation.
# AlignTraj reads the reference coordinates from the frame the reference is
# currently on, so rewind to frame 0 explicitly instead of relying on kernel state.
u.trajectory[0]
ref = u.select_atoms("protein and name CA")
# in_memory=True loads the trajectory into memory and stores the fitted
# coordinates there, so later iteration over u.trajectory sees aligned frames.
aligner = align.AlignTraj(u, ref, select="protein and name CA", in_memory=True).run()

In [None]:
# ===== Build coordinate matrix =====
# Each frame contributes one row holding the flattened coordinates of the
# C-alpha selection `ca`; columns are therefore the individual coordinates.
print("Building coordinate matrix...")

coords = []
for ts in tqdm(u.trajectory, desc="Processing frames"):
    # Stepping through the trajectory updates the positions seen through `ca`.
    # The original referenced an undefined name `protein` here (NameError).
    coords.append(ca.positions.flatten())

X = np.array(coords)
X -= X.mean(axis=0)  # mean-center every coordinate across frames

In [None]:
# ===== Covariance matrix =====
# X holds one row per frame and one column per coordinate; np.cov treats rows
# as variables by default, hence the transpose to get a coordinate-by-coordinate
# covariance matrix.
print("Computing covariance matrix...")
cov = np.cov(X.T)

In [None]:
# === Eigen-decomposition ===
# eigh is NumPy's solver for symmetric (Hermitian) matrices such as `cov`;
# column i of eigvecs is the eigenvector belonging to eigvals[i].
eigvals, eigvecs = np.linalg.eigh(cov)

In [None]:
# Reorder the eigenpairs so the largest eigenvalue comes first.
# The same permutation is applied to the eigenvector columns to keep each
# eigenvalue paired with its eigenvector.
idx = np.flip(np.argsort(eigvals))
eigvals, eigvecs = eigvals[idx], eigvecs[:, idx]

In [None]:
# ===== Projections on PC1 and PC2 =====
# Multiply the mean-centered coordinates by the first two eigenvector columns;
# the result has one row per frame and two columns (PC1, PC2).
proj = X @ eigvecs[:, :2]


In [None]:
# ===== Plotting =====
# Both figures are written to PNG files and then closed, so nothing is shown inline.
print("Generating plots...")

# Scree plot of the leading eigenvalues.
# Show at most 10 components, but never more than exist: with fewer than 10
# eigenvalues a fixed range of 10 x-values raises a shape-mismatch ValueError.
n_shown = min(10, len(eigvals))
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(np.arange(1, n_shown + 1), eigvals[:n_shown], "o-", lw=2)
ax.set_xlabel("Principal Component", fontsize=12)
ax.set_ylabel("Eigenvalue (variance)", fontsize=12)
ax.set_title("Scree Plot of PCA", fontsize=14)
ax.grid(True)
fig.tight_layout()
fig.savefig("PCA_scree.png", dpi=1200, bbox_inches="tight")
plt.close(fig)

# PC1 vs PC2 plot; points are colored by row index of `proj` (the frame index).
fig, ax = plt.subplots(figsize=(7, 6))
points = ax.scatter(proj[:, 0], proj[:, 1], alpha=0.5, s=10, c=np.arange(len(proj)))
ax.set_xlabel("PC1", fontsize=12)
ax.set_ylabel("PC2", fontsize=12)
ax.set_title("PCA Projection (PC1 vs PC2)", fontsize=14)
fig.colorbar(points, ax=ax, label="Frame Index")
ax.grid(True)
fig.tight_layout()
fig.savefig("PCA_projection.png", dpi=1200, bbox_inches="tight")
plt.close(fig)


In [None]:
 ===== Download Figures =====
print("Saving and downloading figures...")
files.download("PCA_scree.png")
files.download("PCA_projection.png")

print("PCA analysis completed!")