# Reproducibility Notebook
## CDK2 AI Drug Discovery (EMBS)

This notebook reproduces the core figures from:
**Nayarisseri et al., Nature Communications (submitted)**

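It regenerates the root-mean-square deviation (RMSD), principal component analysis (PCA), dynamic cross-correlation matrix (DCCM) and free energy landscape (FEL) plots from the MD trajectory in `../md_simulation/` (`md.xtc` with topology `md.gro`), and saves them as `RMSD.png`, `PCA.png`, `DCCM.png` and `FEL.png`.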

In [None]:
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from scipy.stats import gaussian_kde

## Load MD trajectory

In [None]:
# Read the trajectory with its topology and keep only the protein atoms.
traj = md.load('../md_simulation/md.xtc', top='../md_simulation/md.gro')
protein = traj.topology.select('protein')
traj = traj.atom_slice(protein)
# The first frame serves as the reference structure for all analyses.
ref = traj[0]
# Fit every frame onto the reference (in place) so that the raw coordinates
# used by the PCA and DCCM cells describe internal motion only, not overall
# translation/rotation of the protein in the box. RMSD and Rg are unaffected
# by this rigid-body fit.
traj.superpose(ref)

## Root-Mean-Square Deviation (RMSD)

In [None]:
# RMSD of every frame with respect to the reference frame `ref`;
# the resulting array is reused by the free energy landscape cell.
rmsd = md.rmsd(traj, ref)

# Draw on pyplot's current axes, then write the figure to disk and display it.
ax = plt.gca()
ax.plot(rmsd)
ax.set_xlabel('Frame')
ax.set_ylabel('RMSD (nm)')
plt.savefig('RMSD.png')
plt.show()

## Principal Component Analysis (PCA)

In [None]:
# PCA on the Cartesian coordinates: each frame is flattened into one row of
# the data matrix, which is then projected onto its first two components.
n_frames = traj.n_frames
X = traj.xyz.reshape(n_frames, -1)
pca = PCA(n_components=2)
PC = pca.fit_transform(X)

# One point per frame in the PC1/PC2 plane.
pc1, pc2 = PC[:, 0], PC[:, 1]
ax = plt.gca()
ax.scatter(pc1, pc2, s=5)
ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
plt.savefig('PCA.png')
plt.show()

## Dynamic Cross Correlation Matrix (DCCM)

In [None]:
# Dynamic cross-correlation between every pair of atoms in `traj`:
#   C_ij = <d_i . d_j> / sqrt(<d_i^2> <d_j^2>)
# where d_i is atom i's displacement from its trajectory-averaged position and
# <.> averages over frames. The whole matrix is obtained from one matrix
# product instead of an O(n_atoms^2) Python double loop, which is
# impractically slow once all protein atoms are included.
coords = traj.xyz
mean = coords.mean(axis=0)
delta = coords - mean
n = delta.shape[1]
# Move the atom axis last and merge frames with the xyz components, giving a
# (n_frames * 3, n_atoms) matrix whose column i holds every displacement
# component of atom i.
stacked = delta.transpose(0, 2, 1).reshape(-1, n)
# Mean of delta_i * delta_j over frames and components, for all pairs at once.
cov = (stacked.T @ stacked).astype(np.float64) / stacked.shape[0]
# Per-atom RMS fluctuation. An atom that never moves yields 0/0 -> NaN, the
# same outcome the pair-by-pair formula produces.
sigma = np.sqrt(np.diag(cov))
dccm = cov / np.outer(sigma, sigma)
# Diverging colormap pinned to [-1, 1]: anti-correlated to correlated motion.
plt.imshow(dccm, cmap='bwr', vmin=-1, vmax=1)
plt.colorbar()
plt.savefig('DCCM.png')
plt.show()

## Free Energy Landscape (FEL)

In [None]:
# Landscape over (RMSD, Rg): estimate the joint density of the two per-frame
# quantities with a Gaussian KDE and plot the negative log of that density.
# `rmsd` must already exist from the RMSD cell.
# NOTE(review): -log(density) is presumably read as free energy in units of
# k_B*T, defined up to an additive constant -- confirm against the paper.
rg = md.compute_rg(traj)
xy = np.vstack((rmsd, rg))
kde = gaussian_kde(xy)

# 100 x 100 evaluation grid spanning the observed range of both quantities.
rmsd_lo, rmsd_hi = rmsd.min(), rmsd.max()
rg_lo, rg_hi = rg.min(), rg.max()
xi, yi = np.mgrid[rmsd_lo:rmsd_hi:100j, rg_lo:rg_hi:100j]
grid_points = np.vstack((xi.ravel(), yi.ravel()))
zi = kde.evaluate(grid_points)
F = np.negative(np.log(zi)).reshape(xi.shape)

# Filled contour plot with 50 levels; the colorbar is tied to that contour set.
ax = plt.gca()
filled = ax.contourf(xi, yi, F, 50)
ax.set_xlabel('RMSD (nm)')
ax.set_ylabel('Rg (nm)')
plt.colorbar(filled)
plt.savefig('FEL.png')
plt.show()