# Cluster Transitions for Dynamic by Design Data

## Imports

In [None]:
from pathlib import Path

import numpy as np

## Acquire Trajectories

In [None]:
# Resolve the data directory relative to the current working directory and
# point at the folder holding the XTC trajectory files.
data_dir = Path('./data').resolve()
xtc_dir = data_dir.joinpath('Disordered_By_Design/XTC_files')

# glob() yields entries in an arbitrary, filesystem-dependent order; sorting
# makes the list (and any index-based access to it) reproducible across runs.
unique_names = sorted(name.stem for name in xtc_dir.glob('*.xtc'))

print(f"{len(unique_names)} unique starting configurations:")
print(unique_names)

In [None]:
from Bio import PDB

# Parse one sample PDB file with Biopython and report how many models it holds.
pdb_dir = data_dir.joinpath('Disordered_By_Design/2KMV/')
sample_file = pdb_dir.joinpath('2KMV_01_02.pdb')

sample_pdb = PDB.PDBParser().get_structure('sample', sample_file)
# The previous expression iterated over atoms (and yielded the unrelated name
# `_`) while the message talks about models. len() of a Biopython structure is
# the number of its children, i.e. its models, which is what the message reports.
print(f"Sample structure with {len(sample_pdb)} models")

In [None]:
import mdtraj

# Load the sample XTC trajectory with mdtraj, using the matching PDB file as
# topology. Only the atoms with indices 0..2833 are requested.
sample_u = mdtraj.load_xtc(
    data_dir / 'Disordered_By_Design/XTC_files/md_0_1_align_2KMV_01_02.xtc',
    top=data_dir / 'Disordered_By_Design/2KMV/2KMV_01_02.pdb',
    atom_indices=range(2834),
)
print(sample_u)

In [None]:
from MDAnalysis import Universe

# Load the sample system with MDAnalysis: PDB file as topology, XTC file as
# trajectory.
#
# Universe is declared as Universe(topology, *coordinates, ...): trajectory
# files are positional arguments. `trajectory=` is not one of its parameters,
# so passing the XTC file under that keyword does not register it as a
# coordinate file. Both paths are therefore given positionally.
sample_u = Universe(
    data_dir.joinpath('Disordered_By_Design/2KMV/2KMV_01_02.pdb'),
    data_dir.joinpath('Disordered_By_Design/XTC_files/md_0_1_align_2KMV_01_02.xtc'),
    topology_format='pdb',
    format='xtc',
)

# Walk every frame of the trajectory and report its index with the atom count.
for traj in sample_u.trajectory:
    print(f"Frame {traj.frame} has {len(sample_u.atoms)} atoms")

# Summary of the loaded system.
print(f"Found {len(sample_u.atoms)} atoms")
print(f"Found {len(sample_u.residues)} residues")
print(f"Found {len(sample_u.segments)} segments")

## Clustering

In [None]:
import pickle

# K selects the cluster file to load: the file name embeds K in thousands
# (K = 2000 -> 'clusters-2K.pkl').
K = 2000

# NOTE: unpickling can execute arbitrary code, so only load cluster files that
# come from a trusted source.
with open(f'clusters-{K//1000}K.pkl', mode='rb') as cluster_file:
    clusters = pickle.load(cluster_file)

In [None]:
# Stack the entries stored under 'X' into a single array along a new leading axis.
C = np.stack(clusters['X'])

# Collect the four angle entries in the order phi0, psi0, phi1, psi1 and
# transpose, so the angle kind moves from the first axis to the last.
# NOTE(review): presumably each entry is 1-D, giving one row per cluster and
# four columns -- confirm against the code that wrote the pickle.
Cangles = np.array(
    [clusters[key] for key in ('phi0', 'psi0', 'phi1', 'psi1')]
).T

In [None]:
from sklearn.neighbors import NearestNeighbors
# Fit a nearest-neighbour index on the cluster array, with every sample
# flattened to a single row (first axis kept, all remaining axes merged).
neigh = NearestNeighbors()
neigh.fit(C.reshape(C.shape[0], -1))
# Disabled example query (the estimator is named `neigh`):
# indices = neigh.kneighbors(C.reshape(C.shape[0], -1), n_neighbors=1, return_distance=False)

In [None]:
from Bio.PDB import Selection, Atom, Residue, Structure

# The data format seems to have changed; adapt Alex's parser so that it gives the same results.
# TODO: the 'get_coordinates' function is not documented well enough to be sure -- ask Alex.
def get_coordinates(traj_path: Path, parser=None, filter_atoms=frozenset({'N', 'CA', 'C', 'O'})):
    """Report which residues of the first model contain every atom in *filter_atoms*.

    Work in progress: extraction of per-frame coordinates is not implemented
    yet, so the function only prints diagnostics and returns ``None``.

    Args:
        traj_path: Path of the structure file handed to the parser. Its stem
            is used as the structure id and in the printed summary.
        parser: Object exposing a Biopython-style
            ``get_structure(id=..., file=...)`` method. When ``None``, a new
            ``Bio.PDB.PDBParser`` is created.
        filter_atoms: Atom names that a residue must all contain to be
            reported as valid.

    Returns:
        None.
    """
    if parser is None:
        # Local import: the module-level Bio.PDB import brings in Selection,
        # Atom, Residue and Structure, but not PDBParser.
        from Bio.PDB import PDBParser
        parser = PDBParser()

    structure = parser.get_structure(id=traj_path.stem, file=traj_path)
    print(f'{traj_path.stem} has length: {len(structure)}')

    # Use the first model as the reference for which residues are usable.
    first_model = structure[0]

    # A residue cannot be indexed with a list of atom names (the lookup key
    # must be hashable), so test each required atom name for membership.
    valid_residue_ids = [
        residue.get_id()
        for residue in first_model.get_residues()
        if all(atom_name in residue for atom_name in filter_atoms)
    ]
    print(valid_residue_ids)

    # TODO: collect the coordinates of the `filter_atoms` atoms of the valid
    # residues for every model in `structure` and return them. The expected
    # output format still has to be confirmed, so nothing is returned for now.
    return None


# Run the parser on the first discovered configuration, looking for a '.pdb'
# file with the same stem inside the XTC directory.
# NOTE(review): the sample PDB loaded earlier lives under
# 'Disordered_By_Design/2KMV/', not next to the XTC files -- confirm that this
# path is the intended one.
pd_traj = get_coordinates(xtc_dir / (unique_names[0] + '.pdb'))