# Import modules

In [47]:
import importlib

import numpy as np
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import MDS

# Import visualization module for molecular visualization
from tools.visualization import Visualization
# Import descriptors module to obtain descriptors for triad molecule
from tools.descriptors import Descriptors
# Import plotting module for plotting
from tools import plotting
# Re-execute tools.plotting so edits made to it on disk are picked up
# without restarting the kernel.
importlib.reload(plotting)

<module 'tools.plotting' from '/xspace/hl4212/durf_hq/projects/Gustave_Li/Main_program/tools/plotting.py'>

# Load files

In [48]:
# Directory that holds the triad trajectory and its topology file.
file_dir = '/xspace/hl4212/DURF_datasets/triad_molecule'

# Full paths to the trajectory (.nc) and the topology (.prmtop) inside file_dir.
traj_path, top_path = (
    f'{file_dir}/{file_name}'
    for file_name in ('triad_dataset.nc', 'triad_forcefield_ground.prmtop')
)

# Data visualization

In [49]:
# Build the viewer helper from the trajectory and topology paths.
triad_viz = Visualization(traj_path, top_path)
# Keep this call as the last expression of the cell: its return value is what
# the notebook renders as the cell output.
triad_viz.ball_and_stick()

NGLWidget(max_frame=99999)

## Compare different conformations

In [50]:
# Compare frame 0 with frame -1.
# NOTE(review): -1 presumably selects the last frame (Python-style negative
# index) — confirm against tools/visualization.py.
triad_viz.compare(0, -1)

NGLWidget()

# Get descriptors

In [51]:
# Descriptor calculator bound to the triad trajectory and topology.
d = Descriptors(traj_path, top_path)

# One distance descriptor between two named atoms.
EuclidianDist_1 = d.eucdist('C33', 'C128')

# Three angle descriptors, each defined by a triplet of atom names.
Angle_1 = d.angle('C33', 'C96', 'C128')
Angle_2 = d.angle('C33', 'C69', 'C96')
Angle_3 = d.angle('C69', 'C96', 'C128')

# Two dihedral descriptors, each defined by four atom names.
Dihedral_1 = d.dihedral('C21', 'C61', 'C66', 'C65')
Dihedral_2 = d.dihedral('C89', 'N6', 'C95', 'C96')

# RMSD descriptors use two frames chosen from 'Angle_1':
# the frame with the largest value is taken as "linear" and the frame with the
# smallest value as "bent". The frame numbers below were determined beforehand.
LINEAR_FRAME = 88213
BENT_FRAME = 29685
RMSD_Linear = d.rmsd(frame=LINEAR_FRAME)
RMSD_Bent = d.rmsd(frame=BENT_FRAME)

In [52]:
# Visualize the descriptors as a DataFrame: each keyword name becomes a column label.
descriptor_columns = {
    'EuclidianDist_1': EuclidianDist_1,
    'Angle_1': Angle_1,
    'Angle_2': Angle_2,
    'Angle_3': Angle_3,
    'Dihedral_1': Dihedral_1,
    'Dihedral_2': Dihedral_2,
    'RMSD_Linear': RMSD_Linear,
    'RMSD_Bent': RMSD_Bent,
}
d_DataFrame = d.to_df(**descriptor_columns)

# Show the row for frame 88213, the frame used as the "linear" RMSD reference.
d_DataFrame.iloc[88213, :]

EuclidianDist_1    4.680938
Angle_1            3.102149
Angle_2            2.592577
Angle_3            2.856325
Dihedral_1        -1.394683
Dihedral_2        -2.993495
RMSD_Linear        0.000000
RMSD_Bent          1.464485
Name: 88213, dtype: float32

In [53]:
# Turn the descriptor DataFrame into a NumPy array for dimensionality reduction.
d_array = np.array(d_DataFrame)

# Write the array to the results directory so the reduction step can read it
# from disk. np.save appends ".npy" because the file name has no extension.
results_dir = '/xspace/hl4212/durf_hq/projects/Gustave_Li/Main_program/results'
array_path = f'{results_dir}/descriptors_arr'
np.save(array_path, d_array)

# [Dimensionality reduction](Dimensionality_reduction.py) (run on HPC)

# Visualization of the dimreduct results

In [54]:
# Directory holding the saved dimensionality-reduction outputs (same path that
# the descriptor array was written to).
results_dir = '/xspace/hl4212/durf_hq/projects/Gustave_Li/Main_program/results'
# Load the stored PCA result and pass it to the project plotting helper.
# NOTE(review): the first argument is presumably a method label/title — confirm
# in tools/plotting.py.
pca = np.load(f'{results_dir}/dimreduct_PCA.npy')
plotting.dimreduct('pca', pca)

In [55]:
# Load the stored kernel-PCA result (file name indicates an RBF kernel) and plot it.
kpca = np.load(f'{results_dir}/dimreduct_kPCA_rbf.npy')
plotting.dimreduct("kpca_rbf", kpca)

In [56]:
# Load the stored kernel-PCA result (file name indicates a polynomial kernel) and plot it.
# NOTE: this rebinds `kpca`, so the array loaded from dimreduct_kPCA_rbf.npy is
# no longer reachable under that name after this cell runs.
kpca = np.load(f'{results_dir}/dimreduct_kPCA_poly.npy')
plotting.dimreduct("kpca_poly", kpca)

In [57]:
# Load the stored t-SNE result labelled "standard" and plot it.
# NOTE(review): what "standard" means (presumably default t-SNE settings) is
# defined by the script that produced the file — confirm there.
tsne_std = np.load(f'{results_dir}/dimreduct_tsne_standard.npy')
plotting.dimreduct("dimreduct_tsne_standard", tsne_std)

In [58]:
# Load the stored t-SNE result labelled "optimized" and plot it.
# NOTE(review): which settings were tuned is defined by the script that produced
# the file — confirm there.
tsne_optm = np.load(f'{results_dir}/dimreduct_tsne_optimized.npy')
plotting.dimreduct("dimreduct_tsne_optimized", tsne_optm)

In [None]:
# Embed the descriptor array in two dimensions with multidimensional scaling.
# MDS comes from sklearn.manifold (imported in the imports cell); without that
# import this cell fails with NameError on a fresh kernel.
# MDS starts from a random initial configuration, so random_state is pinned to
# make the embedding reproducible across kernel restarts.
# NOTE(review): MDS works on the full pairwise dissimilarity matrix, so memory
# grows quadratically with the number of rows in d_array. For the whole
# trajectory this likely needs the HPC or a subsample — confirm before running.
mds = MDS(n_components=2, random_state=0)
result_arr = mds.fit_transform(d_array)
