# Multi-compound biplot

In [None]:
# `display` is used further down to render several molecule drawings from one cell.
from IPython.display import display
import matplotlib.pyplot as plt

import shnitsel as sh
# Imported for its side effects only; presumably this is what registers the
# `.sh` accessor used on xarray objects below -- TODO confirm.
import shnitsel.xarray

# Ask RDKit's notebook renderer to label atoms with their indices, so the
# drawings can be related to the atom indices passed to the biplot/dihedral calls.
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.drawOptions.addAtomIndices = True

## Load molecules

In [None]:
# This tutorial follows on from walkthroughs/2_pca_across_compounds.ipynb.
# Please run that notebook first to obtain homologs.nc; the call below
# expects to find the file at /tmp/homologs.nc.

homologs = sh.open_frames('/tmp/homologs.nc')
homologs  # bare expression so the notebook shows the loaded dataset

## Pairwise-distance PCA fitted over all analogous compounds

In [None]:
# Run the pairwise-distance PCA on the atomic coordinates (`atXYZ`) of the whole
# dataset, i.e. on all compounds at once rather than on each compound separately.
pca = homologs.atXYZ.sh.pairwise_dists_pca()
pca  # bare expression so the notebook shows the PCA result

In [None]:
# One panel per compound; each panel shows every trajectory in grey as a common
# background and the panel's own compound highlighted in dark blue.
fig, axs = plt.subplot_mosaic([['I01', 'A01'], ['A02', 'A03']], layout='constrained')

# Group once and reuse: the grouping does not depend on the axis, so there is no
# need to recompute it for each of the four panels and again for the highlights.
trajectories = list(pca.sh.mgroupby(['cmpnd', 'trajid']))

# Background pass: all trajectories, in every panel.
for name, ax in axs.items():
    ax.set_title(name)
    for _key, traj in trajectories:
        ax.plot(traj[:, 0], traj[:, 1], c='#ccc')

# Highlight pass: drawn after the background so the dark lines end up on top.
# The panel is looked up by compound name, so the mosaic labels above must match
# the `cmpnd` values present in the data.
for (cmpnd, trajid), traj in trajectories:
    axs[cmpnd].plot(traj[:, 0], traj[:, 1], c='#2c3e50')

fig.supxlabel('PC1')
fig.supylabel('PC2')

In [None]:
# Stand-alone figure: A03 trajectories in dark blue, drawn above all other
# compounds in grey.
# NOTE: these rcParams changes are global and stay in effect for later cells.
plt.rcParams.update({'font.size': 16, 'lines.linewidth': 1})

fig, ax = plt.subplots()
fig.set_size_inches(4, 5)

highlight_style = {'c': '#2c3e50', 'zorder': 10}  # raised zorder keeps A03 on top
background_style = {'c': '#ccc'}
for (cmpnd, trajid), traj in pca.sh.mgroupby(['cmpnd', 'trajid']):
    line_style = highlight_style if cmpnd == 'A03' else background_style
    ax.plot(traj[:, 0], traj[:, 1], **line_style)

ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
# fig.savefig('/nc/reports/2025-06-30_4-step_figure/2.pdf')

## Biplot of pairwise-distance PCA fitted to all analogs

In [None]:
# Biplot over the full dataset. The four integers are presumably the atom
# indices of the dihedral of interest (the same values are passed to
# `dihedral` elsewhere in this notebook) -- confirm against shnitsel's docs.
sh.plot.biplot_kde(homologs, 3, 1, 0, 2, scatter_color='geo')

# Draw the first frame of every compound as a 2D structure. The tuple holds the
# positional arguments forwarded to `to_mol` (presumably the molecular charge --
# TODO confirm); A01 passes none and so relies on `to_mol`'s default.
to_mol_args_by_compound = (
    ('I01', (+1,)),
    ('A01', ()),
    ('A02', (-3,)),
    ('A03', (-8,)),
)
for compound, to_mol_args in to_mol_args_by_compound:
    first_frame = homologs.atXYZ.sel(cmpnd=compound).isel(frame=0)
    display(first_frame.sh.to_mol(*to_mol_args, to2D=True))

In [None]:
# Same biplot, restricted to the frames that belong to compounds I01 and A01.
is_i01_or_a01 = (homologs.cmpnd == 'I01') | (homologs.cmpnd == 'A01')
i01_a01_frames = homologs.sel(frame=is_i01_or_a01)
sh.plot.biplot_kde(i01_a01_frames, 3, 1, 0, 2, scatter_color='geo')

The next plot is somewhat redundant, but it illustrates how to colour the PCA scatter by dihedral angle directly:

In [None]:
# Recompute the PCA here so this cell does not depend on earlier plotting cells.
pca = homologs.atXYZ.sh.pairwise_dists_pca()

# Dihedral in degrees for atoms (3, 1, 0, 2), attached to the PCA result under
# the name 'dih' so it can be used to colour the scatter points.
dih = homologs.atXYZ.sh.dihedral(3, 1, 0, 2, deg=True)
pca['dih'] = dih

pc1, pc2 = pca[:, 0], pca[:, 1]
plt.scatter(pc1, pc2, c=pca['dih'], s=0.2, alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')

# The colourbar label is taken from the dihedral's own `long_name` attribute.
colorbar_label = dih.attrs['long_name'] + ' / °'
plt.colorbar(label=colorbar_label)