# Superimposition: Rotations versus Kabsch
In this notebook, we determine the best way to align the fragments by comparing the RMSEs obtained with different superimposition (alignment) methods.

- We also check if mirroring makes the alignment better/worse.

- We also look at whether hydrogen atoms (for example, those of a methyl group) are better left out of the alignment.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

## Checking rotations and kabsch
- Does it matter which labels we use for the rotation?
- Does it matter if we mirror?

We get 8 result lines:
- Kabsch, mirrored/not mirrored
- Rotation labels 2, 4, 6, mirrored/not mirrored
- Rotation labels 1, 2, 3, mirrored/not mirrored
- Rotation labels 1, 4, 6, mirrored/not mirrored

In [None]:
# Load the RMSE tables for every superimposition variant of the RC6H5-R2CO pair.

# rotation mirrored
df_rot_246 = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_rot_246_structures.csv')
df_rot_123 = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_rot_123_structures.csv')
df_rot_146 = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_rot_146_structures.csv')

# rotation not mirrored
df_rot_246_not_mir = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_rot_not_mir_246_structures.csv')
df_rot_123_not_mir = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_rot_not_mir_123_structures.csv')
df_rot_146_not_mir = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_rot_not_mir_146_structures.csv')

# kabsch, both mirrored and not mirrored
df_kab = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_structures.csv')
df_kab_not_mir = pd.read_csv('../../results/pairs/RC6H5/RC6H5_R2CO_vdw.5/RC6H5_R2CO_not_mir_structures.csv')

# Each legend label is declared next to the table it describes, and `labels`
# and `dfs` are both derived from this one list. The two lists used to be
# written out separately in different orders (labels grouped by mirroring,
# tables interleaved), so zip(dfs, labels) put the wrong legend text on four of
# the eight curves. The order of `dfs` is unchanged, so line colours stay the
# same; only the legend text is corrected.
labelled_dfs = [
    ("R, mirrored, label 2, 4, 6", df_rot_246),
    ("R, not mirrored, label 2, 4, 6", df_rot_246_not_mir),
    ("R, mirrored, label 1, 2, 3", df_rot_123),
    ("R, not mirrored, label 1, 2, 3", df_rot_123_not_mir),
    ("R, mirrored, label 1, 4, 6", df_rot_146),
    ("R, not mirrored, label 1, 4, 6", df_rot_146_not_mir),
    ("Kabsch, mirrored", df_kab),
    ("Kabsch, not mirrored", df_kab_not_mir),
]

labels = [label for label, _ in labelled_dfs]
dfs = [table for _, table in labelled_dfs]

In [None]:
# Smooth the per-structure RMSE with a 500-row rolling mean and store it in a
# new 'rmse_smooth' column on every table. With pandas' default min_periods the
# first 499 rows of that column are NaN, so the smoothed curves start later.
for frame in dfs:
    rolling_rmse = frame["rmse"].rolling(window=500)
    frame['rmse_smooth'] = rolling_rmse.mean()

In [None]:
# Raw (unsmoothed) RMSE of every superimposition variant on one set of axes.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title('RMSEs of different superimposition algorithms')

# One curve per table; the x position is simply the row number in that table.
for frame, name in zip(dfs, labels):
    structure_ids = range(len(frame))
    ax.plot(structure_ids, frame.rmse, label=name)

ax.legend()
ax.set_xlabel("Structure ID")
ax.set_ylabel("RMSE")

plt.show()

In [None]:
# Same comparison as the raw figure, but drawn from the 'rmse_smooth' column.
fig, ax = plt.subplots(figsize=(8, 6))
ax.set_title('RMSEs of different superimposition algorithms')

# One curve per table; the x position is the row number in that table.
for frame, name in zip(dfs, labels):
    structure_ids = range(len(frame))
    ax.plot(structure_ids, frame.rmse_smooth, label=name)

ax.legend(loc="upper right")

ax.set_ylabel("RMSE Rotation")
ax.set_xlabel("Structure ID")

plt.show()

In [None]:
# Two-panel figure: rotation (labels 1, 2, 3) against Kabsch on the left,
# the two Kabsch variants on their own on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,4))
fig.suptitle('RMSEs of different superimposition algorithms\n$RC_6H_5-R_2CO$')

# Rotation curves take the default colour cycle; the Kabsch curves are pinned
# to fixed colours so they look the same in both panels.
rotation_curves = [
    (df_rot_123, {'label': 'R, lab1, 2, 3'}),
    (df_rot_123_not_mir, {'label': "R, not mirrored, lab1, 2, 3"}),
]
kabsch_curves = [
    (df_kab, {'label': 'Kabsch', 'color': 'tab:red'}),
    (df_kab_not_mir, {'label': 'Kabsch, not mirrored', 'color': 'tab:purple'}),
]

panels = [
    (ax1, rotation_curves + kabsch_curves, "RMSE Rotation"),
    (ax2, kabsch_curves, "RMSE Kabsch"),
]

for axis, curves, y_label in panels:
    for frame, line_kwargs in curves:
        axis.plot(range(len(frame)), frame.rmse_smooth, **line_kwargs)
    axis.set_xlabel("Structure ID")
    axis.set_ylabel(y_label)
    axis.legend(loc='upper right')

plt.savefig('../../results/plots/comparing_rmse_kabsch_rotation_rc6h5_r2co.svg', format='svg', bbox_inches='tight')

In [None]:
# Two-panel figure: rotation (labels 1, 4, 6) against Kabsch on the left,
# the two Kabsch variants on their own on the right. The figure is only
# displayed; the savefig call at the bottom is commented out.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15,4))
fig.suptitle('RMSEs of different superimposition algorithms')

# Rotation curves take the default colour cycle; the Kabsch curves are pinned
# to fixed colours so they look the same in both panels.
rotation_curves = [
    (df_rot_146, {'label': 'R, lab1, 4, 6'}),
    (df_rot_146_not_mir, {'label': "R, not mirrored, lab1, 4, 6"}),
]
kabsch_curves = [
    (df_kab, {'label': 'Kabsch', 'color': 'tab:red'}),
    (df_kab_not_mir, {'label': 'Kabsch, not mirrored', 'color': 'tab:purple'}),
]

panels = [
    (ax1, rotation_curves + kabsch_curves, "RMSE Rotation"),
    (ax2, kabsch_curves, "RMSE Kabsch"),
]

for axis, curves, y_label in panels:
    for frame, line_kwargs in curves:
        axis.plot(range(len(frame)), frame.rmse_smooth, **line_kwargs)
    axis.set_xlabel("Structure ID")
    axis.set_ylabel(y_label)
    axis.legend(loc='upper right')

# plt.savefig('../../results/figures/comparing_rmse.svg', format='svg', bbox_inches='tight')

In [None]:
# Smoothed RMSE of the three (mirrored) rotation label sets next to Kabsch.
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title('RMSEs difference with different labels\n$RC_6H_5-R_2CO$')

# Rotation curves use the default colour cycle; Kabsch is pinned to red.
label_set_curves = [
    (df_rot_123, {'label': 'R, lab 1, 2, 3'}),
    (df_rot_146, {'label': 'R, lab 1, 4, 6'}),
    (df_rot_246, {'label': 'R, lab 2, 4, 6'}),
    (df_kab, {'label': 'Kabsch', 'color': 'tab:red'}),
]
for frame, line_kwargs in label_set_curves:
    ax.plot(range(len(frame)), frame.rmse_smooth, **line_kwargs)

ax.set_xlabel("Structure ID")
ax.set_ylabel("RMSE Rotation")

ax.legend(loc='upper right')

plt.savefig('../../results/plots/comparing_rmse_different_labels_rc6h5.svg', format='svg', bbox_inches='tight')

# Check if the same happens for H2O

In [None]:
# Load the Kabsch and rotation results for the H2O-R2CO pair.
df_kab_water = pd.read_csv('../../results/pairs/H2O/H2O_R2CO_vdw.5/H2O_R2CO_structures.csv')
df_rot_water = pd.read_csv('../../results/pairs/H2O/H2O_R2CO_vdw.5/H2O_R2CO_rot_structures.csv')

# Add the 500-row rolling mean of the RMSE as 'rmse_smooth' to both tables.
for water_frame in (df_kab_water, df_rot_water):
    water_frame['rmse_smooth'] = water_frame["rmse"].rolling(window=500).mean()

In [None]:
# Smoothed RMSE per structure for the H2O-R2CO pair, one curve per method.
fig, ax = plt.subplots(figsize=(8, 4))

# The x positions come from the same table that supplies the y values.
# The Kabsch curve used to borrow its x range from the rotation table, which
# makes matplotlib raise a shape-mismatch error when the two tables differ in
# length.
for frame, label in [(df_rot_water, "Rotation"), (df_kab_water, "Kabsch")]:
    ax.plot(range(len(frame)), frame.rmse_smooth, label=label)

ax.set_xlabel("Structure ID")
ax.set_ylabel("RMSE")
ax.set_title("RMSE Rotation vs RMSE Kabsch\n$H_2O-R_2CO$")
ax.legend(loc="upper right")

fig.savefig("../../results/plots/comparing_rmse_kabsch_rotation_h2o.svg", format='svg', bbox_inches='tight')


# Does a methyl group ruin the alignment?


In [None]:
# Load the four Kabsch result tables for the RCOMe-R2CO pair. The file names
# encode whether the methyl group (ch3) was part of the structures and whether
# it was included in the RMSE.
df_kab_rcome = pd.read_csv('../../results/pairs/RCOMe/RCOMe_R2CO_vdw.5/RCOMe_R2CO_structures.csv')
df_kab_rcome_with_methyl = pd.read_csv('../../results/pairs/RCOMe/RCOMe_R2CO_vdw.5/RCOMe_R2CO_with_ch3_structures.csv')
df_kab_rmse_without_methyl = pd.read_csv('../../results/pairs/RCOMe/RCOMe_R2CO_vdw.5/RCOMe_R2CO_with_ch3_rmse_without_structures.csv')
df_kab_withouth_methyl_rmse_with = pd.read_csv('../../results/pairs/RCOMe/RCOMe_R2CO_vdw.5/RCOMe_R2CO_without_ch3_rmse_with_structures.csv')

# Add the 500-row rolling mean of the RMSE as 'rmse_smooth' to every table.
rcome_frames = (
    df_kab_rcome,
    df_kab_rcome_with_methyl,
    df_kab_rmse_without_methyl,
    df_kab_withouth_methyl_rmse_with,
)
for rcome_frame in rcome_frames:
    rcome_frame['rmse_smooth'] = rcome_frame["rmse"].rolling(window=500).mean()

In [None]:
# Smoothed RMSE for the four methyl variants of the RCOMe-R2CO pair.
fig, ax = plt.subplots(figsize=(8, 4))

# Plot order fixes the colour assigned to each curve by the default cycle.
methyl_curves = [
    (df_kab_rcome_with_methyl, "With Methyl, RMSE with"),
    (df_kab_rmse_without_methyl, "With Methyl, RMSE without"),
    (df_kab_withouth_methyl_rmse_with, "Without Methyl, RMSE with"),
    (df_kab_rcome, "Without Methyl, RMSE without"),
]
for frame, curve_label in methyl_curves:
    ax.plot(range(len(frame)), frame.rmse_smooth, label=curve_label)

ax.set_xlabel("Structure ID")
ax.set_ylabel("RMSE")
ax.set_title("RMSE's with and without methyl group\n$RCOMe-R_2CO$")
ax.legend(loc="upper right")

plt.savefig("../../results/plots/comparing_rmse_kabsch_rcome_methyl.svg", format='svg', bbox_inches='tight')


In [None]:
# Load the not-mirrored Kabsch results for the RCOMe-R2CO pair.
df_kab_rcome_not_mirrored = pd.read_csv('../../results/pairs/RCOMe/RCOMe_R2CO_vdw.5/RCOMe_R2CO_without_ch3_structures_not_mirrored.csv')
# NOTE(review): the variable name says "with methyl", but the file read here is
# "without_ch3_rmse_with" - confirm which of the two is intended.
df_kab_rcome_with_methyl_not_mirrored = pd.read_csv('../../results/pairs/RCOMe/RCOMe_R2CO_vdw.5/RCOMe_R2CO_without_ch3_rmse_with_structures_not_mirrored.csv')

# Add the 500-row rolling mean of the RMSE as 'rmse_smooth' to both tables.
for not_mirrored_frame in (df_kab_rcome_not_mirrored, df_kab_rcome_with_methyl_not_mirrored):
    not_mirrored_frame['rmse_smooth'] = not_mirrored_frame["rmse"].rolling(window=500).mean()

In [None]:
# Smoothed RMSE for the methyl variants of the RCOMe-R2CO pair, including the
# two not-mirrored runs.
fig, ax = plt.subplots(figsize=(10, 3))

# Plot order fixes the colour assigned to each curve by the default cycle.
# NOTE(review): df_kab_rcome_with_methyl_not_mirrored is labelled "With methyl",
# but the cell that loads it reads a "without_ch3_rmse_with" file - confirm the
# label matches the data.
methyl_mirror_curves = [
    (df_kab_rcome_with_methyl, "With Methyl, RMSE with"),
    (df_kab_rcome_with_methyl_not_mirrored, "With methyl, RMSE with, not mirrored"),
    (df_kab_rmse_without_methyl, "With Methyl, RMSE without"),
    (df_kab_rcome, "Without Methyl, RMSE without"),
    (df_kab_rcome_not_mirrored, "Without methyl, RMSE without, not mirrored"),
    (df_kab_withouth_methyl_rmse_with, "Without Methyl, RMSE with"),
]

# The x positions come from the same table that supplies the y values.
# Two of the curves used to take their x range from a different table, which
# makes matplotlib raise a shape-mismatch error when the tables differ in
# length.
for frame, curve_label in methyl_mirror_curves:
    ax.plot(range(len(frame)), frame.rmse_smooth, label=curve_label)

ax.set_xlabel("Structure ID")
ax.set_ylabel("RMSE")
ax.set_title("RMSE's with and without methyl group\n$RCOMe-R_2CO$")
ax.legend(loc='upper right')
# NOTE(review): every other active savefig in this notebook writes to
# ../../results/plots/; this one targets ../../results/figures/ - confirm the
# directory exists and is the intended destination.
fig.savefig('../../results/figures/rmse_kabsch_methyl_mirror.svg', format='svg', bbox_inches='tight')