**Rhodium Hydrogenation of Enamides TSFF Parameterized by a new Hybrid Optimizer**

This notebook uses the transition-state force field (TSFF) for the Rh-catalyzed hydrogenation of enamides as a test case for a new hybrid Particle Swarm-Differential Evolution optimization algorithm (hybrid_optimizer) developed by Mikaela Farrugia (github: @mmfarrugia) under Olaf Wiest & Paul Helquist.

All runs use pbest localization as the convergence criterion.
Note: The score distributions shown here are incorrect because of a mix-up between mm3.fld and mm3.hybrid.fld. The issue has since been fixed, but the fix is untested and the runs have not been repeated.

In [None]:
import os
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
import sys
from matplotlib import animation, rc
from IPython.display import HTML, Image
import seaborn
import pickle
import pandas as pd
from numpy import copy
from scipy import stats
import itertools

seaborn.set_theme(style="whitegrid", palette='pastel')

#src_dir = os.path.abspath("/home/mmfarrugia/repos/q2mm/q2mm")
sys.path.append("/home/mfarrugi/repos/q2mm/rh-hybrid/schrodinger.ve/lib/python3.8/site-packages/q2mm-0.0.0-py3.8.egg")

#from hybrid_optimizer import PSO_GA
import q2mm.hybrid_optimizer as hybrid_optimizer
from q2mm.hybrid_optimizer import PSO_DE
from tools.plotters import plot_cost_history, plot_contour, plot_surface, plot_summary, Mesher, Designer




All runs use 8 workers and 10 particles (TSFF candidates) with a convergence criterion of 0.001. Each run proceeds in loop cycles of at most 50 iterations, and 3 technical replicates are run for each case. Further information regarding the settings, files, and parameters used in the optimizations can be found in the README.txt file in each run's directory.

All hyperparameters are derived from literature **INCLUDE REFS** and use either the TIGHT_OPT_CONFIG or GLOBAL_OPT_CONFIG of HO.

In [None]:
# Settings shared by every optimization run analysed in this notebook.
num_workers = 8                # parallel workers per run
num_particles = 10             # particles (TSFF candidates) per swarm
num_tech_replicates = 3        # technical replicates per case
cycle_iter_length = 50         # maximum iterations per loop cycle
convergence_criteria = 0.001   # convergence threshold
num_params = 31                # NOTE(review): presumably the number of optimized parameters -- confirm

**Start: FCs = 0.5 + Donoghue charges**

Original Score: 2.67645

In [None]:
# Gather data on the starting point: its score report and its bond/angle
# parameters, all tagged with FF=0 so they can be told apart from the runs.

# Score of the starting force field.
static_score = 2.67645
score_cols = ["Label", "Weight", "Reference", "Calculated", "Score", "FF"]
# Rows of the score report that are skipped: rows 0-1 and rows 106390-106401
# (presumably header and trailing summary lines -- confirm against the file).
skiprows = [0, 1, *range(106390, 106402)]

# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
static_scores = pd.read_csv(
    '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/rh_qstatic_start.txt',
    skiprows=skiprows,
    sep=r'\s+',
    names=score_cols,
).assign(FF=0)
# /opt_qstatic_start_hybrid_eig
bond_cols = ["param_type", "atom1", "atom2", "Equilibrium Value", "Force Constant", "Dipole Moment", "FF"]
angle_cols = ["param_type", "atom1", "atom2", "atom3", "Equilibrium Value", "Force Constant", "FF"]

# Row indices of the bond and angle entries inside the .fld files; every
# other row is skipped when the parameters are read.
bond_rows = np.arange(1859, 1867)
angle_rows = np.arange(1867, 1889)

bond_start = pd.read_csv(
    '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/mm3.old.fld',
    skiprows=lambda x: x not in bond_rows,
    sep=r'\s+',
    names=bond_cols,
).assign(FF=0)
angle_start = pd.read_csv(
    '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/mm3.old.fld',
    skiprows=lambda x: x not in angle_rows,
    sep=r'\s+',
    names=angle_cols,
).assign(FF=0)


TIGHT SPREAD!!

PSO ONLY

In [None]:
# Location of the PSO-only results; each entry of directory_list names one
# replicate sub-directory.
base_directory = '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/tight_spread/pso_only'
directory_list = [str(replicate) for replicate in (1, 2, 3)]

In [None]:
%matplotlib inline
# PSO ONLY
# Left panel: every particle's score at every iteration.
# Right panel: running best score (loss) of each replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
fig.suptitle('STATIC FC Estimate under a PSO only - Rh Hyd Enamides')
ax[0].set_title('Score Diversity Throughout Parameterization')
ax[1].set_title('Loss, aka Best Score Throughout Parameterization')

# Gray reference line: score of the starting force field.
for axis in ax:
    axis.axhline(y=static_score, color='gray')

# One color per replicate, taken from the active property cycle through the
# public rcParams API; Axes._get_lines.prop_cycler is private and no longer
# exists in recent Matplotlib releases.
replicate_colors = itertools.cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

for directory in directory_list:
    history_path = os.path.join(base_directory, directory, 'hybrid_opt_history.bin')
    # NOTE: unpickling can execute arbitrary code -- only load history files
    # written by trusted optimizer runs.
    with open(history_path, 'rb') as swarm_history_file:
        swarm_history = pickle.load(swarm_history_file)
    num_iters = len(swarm_history['Y'])
    num_ffs = len(swarm_history['Y'][0])
    assert num_ffs == num_particles
    color = next(replicate_colors)
    # One row per iteration, one column per particle.
    Y_history = pd.DataFrame(np.array(swarm_history['Y']).reshape((num_iters, num_ffs)))
    ax[0].plot(Y_history.index, Y_history.values, '.', color=color)
    # Best score of each iteration, made monotonically non-increasing.
    loss = Y_history.min(axis=1).cummin()
    loss.plot(kind='line', ax=ax[1], color=color, label='Final Score: '+str(loss.iloc[-1]))

ax[1].legend()

# Same tick positions on both panels, one tick per loop cycle, derived from
# the x-range of the left panel.
cycle_ticks = np.arange(1, ax[0].get_xlim()[1], cycle_iter_length)
for axis in ax:
    axis.xaxis.set_ticks(cycle_ticks)

plt.show()

In [None]:
%matplotlib inline
# Load the final score report of every replicate, tagged with its directory
# name in the FF column.

score_cols = ["Label", "Weight", "Reference", "Calculated", "Score", "FF"]
# Rows of the report that are skipped: rows 0-1 and rows 106390-106401.
report_skiprows = [0, 1, *range(106390, 106402)]
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
runs = [
    pd.read_csv(
        os.path.join(base_directory, directory, 'rh_qstatic_pso_end.txt'),
        skiprows=report_skiprows,
        sep=r'\s+',
        names=score_cols,
    ).assign(FF=directory)
    for directory in directory_list
]


In [None]:
# Reference vs. calculated values with a linear fit: the starting force field
# in the first panel, followed by one panel per replicate.
panel_frames = [static_scores, *runs]
fig, ax = plt.subplots(1, len(panel_frames), figsize=(25, 8))
palette = itertools.cycle(seaborn.color_palette())
reference_col, calculated_col = score_cols[2], score_cols[3]

for axis, frame, color in zip(ax, panel_frames, palette):
    seaborn.regplot(data=frame, x=reference_col, y=calculated_col, fit_reg=True, ax=axis, color=color)
    # Cap both axes at 5000 so every panel shows the same upper range.
    axis.set_ylim(top=5000)
    axis.set_xlim(right=5000)

plt.show()

In [None]:
# Plot only the off-diagonal elements (rows whose reference value is zero),
# sorted by calculated value: starting force field first (gray), then one
# panel per replicate.

# One panel per data set instead of a hard-coded four, so any number of
# replicates fits; squeeze=False keeps `ax` indexable even for one panel.
fig, ax = plt.subplots(1, len(runs) + 1, figsize=(32, 10), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

off_diag_start = static_scores.loc[static_scores['Reference'] == 0.0000]
off_diag_start = off_diag_start.sort_values(by='Calculated', ignore_index=True)
seaborn.regplot(data=off_diag_start, x=off_diag_start.index, y=score_cols[3], fit_reg=False, ax=ax[0], color='gray')

for axis, run in zip(ax[1:], runs):
    off_diag = run.loc[run['Reference'] == 0.0000]
    # ignore_index renumbers the rows so the x axis is the rank after sorting.
    off_diag = off_diag.sort_values(by='Calculated', ignore_index=True)

    color = next(palette)
    seaborn.regplot(data=off_diag, x=off_diag.index, y=score_cols[3], fit_reg=False, ax=axis, color=color)

plt.show()


In [None]:
# Box plot (not a violin plot) of the calculated off-diagonal values, one box
# per force field. Relies on off_diag_start from the off-diagonal cell.

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
zero_reference_frames = [run.loc[run['Reference'] == 0.0000] for run in runs]
# Replicates first, starting force field last, then ordered by calculated value.
off_diag_merged = pd.concat([*zero_reference_frames, off_diag_start])
off_diag_merged = off_diag_merged.sort_values(by='Calculated')

seaborn.boxplot(data=off_diag_merged, x='FF', y=score_cols[3])

plt.show()

In [None]:
# Plot the diagonal elements (rows whose reference value is non-zero) with a
# linear fit: starting force field first, then one panel per replicate.


def _fit_label(frame):
    """Return the legend label describing the line regplot draws for *frame*."""
    # Regress Reference on Calculated so the equation matches the plotted
    # axes (x='Calculated', y='Reference').
    slope, intercept, r_value, _, _ = stats.linregress(frame['Calculated'], frame['Reference'])
    # linregress returns r, so square it for R^2; numeric formats keep any
    # exponent that string truncation would cut off.
    return f'$y={slope:.4g}x{intercept:+.4g}$   $R^2={r_value ** 2:.4f}$'


diag_frames = [static_scores, *runs]
# One panel per data set; squeeze=False keeps `ax` indexable for one panel.
fig, ax = plt.subplots(1, len(diag_frames), figsize=(32, 8), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

for axis, frame in zip(ax, diag_frames):
    diag = frame.loc[frame['Reference'] != 0.0000]
    seaborn.regplot(data=diag, x='Calculated', y='Reference', color=next(palette), line_kws={'label': _fit_label(diag)}, ax=axis)
    axis.legend()

plt.show()

In [None]:
%matplotlib inline
# Load the bond and angle parameters of every replicate's final force field,
# tagged with the replicate's directory name in the FF column.

bonds = []
angles = []

for directory in directory_list:
    fld_path = os.path.join(base_directory, directory, 'rh_qstatic_pso_end.fld')
    # Only the rows listed in bond_rows / angle_rows are read.
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    bonds.append(pd.read_csv(fld_path, skiprows=lambda x: x not in bond_rows, sep=r'\s+', names=bond_cols).assign(FF=directory))
    angles.append(pd.read_csv(fld_path, skiprows=lambda x: x not in angle_rows, sep=r'\s+', names=angle_cols).assign(FF=directory))


In [None]:
# Force constants of the starting force field and of every replicate:
# bonds in the left panel, angles in the right panel.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
fig.suptitle('STATIC FC Estimate under a PSO only - Rh Hyd Enamides')
ax[0].set_title('Bonds')
ax[1].set_title('Angles')

palette = itertools.cycle(seaborn.color_palette())

color = next(palette)
seaborn.regplot(data=bond_start, label='STATIC', x=bond_start.index, y="Force Constant", fit_reg=False, ax=ax[0], color=color)
seaborn.regplot(data=angle_start, label='STATIC', x=angle_start.index, y="Force Constant", fit_reg=False, ax=ax[1], color=color)

for directory, bond_frame, angle_frame in zip(directory_list, bonds, angles):
    color = next(palette)
    # Label each replicate by its directory so legend entries are distinguishable.
    run_label = f'FF {directory}'
    seaborn.regplot(data=bond_frame, label=run_label, x=bond_frame.index, y="Force Constant", fit_reg=False, ax=ax[0], color=color)
    seaborn.regplot(data=angle_frame, label=run_label, x=angle_frame.index, y="Force Constant", fit_reg=False, ax=ax[1], color=color)

# Both panels carry labelled series, so both get a legend.
ax[0].legend()
ax[1].legend()
plt.show()

DE ONLY

In [None]:
# Location of the DE-only results; each entry of directory_list names one
# replicate sub-directory.
base_directory = '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/tight_spread/de_only'
directory_list = [str(replicate) for replicate in (1,)]  # replicates '2' and '3' are disabled

In [None]:
%matplotlib inline
# DE ONLY
# Left panel: every particle's score at every iteration.
# Right panel: running best score (loss) of each replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
fig.suptitle('STATIC FC Estimate under a DE only - Rh Hyd Enamides')
ax[0].set_title('Score Diversity Throughout Parameterization')
ax[1].set_title('Loss, aka Best Score Throughout Parameterization')

# Gray reference line: score of the starting force field.
for axis in ax:
    axis.axhline(y=static_score, color='gray')

# One color per replicate, taken from the active property cycle through the
# public rcParams API; Axes._get_lines.prop_cycler is private and no longer
# exists in recent Matplotlib releases.
replicate_colors = itertools.cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

for directory in directory_list:
    history_path = os.path.join(base_directory, directory, 'hybrid_opt_history.bin')
    # NOTE: unpickling can execute arbitrary code -- only load history files
    # written by trusted optimizer runs.
    with open(history_path, 'rb') as swarm_history_file:
        swarm_history = pickle.load(swarm_history_file)
    num_iters = len(swarm_history['Y'])
    num_ffs = len(swarm_history['Y'][0])
    assert num_ffs == num_particles
    color = next(replicate_colors)
    # One row per iteration, one column per particle.
    Y_history = pd.DataFrame(np.array(swarm_history['Y']).reshape((num_iters, num_ffs)))
    ax[0].plot(Y_history.index, Y_history.values, '.', color=color)
    # Best score of each iteration, made monotonically non-increasing.
    loss = Y_history.min(axis=1).cummin()
    loss.plot(kind='line', ax=ax[1], color=color, label='Final Score: '+str(loss.iloc[-1]))

ax[1].legend()
# Clip the score-diversity panel at 10 on the y axis.
ax[0].set_ylim(top=10)

# Same tick positions on both panels, one tick per loop cycle, derived from
# the x-range of the left panel.
cycle_ticks = np.arange(1, ax[0].get_xlim()[1], cycle_iter_length)
for axis in ax:
    axis.xaxis.set_ticks(cycle_ticks)

plt.show()

In [None]:
%matplotlib inline
# Load the final score report of every replicate, tagged with its directory
# name in the FF column.

score_cols = ["Label", "Weight", "Reference", "Calculated", "Score", "FF"]
# Rows of the report that are skipped: rows 0-1 and rows 106390-106401.
report_skiprows = [0, 1, *range(106390, 106402)]
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
runs = [
    pd.read_csv(
        os.path.join(base_directory, directory, 'rh_qstatic_de_end.txt'),
        skiprows=report_skiprows,
        sep=r'\s+',
        names=score_cols,
    ).assign(FF=directory)
    for directory in directory_list
]


In [None]:
# Reference vs. calculated values with a linear fit: the starting force field
# in the first panel, followed by one panel per replicate.
panel_frames = [static_scores, *runs]
fig, ax = plt.subplots(1, len(panel_frames), figsize=(25, 8))
palette = itertools.cycle(seaborn.color_palette())
reference_col, calculated_col = score_cols[2], score_cols[3]

# TODO: plot against starting point
for axis, frame, color in zip(ax, panel_frames, palette):
    seaborn.regplot(data=frame, x=reference_col, y=calculated_col, fit_reg=True, ax=axis, color=color)
    # Cap both axes at 5000 so every panel shows the same upper range.
    axis.set_ylim(top=5000)
    axis.set_xlim(right=5000)

plt.show()

In [None]:
# Plot only the off-diagonal elements (rows whose reference value is zero),
# sorted by calculated value: starting force field first (gray), then one
# panel per replicate.

# One panel per data set instead of a hard-coded four, so no empty panels
# are drawn; squeeze=False keeps `ax` indexable even for one panel.
fig, ax = plt.subplots(1, len(runs) + 1, figsize=(32, 10), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

off_diag_start = static_scores.loc[static_scores['Reference'] == 0.0000]
off_diag_start = off_diag_start.sort_values(by='Calculated', ignore_index=True)
seaborn.regplot(data=off_diag_start, x=off_diag_start.index, y=score_cols[3], fit_reg=False, ax=ax[0], color='gray')

for axis, run in zip(ax[1:], runs):
    off_diag = run.loc[run['Reference'] == 0.0000]
    # ignore_index renumbers the rows so the x axis is the rank after sorting.
    off_diag = off_diag.sort_values(by='Calculated', ignore_index=True)

    color = next(palette)
    seaborn.regplot(data=off_diag, x=off_diag.index, y=score_cols[3], fit_reg=False, ax=axis, color=color)

plt.show()


In [None]:
# Box plot (not a violin plot) of the calculated off-diagonal values, one box
# per force field. Relies on off_diag_start from the off-diagonal cell.

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
zero_reference_frames = [run.loc[run['Reference'] == 0.0000] for run in runs]
# Replicates first, starting force field last, then ordered by calculated value.
off_diag_merged = pd.concat([*zero_reference_frames, off_diag_start])
off_diag_merged = off_diag_merged.sort_values(by='Calculated')

seaborn.boxplot(data=off_diag_merged, x='FF', y=score_cols[3])

plt.show()

In [None]:
# Plot the diagonal elements (rows whose reference value is non-zero) with a
# linear fit: starting force field first, then one panel per replicate.


def _fit_label(frame):
    """Return the legend label describing the line regplot draws for *frame*."""
    # Regress Reference on Calculated so the equation matches the plotted
    # axes (x='Calculated', y='Reference').
    slope, intercept, r_value, _, _ = stats.linregress(frame['Calculated'], frame['Reference'])
    # linregress returns r, so square it for R^2; numeric formats keep any
    # exponent that string truncation would cut off.
    return f'$y={slope:.4g}x{intercept:+.4g}$   $R^2={r_value ** 2:.4f}$'


diag_frames = [static_scores, *runs]
# One panel per data set; squeeze=False keeps `ax` indexable for one panel.
fig, ax = plt.subplots(1, len(diag_frames), figsize=(32, 8), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

for axis, frame in zip(ax, diag_frames):
    diag = frame.loc[frame['Reference'] != 0.0000]
    seaborn.regplot(data=diag, x='Calculated', y='Reference', color=next(palette), line_kws={'label': _fit_label(diag)}, ax=axis)
    axis.legend()

plt.show()

In [None]:
%matplotlib inline
# Load the bond and angle parameters of every replicate's final force field,
# tagged with the replicate's directory name in the FF column.

bonds = []
angles = []

for directory in directory_list:
    fld_path = os.path.join(base_directory, directory, 'rh_qstatic_de_end.fld')
    # Only the rows listed in bond_rows / angle_rows are read.
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    bonds.append(pd.read_csv(fld_path, skiprows=lambda x: x not in bond_rows, sep=r'\s+', names=bond_cols).assign(FF=directory))
    angles.append(pd.read_csv(fld_path, skiprows=lambda x: x not in angle_rows, sep=r'\s+', names=angle_cols).assign(FF=directory))


In [None]:
# Force constants of every replicate: bonds on the left, angles on the right,
# one color per replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
bond_axis, angle_axis = ax[0], ax[1]
fig.suptitle('STATIC FC Estimate under a DE only - Rh Hyd Enamides')
bond_axis.set_title('Bonds')
angle_axis.set_title('Angles')

palette = itertools.cycle(seaborn.color_palette())

# directory_list bounds the loop, so only replicates listed there are drawn.
for _, bond_frame, angle_frame in zip(directory_list, bonds, angles):
    color = next(palette)
    seaborn.regplot(data=bond_frame, x=bond_frame.index, y="Force Constant", fit_reg=False, ax=bond_axis, color=color)
    seaborn.regplot(data=angle_frame, x=angle_frame.index, y="Force Constant", fit_reg=False, ax=angle_axis, color=color)

plt.show()

HYBRID NO TAPER

In [None]:
# Location of the DE-throughout hybrid results; each entry of directory_list
# names one replicate sub-directory.
base_directory = '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/tight_spread/de_throughout'
directory_list = [str(replicate) for replicate in (1, 2, 3)]

In [None]:
%matplotlib inline
# HYBRID NO TAPER
# Left panel: every particle's score at every iteration.
# Right panel: running best score (loss) of each replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
fig.suptitle('STATIC FC Estimate under a DE-throughout HO - Rh Hyd Enamides')
ax[0].set_title('Score Diversity Throughout Parameterization')
ax[1].set_title('Loss, aka Best Score Throughout Parameterization')

# Gray reference line: score of the starting force field.
for axis in ax:
    axis.axhline(y=static_score, color='gray')

# One color per replicate, taken from the active property cycle through the
# public rcParams API; Axes._get_lines.prop_cycler is private and no longer
# exists in recent Matplotlib releases.
replicate_colors = itertools.cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

for directory in directory_list:
    history_path = os.path.join(base_directory, directory, 'hybrid_opt_history.bin')
    # NOTE: unpickling can execute arbitrary code -- only load history files
    # written by trusted optimizer runs.
    with open(history_path, 'rb') as swarm_history_file:
        swarm_history = pickle.load(swarm_history_file)
    num_iters = len(swarm_history['Y'])
    num_ffs = len(swarm_history['Y'][0])
    assert num_ffs == num_particles
    color = next(replicate_colors)
    # One row per iteration, one column per particle.
    Y_history = pd.DataFrame(np.array(swarm_history['Y']).reshape((num_iters, num_ffs)))
    ax[0].plot(Y_history.index, Y_history.values, '.', color=color)
    # Best score of each iteration, made monotonically non-increasing.
    loss = Y_history.min(axis=1).cummin()
    loss.plot(kind='line', ax=ax[1], color=color, label='Final Score: '+str(loss.iloc[-1]))

ax[1].legend()

# Same tick positions on both panels, one tick per loop cycle, derived from
# the x-range of the left panel.
cycle_ticks = np.arange(1, ax[0].get_xlim()[1], cycle_iter_length)
for axis in ax:
    axis.xaxis.set_ticks(cycle_ticks)

plt.show()

In [None]:
%matplotlib inline
# Load the final score report of every replicate, tagged with its directory
# name in the FF column.

score_cols = ["Label", "Weight", "Reference", "Calculated", "Score", "FF"]
# Rows of the report that are skipped: rows 0-1 and rows 106390-106401.
report_skiprows = [0, 1, *range(106390, 106402)]
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
runs = [
    pd.read_csv(
        os.path.join(base_directory, directory, 'rh_qstatic_ho_end.txt'),
        skiprows=report_skiprows,
        sep=r'\s+',
        names=score_cols,
    ).assign(FF=directory)
    for directory in directory_list
]


In [None]:
# Reference vs. calculated values with a linear fit: the starting force field
# in the first panel, followed by one panel per replicate.
panel_frames = [static_scores, *runs]
fig, ax = plt.subplots(1, len(panel_frames), figsize=(25, 8))
palette = itertools.cycle(seaborn.color_palette())
reference_col, calculated_col = score_cols[2], score_cols[3]

# TODO: plot against starting point
for axis, frame, color in zip(ax, panel_frames, palette):
    seaborn.regplot(data=frame, x=reference_col, y=calculated_col, fit_reg=True, ax=axis, color=color)
    # Cap both axes at 5000 so every panel shows the same upper range.
    axis.set_ylim(top=5000)
    axis.set_xlim(right=5000)

plt.show()

In [None]:
# Plot only the off-diagonal elements (rows whose reference value is zero),
# sorted by calculated value: starting force field first (gray), then one
# panel per replicate.

# One panel per data set instead of a hard-coded four, so any number of
# replicates fits; squeeze=False keeps `ax` indexable even for one panel.
fig, ax = plt.subplots(1, len(runs) + 1, figsize=(32, 10), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

off_diag_start = static_scores.loc[static_scores['Reference'] == 0.0000]
off_diag_start = off_diag_start.sort_values(by='Calculated', ignore_index=True)
seaborn.regplot(data=off_diag_start, x=off_diag_start.index, y=score_cols[3], fit_reg=False, ax=ax[0], color='gray')

for axis, run in zip(ax[1:], runs):
    off_diag = run.loc[run['Reference'] == 0.0000]
    # ignore_index renumbers the rows so the x axis is the rank after sorting.
    off_diag = off_diag.sort_values(by='Calculated', ignore_index=True)

    color = next(palette)
    seaborn.regplot(data=off_diag, x=off_diag.index, y=score_cols[3], fit_reg=False, ax=axis, color=color)

plt.show()


In [None]:
# Box plot (not a violin plot) of the calculated off-diagonal values, one box
# per force field. Relies on off_diag_start from the off-diagonal cell.

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
zero_reference_frames = [run.loc[run['Reference'] == 0.0000] for run in runs]
# Replicates first, starting force field last, then ordered by calculated value.
off_diag_merged = pd.concat([*zero_reference_frames, off_diag_start])
off_diag_merged = off_diag_merged.sort_values(by='Calculated')

seaborn.boxplot(data=off_diag_merged, x='FF', y=score_cols[3])

plt.show()

In [None]:
# Plot the diagonal elements (rows whose reference value is non-zero) with a
# linear fit: starting force field first, then one panel per replicate.


def _fit_label(frame):
    """Return the legend label describing the line regplot draws for *frame*."""
    # Regress Reference on Calculated so the equation matches the plotted
    # axes (x='Calculated', y='Reference').
    slope, intercept, r_value, _, _ = stats.linregress(frame['Calculated'], frame['Reference'])
    # linregress returns r, so square it for R^2; numeric formats keep any
    # exponent that string truncation would cut off.
    return f'$y={slope:.4g}x{intercept:+.4g}$   $R^2={r_value ** 2:.4f}$'


diag_frames = [static_scores, *runs]
# One panel per data set; squeeze=False keeps `ax` indexable for one panel.
fig, ax = plt.subplots(1, len(diag_frames), figsize=(32, 8), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

for axis, frame in zip(ax, diag_frames):
    diag = frame.loc[frame['Reference'] != 0.0000]
    seaborn.regplot(data=diag, x='Calculated', y='Reference', color=next(palette), line_kws={'label': _fit_label(diag)}, ax=axis)
    axis.legend()

plt.show()

In [None]:
%matplotlib inline
# Load the bond and angle parameters of every replicate's final force field,
# tagged with the replicate's directory name in the FF column.

bonds = []
angles = []

for directory in directory_list:
    fld_path = os.path.join(base_directory, directory, 'rh_qstatic_ho_end.fld')
    # Only the rows listed in bond_rows / angle_rows are read.
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    bonds.append(pd.read_csv(fld_path, skiprows=lambda x: x not in bond_rows, sep=r'\s+', names=bond_cols).assign(FF=directory))
    angles.append(pd.read_csv(fld_path, skiprows=lambda x: x not in angle_rows, sep=r'\s+', names=angle_cols).assign(FF=directory))


In [None]:
# Force constants of every replicate: bonds on the left, angles on the right,
# one color per replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
bond_axis, angle_axis = ax[0], ax[1]
fig.suptitle('STATIC FC Estimate under a DE-throughout HO - Rh Hyd Enamides')
bond_axis.set_title('Bonds')
angle_axis.set_title('Angles')

palette = itertools.cycle(seaborn.color_palette())

# directory_list bounds the loop, so only replicates listed there are drawn.
for _, bond_frame, angle_frame in zip(directory_list, bonds, angles):
    color = next(palette)
    seaborn.regplot(data=bond_frame, x=bond_frame.index, y="Force Constant", fit_reg=False, ax=bond_axis, color=color)
    seaborn.regplot(data=angle_frame, x=angle_frame.index, y="Force Constant", fit_reg=False, ax=angle_axis, color=color)

plt.show()

HYBRID TAPER

In [None]:
# Location of the frequency-tapered DE hybrid results; each entry of
# directory_list names one replicate sub-directory.
base_directory = '/home/mfarrugi/repos/q2mm/rh-hybrid/static_start/tight_spread/freq_de_tapered'
directory_list = [str(replicate) for replicate in (1, 2, 3)]

In [None]:
%matplotlib inline
# HYBRID TAPER
# Left panel: every particle's score at every iteration.
# Right panel: running best score (loss) of each replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
fig.suptitle('STATIC FC Estimate under a DE-freq-tapered HO - Rh Hyd Enamides')
ax[0].set_title('Score Diversity Throughout Parameterization')
ax[1].set_title('Loss, aka Best Score Throughout Parameterization')

# Gray reference line: score of the starting force field.
for axis in ax:
    axis.axhline(y=static_score, color='gray')

# One color per replicate, taken from the active property cycle through the
# public rcParams API; Axes._get_lines.prop_cycler is private and no longer
# exists in recent Matplotlib releases.
replicate_colors = itertools.cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

for directory in directory_list:
    history_path = os.path.join(base_directory, directory, 'hybrid_opt_history.bin')
    # NOTE: unpickling can execute arbitrary code -- only load history files
    # written by trusted optimizer runs.
    with open(history_path, 'rb') as swarm_history_file:
        swarm_history = pickle.load(swarm_history_file)
    num_iters = len(swarm_history['Y'])
    num_ffs = len(swarm_history['Y'][0])
    assert num_ffs == num_particles
    color = next(replicate_colors)
    # One row per iteration, one column per particle.
    Y_history = pd.DataFrame(np.array(swarm_history['Y']).reshape((num_iters, num_ffs)))
    ax[0].plot(Y_history.index, Y_history.values, '.', color=color)
    # Best score of each iteration, made monotonically non-increasing.
    loss = Y_history.min(axis=1).cummin()
    loss.plot(kind='line', ax=ax[1], color=color, label='Final Score: '+str(loss.iloc[-1]))

ax[1].legend()

# Same tick positions on both panels, one tick per loop cycle, derived from
# the x-range of the left panel.
cycle_ticks = np.arange(1, ax[0].get_xlim()[1], cycle_iter_length)
for axis in ax:
    axis.xaxis.set_ticks(cycle_ticks)

plt.show()

In [None]:
%matplotlib inline
# Load the final score report of every replicate, tagged with its directory
# name in the FF column.

score_cols = ["Label", "Weight", "Reference", "Calculated", "Score", "FF"]
# Rows of the report that are skipped: rows 0-1 and rows 106390-106401.
report_skiprows = [0, 1, *range(106390, 106402)]
# sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
runs = [
    pd.read_csv(
        os.path.join(base_directory, directory, 'rh_qstatic_ho_end.txt'),
        skiprows=report_skiprows,
        sep=r'\s+',
        names=score_cols,
    ).assign(FF=directory)
    for directory in directory_list
]


In [None]:
# Reference vs. calculated values with a linear fit: the starting force field
# in the first panel, followed by one panel per replicate.
panel_frames = [static_scores, *runs]
fig, ax = plt.subplots(1, len(panel_frames), figsize=(25, 8))
palette = itertools.cycle(seaborn.color_palette())
reference_col, calculated_col = score_cols[2], score_cols[3]

# TODO: plot against starting point
for axis, frame, color in zip(ax, panel_frames, palette):
    seaborn.regplot(data=frame, x=reference_col, y=calculated_col, fit_reg=True, ax=axis, color=color)
    # Cap both axes at 5000 so every panel shows the same upper range.
    axis.set_ylim(top=5000)
    axis.set_xlim(right=5000)

plt.show()

In [None]:
# Plot only the off-diagonal elements (rows whose reference value is zero),
# sorted by calculated value: starting force field first (gray), then one
# panel per replicate.

# One panel per data set instead of a hard-coded four, so any number of
# replicates fits; squeeze=False keeps `ax` indexable even for one panel.
fig, ax = plt.subplots(1, len(runs) + 1, figsize=(32, 10), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

off_diag_start = static_scores.loc[static_scores['Reference'] == 0.0000]
off_diag_start = off_diag_start.sort_values(by='Calculated', ignore_index=True)
seaborn.regplot(data=off_diag_start, x=off_diag_start.index, y=score_cols[3], fit_reg=False, ax=ax[0], color='gray')

for axis, run in zip(ax[1:], runs):
    off_diag = run.loc[run['Reference'] == 0.0000]
    # ignore_index renumbers the rows so the x axis is the rank after sorting.
    off_diag = off_diag.sort_values(by='Calculated', ignore_index=True)

    color = next(palette)
    seaborn.regplot(data=off_diag, x=off_diag.index, y=score_cols[3], fit_reg=False, ax=axis, color=color)

plt.show()


In [None]:
# Box plot (not a violin plot) of the calculated off-diagonal values, one box
# per force field. Relies on off_diag_start from the off-diagonal cell.

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
zero_reference_frames = [run.loc[run['Reference'] == 0.0000] for run in runs]
# Replicates first, starting force field last, then ordered by calculated value.
off_diag_merged = pd.concat([*zero_reference_frames, off_diag_start])
off_diag_merged = off_diag_merged.sort_values(by='Calculated')

seaborn.boxplot(data=off_diag_merged, x='FF', y=score_cols[3])

plt.show()

In [None]:
# Plot the diagonal elements (rows whose reference value is non-zero) with a
# linear fit: starting force field first, then one panel per replicate.


def _fit_label(frame):
    """Return the legend label describing the line regplot draws for *frame*."""
    # Regress Reference on Calculated so the equation matches the plotted
    # axes (x='Calculated', y='Reference').
    slope, intercept, r_value, _, _ = stats.linregress(frame['Calculated'], frame['Reference'])
    # linregress returns r, so square it for R^2; numeric formats keep any
    # exponent that string truncation would cut off.
    return f'$y={slope:.4g}x{intercept:+.4g}$   $R^2={r_value ** 2:.4f}$'


diag_frames = [static_scores, *runs]
# One panel per data set; squeeze=False keeps `ax` indexable for one panel.
fig, ax = plt.subplots(1, len(diag_frames), figsize=(32, 8), squeeze=False)
ax = ax.ravel()
palette = itertools.cycle(seaborn.color_palette())

for axis, frame in zip(ax, diag_frames):
    diag = frame.loc[frame['Reference'] != 0.0000]
    seaborn.regplot(data=diag, x='Calculated', y='Reference', color=next(palette), line_kws={'label': _fit_label(diag)}, ax=axis)
    axis.legend()

plt.show()

In [None]:
%matplotlib inline
# Load the bond and angle parameters of every replicate's final force field,
# tagged with the replicate's directory name in the FF column.

bonds = []
angles = []

for directory in directory_list:
    fld_path = os.path.join(base_directory, directory, 'rh_qstatic_ho_end.fld')
    # Only the rows listed in bond_rows / angle_rows are read.
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    bonds.append(pd.read_csv(fld_path, skiprows=lambda x: x not in bond_rows, sep=r'\s+', names=bond_cols).assign(FF=directory))
    angles.append(pd.read_csv(fld_path, skiprows=lambda x: x not in angle_rows, sep=r'\s+', names=angle_cols).assign(FF=directory))


In [None]:
# Force constants of every replicate: bonds on the left, angles on the right,
# one color per replicate.
fig, ax = plt.subplots(1, 2, figsize=(24, 8))
bond_axis, angle_axis = ax[0], ax[1]
fig.suptitle('STATIC FC Estimate under a DE-freq-tapered HO - Rh Hyd Enamides')
bond_axis.set_title('Bonds')
angle_axis.set_title('Angles')

palette = itertools.cycle(seaborn.color_palette())

# directory_list bounds the loop, so only replicates listed there are drawn.
for _, bond_frame, angle_frame in zip(directory_list, bonds, angles):
    color = next(palette)
    seaborn.regplot(data=bond_frame, x=bond_frame.index, y="Force Constant", fit_reg=False, ax=bond_axis, color=color)
    seaborn.regplot(data=angle_frame, x=angle_frame.index, y="Force Constant", fit_reg=False, ax=angle_axis, color=color)

plt.show()