This is the IPython notebook used to generate the figure showing the distribution of TanimotoCombo and TFD scores for a selection of molecule pairs.

Created by Jordan Ehrman for the OpenForceField Force Field Comparison Project

In [1]:
# importing
import os
import pandas as pd
from itertools import permutations

In [3]:
# Build every ordered pair of distinct force fields; both (A, B) and (B, A)
# are produced, giving 5 * 4 = 20 pairs.
ffs = [
    'GAFF',
    'GAFF2',
    'MMFF94',
    'MMFF94S',
    'SMIRNOFF',
]
perms = list(permutations(ffs, 2))

In [5]:
# Creating long lists of TFD and TanimotoCombo scores (stored in csv's)
# Reads alldata1.csv .. alldata3.csv and, for every ordered force-field pair
# that has both a 'TFD A B' and a 'TANI A B' column, collects one
# [TFD, TanimotoCombo] entry per row into gigantictup.
gigantictup = []
for j in [1,2,3]:
    bigdf = pd.read_csv('alldata' + str(j) + '.csv')
    bigtemptuplist = []
    for i in perms:
        tfd_column = 'TFD ' + i[0] + ' ' + i[1]
        tani_column = 'TANI ' + i[0] + ' ' + i[1]
        # perms holds both orderings of each pair; orderings without both
        # columns in this file are skipped explicitly rather than by catching
        # every exception, so unrelated errors are no longer hidden.
        if tfd_column not in bigdf.columns or tani_column not in bigdf.columns:
            continue
        TFDlist = bigdf[tfd_column]
        TANIlist = bigdf[tani_column]
        # Pair the two columns row by row.
        bigtemptuplist.extend([tfd, tani] for tfd, tani in zip(TFDlist, TANIlist))
        # Progress output: only pairs that were actually found are printed.
        print(i)
    gigantictup.extend(bigtemptuplist)
    print(j)

In [8]:
# Thin the data set by keeping every 500th score pair (indices 0, 500, 1000, ...).
# This is a fixed-stride subsample, believed to be representative of the entire set.
smallertup = gigantictup[::500]

In [10]:
# Split the sampled [TFD, TanimotoCombo] pairs into three groups so each can be
# drawn in its own color:
#   noflaglist - TFD below 0.18 and TanimotoCombo above 0.25
#   flaglist   - TFD between 0.2 and 0.6 and TanimotoCombo above 0.50
#   bufferlist - TFD between 0.18 and 0.2 with TanimotoCombo above 0.25, or
#                TFD between 0.18 and 0.6 with TanimotoCombo between 0.25 and 0.50
# NOTE(review): every comparison is strict, so a point lying exactly on a
# threshold (e.g. TFD == 0.18 or TanimotoCombo == 0.50) can fall into no group,
# as do points with TFD >= 0.6 or TanimotoCombo <= 0.25.
noflaglist = []
flaglist = []
bufferlist = []
for pair in smallertup:
    tfd = pair[0]
    tani = pair[1]
    if tfd < 0.18 and tfd < 0.6 and tani > 0.25:
        noflaglist.append(pair)
    elif 0.2 < tfd < 0.6 and tani > 0.50:
        flaglist.append(pair)
    elif (0.18 < tfd < 0.2 and tani > 0.25) or (0.18 < tfd < 0.6 and 0.25 < tani < 0.50):
        bufferlist.append(pair)

In [11]:
#importing for plotting
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as patches
from matplotlib.collections import PatchCollection
from matplotlib.patches import FancyBboxPatch
#setting parameters
# data, colors and groups are matched by position: each entry of data is a
# list of [TFD, TanimotoCombo] pairs drawn in the corresponding color/label.
data = (noflaglist, flaglist, bufferlist)
colors = ("blue","red","black")
groups = ("Similarity Flagged","Difference Flagged","Not Flagged")
plt.rcParams.update({'font.size': 15})
# Creating plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, facecolor="1.0")

# Loop names are distinct from `data` so the tuple above is not overwritten.
for points, color, group in zip(data, colors, groups):
    if not points:
        # zip(*[]) cannot be unpacked into x, y; an empty group has nothing to draw.
        continue
    x, y = zip(*points)
    ax.scatter(x, y, alpha=1, c=color, edgecolors='none', s=2, label=group)

lgnd = ax.legend(loc="upper right", scatterpoints=1, fontsize=13)
# Enlarge the legend markers (the plotted points use s=2, too small to read in
# the legend). Newer Matplotlib exposes `legend_handles`; older releases only
# have `legendHandles`.
legend_markers = getattr(lgnd, "legend_handles", None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for marker in legend_markers:
    marker.set_sizes([60])
ax.set_xlabel('Torsion Fingerprint Deviation (TFD)')
ax.set_ylabel('TanimotoCombo')
# Shaded red box: lower-left corner (0.2, 0.5), width 0.4, height 1.5.
p_fancy=FancyBboxPatch((0.2,0.5),0.4,1.5,boxstyle="square,pad=0",fc='red', ec='k',alpha=0.1, zorder=1)
ax.add_patch(p_fancy)
# Shaded blue box: lower-left corner (0.0, 0.25), width 0.18, height 1.75.
p_fancy=FancyBboxPatch((0.0,0.25),0.18,1.75,boxstyle="square,pad=0",fc='blue', ec='k',alpha=0.1, zorder=1)
ax.add_patch(p_fancy)
fig.set_dpi(100)
fig.set_size_inches(12, 6, forward=True)
# No padding around the data limits.
ax.margins(0.00)
fig.savefig('dotdist.svg')
# plt.show() instead of fig.show(): the latter warns on non-GUI backends.
plt.show()