In [3]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import random
import pandas as pd

def parallel_coordinates(data_sets, styles):
    """Draw a five-axis parallel-coordinates figure and return ``plt``.

    The figure consists of ``dims - 1`` side-by-side panels with no horizontal
    spacing.  Every row of ``data_sets`` becomes one poly-line, and a dashed
    black reference line is drawn on top.  The panel between axis 2 and
    axis 3 is intentionally left empty, so the first three axes (coloured by
    ``colors_first``) and the last two axes (coloured by ``colors_second``)
    appear as two separate groups.

    Parameters
    ----------
    data_sets : sequence of sequences of numbers
        One row per line to draw.  Each row must hold exactly five values, in
        the order of the hard-coded axis limits below.  The tick labels of
        axes 1 and 3 are printed with the sign flipped, so callers are
        expected to pass those two columns already negated.
    styles : pair ``(colors_first, colors_second)``
        Two sequences with one Matplotlib colour/format string per row.
        ``colors_first`` is used for the panels left of the gap,
        ``colors_second`` for the panels right of it.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module.  The right-hand twin axis is the
        current axes when the function returns.

    Raises
    ------
    ValueError
        If ``data_sets`` is empty or its rows do not have five values.
    """
    new_feature_names = ['Singlet-triplet\ngap', 'Oscillator\nstrength', 
                     'Abs. diff.\nof VEE',
                     'Activation\nenergy', 'Reaction\nenergy']
    # Fixed (lower, upper) display limits per axis; data is scaled against
    # these rather than against its own min/max.
    ylim_list = [(0.0198, 1.69), (-3.05, -0.000343), (0.642, 3.76), (-110, -78.5), (-14.2, 5.51)]
    # Reference values drawn as the dashed black line, one per axis.
    ref_line = [0.571, -0.171, 1.62, -87.0, -5.46]
    # Index of the panel that stays empty (between axis 2 and axis 3).
    gap_panel = 2
    # Axes whose tick labels are shown with the sign flipped.
    sign_flipped_axes = (1, 3)

    if len(data_sets) == 0:
        raise ValueError("data_sets must contain at least one row")
    dims = len(data_sets[0])
    if dims != len(ylim_list):
        raise ValueError(
            "each row must have %d values, got %d" % (len(ylim_list), dims))

    x = range(dims)
    fig, axes = plt.subplots(1, dims - 1, sharey=False)
    colors_first, colors_second = styles

    def normalize(row):
        # Map every value linearly so its axis limits land on 0 and 1.
        return [(value - lo) / float(hi - lo)
                for value, (lo, hi) in zip(row, ylim_list)]

    norm_data_sets = [normalize(row) for row in data_sets]
    norm_ref_line = normalize(ref_line)

    last_panel = len(axes) - 1
    for i, ax in enumerate(axes):
        segment_x = [x[i], x[i + 1]]

        if i != gap_panel:
            palette = colors_first if i < gap_panel else colors_second
            for dsi, d in enumerate(norm_data_sets):
                ax.plot(segment_x, [d[i], d[i + 1]], palette[dsi], linewidth=0.5)
            # Drawn after the data so the reference line stays on top.
            ax.plot(segment_x, [norm_ref_line[i], norm_ref_line[i + 1]], 'k--')

        ax.set_xlim(segment_x)

        # Each panel labels its left edge; the last panel also labels its
        # right edge because no further panel follows it.  Ticks are fixed
        # before the labels are assigned so the two always match in number.
        tick_positions = segment_x if i == last_panel else segment_x[:1]
        ax.set_xticks(tick_positions)
        ax.set_xticklabels([new_feature_names[pos] for pos in tick_positions],
                           fontsize=13)

        # The y axis works in normalized units; the tick labels show the
        # original values at the lower limit, the reference and the upper limit.
        lo, hi = ylim_list[i]
        shown_values = [lo, ref_line[i], hi]
        if i in sign_flipped_axes:
            shown_values = [-value for value in shown_values]
        ax.set_ylim(0, 1)
        ax.set_yticks([0, norm_ref_line[i], 1])
        ax.set_yticklabels([str(value) for value in shown_values], fontsize=11)
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)

    # The final axis gets its scale on the right-hand side of the last panel.
    # It uses the raw limits directly, which lines up with the normalized
    # 0..1 scale of the panel underneath because the mapping is linear.
    axx = plt.twinx(axes[-1])
    final_lo, final_hi = ylim_list[-1]
    final_ticks = [final_lo, ref_line[-1], final_hi]
    axx.set_ylim(final_lo, final_hi)
    axx.set_yticks(final_ticks)
    axx.set_yticklabels([str(value) for value in final_ticks], fontsize=11)
    axx.spines['top'].set_visible(False)
    axx.spines['bottom'].set_visible(False)

    # Stack the subplots
    plt.subplots_adjust(wspace=0)

    return plt




In [9]:
# Load the benchmark tables from disk. Unpickling can run arbitrary code,
# so this file should only ever come from a trusted source.
overall_benchmark_dict = pd.read_pickle("data_for_parallel_coordinates.pkl")
# The "original data" entry is discarded right after loading.
overall_benchmark_dict.pop("original data")
class renamer():
    """Callable that makes repeated names unique.

    The first time a name is passed in it is returned unchanged.  Every later
    occurrence of the same name is returned as ``"<name>_<k>"``, where ``k``
    counts the repeats starting at 1.
    """

    def __init__(self):
        # Maps each name seen so far to the number of repeats encountered.
        self.d = {}

    def __call__(self, x):
        repeats = self.d.get(x, -1) + 1
        self.d[x] = repeats
        if repeats == 0:
            return x
        return "%s_%d" % (x, repeats)

# Display the "janus_scaler" table with repeated column names made unique.
# The renamed frame is only shown as the cell output; it is not stored.
overall_benchmark_dict["janus_scaler"].rename(renamer(), axis="columns")

Unnamed: 0,singlet-triplet value,oscillator strength,abs_diff_vee,normalized singlet-triplet value,normalized oscillator strength,normalized abs_diff_vee,fitness,fold,activation_energy,reaction_energy,...,fitness_4,similarity_Fexofenadine,tpsa,logP,fold_4,fitness_5,similarity_Ranolazine,tpsa_1,logP_1,fold_5
0,1.514522,2.912081,3.052312,1.291722,18.966984,0.350975,12.49286,11,83.540689,-11.449214,...,22.24432,0.017258,491.87,-13.5736,14,7.748223,0.082286,219.13,5.82377,10
1,1.586015,2.922267,3.19913,1.472261,19.035347,0.492418,12.06143,11,84.059621,-7.671786,...,22.091209,0.027749,490.65,-14.11,4,7.658366,0.099031,251.14,4.79486,4
2,1.479235,2.709331,3.051633,1.202613,17.606248,0.350321,12.018441,14,78.762715,-13.05373,...,21.882208,0.026069,481.76,-13.94151,4,7.592647,0.083524,219.13,6.44867,10
3,1.553003,2.806917,3.129872,1.388896,18.26119,0.425695,11.958397,14,83.305391,-8.959093,...,21.877273,0.019874,490.65,-14.1525,4,7.580663,0.083524,219.13,5.15487,10
4,1.558988,3.045321,3.124974,1.40401,19.861218,0.420977,11.778713,11,87.581216,-9.349952,...,21.866152,0.028481,481.76,-13.94151,4,7.547494,0.086009,222.29,5.71257,10
5,1.371855,2.364789,3.093827,0.931452,15.293887,0.39097,11.62536,11,83.138196,-10.139656,...,21.80255,0.020725,484.75,-13.58901,4,7.527654,0.083524,219.13,4.88717,10
6,1.442467,2.146706,2.948503,1.109765,13.83024,0.250966,11.189391,14,82.209187,-13.1559,...,21.737111,0.023541,484.75,-13.54811,4,7.517142,0.088506,215.22,4.63257,10
7,1.544937,2.482707,3.107737,1.368527,16.085282,0.404371,11.099008,14,84.952448,-8.68512,...,21.732939,0.023541,484.75,-13.00452,4,7.447678,0.077951,219.13,6.01977,10
8,1.445693,2.667536,3.152895,1.117912,17.325745,0.447876,11.088439,11,83.042316,-9.611592,...,21.717852,0.02145,484.75,-13.08551,4,7.421831,0.077556,219.13,5.23697,10
9,1.123413,1.288466,2.406431,0.304074,8.07024,-0.271261,11.069337,2,95.857779,0.417083,...,21.713487,0.02145,484.75,-13.06621,4,7.420627,0.071183,239.94,4.56227,10


In [None]:
# Short display names used for the console output and the output file names.
name_dict = {"original data": "original data", "janus_scaler":"WS", "janus_hybrid":"NMD-WS",
                  "janus_uncertainty":"PIO", "janus_utopian":"NMD"}

# Columns plotted as parallel axes, in axis order.
feature_columns = ["singlet-triplet value", "oscillator strength", "abs_diff_vee",
                   "activation_energy", "reaction_energy"]
# Percentile columns that decide the line colour left / right of the gap.
first_group_top_columns = ["singlet-triplet value top%", "oscillator strength top%",
                           "abs_diff_vee top%"]
second_group_top_columns = ["activation_energy top%", "reaction_energy top%"]

new_feature_names = ['Singlet-triplet\ngap', 'Oscillator\nstrength', 
                     'Abs. diff. of vertical\nexcitation energy',
                     'Activation\nenergy', 'Reaction\nenergy']

PASS_COLOR = "#F39C12"
FAIL_COLOR = "#2980B9"
TOP_PERCENT_CUTOFF = 15


def _pass_fail_colors(frame, top_percent_columns):
    """Return one colour per row of ``frame``, in row order.

    A row gets ``PASS_COLOR`` when every listed column is at most
    ``TOP_PERCENT_CUTOFF`` and ``FAIL_COLOR`` otherwise.  The result is built
    positionally, so it works for any number of rows and any index labels.
    """
    passed = (frame[top_percent_columns] <= TOP_PERCENT_CUTOFF).all(axis=1)
    return [PASS_COLOR if ok else FAIL_COLOR for ok in passed]


# NOTE(review): `combined_data_dict` is not defined in the cells visible here;
# confirm that an earlier cell creates it before this one runs.
for k, method in enumerate(["janus_scaler","janus_utopian", "janus_hybrid", "janus_uncertainty"]):
    subdata = combined_data_dict[method]

    # Work on an explicit, writable copy so the sign flips below can neither
    # touch the source frame nor fail on a read-only array.
    data = subdata[feature_columns].to_numpy(copy=True)
    data[:, 1] *= -1
    data[:, 3] *= -1

    colors_first = _pass_fail_colors(subdata, first_group_top_columns)
    colors_second = _pass_fail_colors(subdata, second_group_top_columns)

    pc = parallel_coordinates(data, styles=[colors_first, colors_second])

    # pyplot's xticks acts on the current axes only, i.e. the right-most
    # panel covering x = 3..4.  Setting ticks widens the view limits to
    # include every tick, so only the two ticks inside that panel are passed.
    pc.xticks([3, 4], new_feature_names[3:])
    print(name_dict[method])
    if k == 0:
        # The current axes holds no plotted lines, so the legend entries are
        # given as explicit proxy handles in the pass / fail colours.
        legend_handles = [plt.Line2D([], [], color=PASS_COLOR),
                          plt.Line2D([], [], color=FAIL_COLOR)]
        plt.legend(legend_handles, ["Successful", "Failed"])
    pc.savefig(f"{k+1}{name_dict[method]}.svg", format="svg", bbox_inches='tight')
    pc.show()
    # Release the figure so open figures do not pile up across iterations.
    pc.close()