In [8]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import KFold, ShuffleSplit
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge, Lasso, LinearRegression, BayesianRidge
import warnings
warnings.filterwarnings("ignore")
from mmltoolkit.CV_tools import *
from mmltoolkit.featurizations import * 
from mmltoolkit.fingerprints import * 

# Load the combined dataset. The last 46 spreadsheet rows are skipped
# (NOTE(review): presumably non-data footer rows — confirm against the file).
data = pd.read_excel('../datasets/combined_data.xlsx', skipfooter=46)

# Parse every SMILES string into an RDKit molecule.
data['Mols'] = data['SMILES'].apply(Chem.MolFromSmiles)

# Chem.MolFromSmiles returns None for a SMILES it cannot parse; without this
# check the failure would only surface as an opaque error inside Chem.AddHs.
unparsed_smiles = data.loc[data['Mols'].isnull(), 'SMILES']
if len(unparsed_smiles) > 0:
    raise ValueError(
        "%d SMILES string(s) could not be parsed, e.g. %r"
        % (len(unparsed_smiles), list(unparsed_smiles[:5]))
    )

# Important: add explicit hydrogens to every molecule.
data['Mols'] = data['Mols'].apply(Chem.AddHs)

# E-state features. featurization_dict further down refers to X_Estate, so it
# has to be computed here for the notebook to run from a fresh kernel.
X_Estate = truncated_Estate_featurizer(list(data['Mols']))

num_mols = len(data)



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# Preview only the first rows rather than rendering the whole DataFrame.
data.head(10)

Unnamed: 0,Name,short name,note,group,density g/cm3,gas-phase formation enthalpy,sublimation enthalpy,"heat of explosion (kJ/g , but note sensitivity data did not report units, and values are different?)",detonation velocity (km/s),detonation pressure (GPa),Gurney energy,h50 (obs),Reference,SMILES,Mols
0,"4-picrylamino-1,2,3-triazole",,,Unst. (N2 loss),1.719,382.3,185.0,5.05,7.203,22.380,4794.0,103.0,"Mathieu, 2017",n1n[nH]cc1Nc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O,<rdkit.Chem.rdchem.Mol object at 0x1115bdbc0>
1,"1-(3',5'-dinitrophenyl)-4-nitro-1,2,3-triazole",,,Unst. (N2 loss),1.701,367.0,162.0,5.27,7.131,21.790,4623.0,56.0,"Mathieu, 2017",n1c(cn(n1)c1cc(cc(c1)N(=O)=O)N(=O)=O)N(=O)=O,<rdkit.Chem.rdchem.Mol object at 0x1115bd300>
2,"1-(3',4'-dinitrophenyl)-4-nitro-1,2,3-triazole",,,Unst. (N2 loss),1.701,403.8,162.0,5.40,7.175,22.060,4739.0,51.0,"Mathieu, 2017",n1c(cn(n1)c1ccc(c(c1)N(=O)=O)N(=O)=O)N(=O)=O,<rdkit.Chem.rdchem.Mol object at 0x112361760>
3,"4,6-dinitro-1-picryl-benzotriazole",,,Unst. (N2 loss),1.783,478.4,240.0,5.46,7.415,24.250,5016.0,40.0,"Mathieu, 2017",c1(c2c(cc(c1)N(=O)=O)n(nn2)c1c(cc(cc1N(=O)=O)N...,<rdkit.Chem.rdchem.Mol object at 0x112361990>
4,"1-Picryl-5,7-dinitrobenzotriazole",BTX,,Unst. (N2 loss),1.783,455.3,240.0,5.41,7.396,24.130,4966.0,40.0,"Mathieu, 2017",c1c(cc(c2c1nnn2c1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)...,<rdkit.Chem.rdchem.Mol object at 0x112361e40>
5,"1,5-dipicrylbenzo-[1,2-d:4,5-d']-bistriazole",,,Unst. (N2 loss),1.768,894.1,319.0,5.27,7.139,22.360,4594.0,40.0,"Mathieu, 2017",c1c2c(cc3c1n(nn3)c1c(cc(cc1N(=O)=O)N(=O)=O)N(=...,<rdkit.Chem.rdchem.Mol object at 0x112361ad0>
6,"1,7-dipicrylbenzo-[1,2-d:4,5-d']-bistriazole",,,Unst. (N2 loss),1.768,891.0,319.0,5.27,7.137,22.350,4589.0,38.0,"Mathieu, 2017",c1c2c(cc3c1n(nn3)c1c(cc(cc1N(=O)=O)N(=O)=O)N(=...,<rdkit.Chem.rdchem.Mol object at 0x112361850>
7,5-picrylaminotetrazole,,,Unst. (tetrazole),1.779,465.1,190.0,5.22,7.617,25.560,5454.0,36.0,"Mathieu, 2017",n1nn[nH]c1Nc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O,<rdkit.Chem.rdchem.Mol object at 0x112361710>
8,"5,6-dinitro-1-picryl-benzotriazole",,,Unst. (N2 loss),1.783,494.0,240.0,5.50,7.428,24.330,5051.0,35.0,"Mathieu, 2017",c12cc(c(cc1nnn2c1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)...,<rdkit.Chem.rdchem.Mol object at 0x1123615d0>
9,"1-picryl-4-picrylamino-1,2,3-triazole",,,Unst. (N2 loss),1.759,532.1,294.0,5.40,7.400,23.950,5030.0,35.0,"Mathieu, 2017",n1c(cn(n1)c1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O)Nc...,<rdkit.Chem.rdchem.Mol object at 0x112361350>


In [11]:
from mmltoolkit.descriptors import *

# Literal bag-of-bonds features for every molecule, together with the labels
# returned alongside them.
bond_types, X_LBoB = literal_bag_of_bonds(list(data['Mols']))

# Atom count per molecule after adding hydrogens; the largest count fixes the
# padded size of the Coulomb-matrix representations below.
num_atoms = [Chem.AddHs(mol).GetNumAtoms() for mol in data['Mols']]
max_atoms = int(max(num_atoms))

# Pre-allocated feature matrices, one row per molecule. Only X_Cmat_eigs is
# filled in this cell; the other two stay all-zero because the assignments
# that would fill them are disabled (see the commented lines in the loop).
packed_cmat_length = (max_atoms**2 - max_atoms)//2 + max_atoms
X_Cmat_as_vec = np.zeros((num_mols, packed_cmat_length))
X_Cmat_eigs = np.zeros((num_mols, max_atoms))
X_Cmat_unsorted_eigs = np.zeros((num_mols, max_atoms))

X_summedBoB = []
filename_list = []

for i, refcode in enumerate(data['Name']):
    # Rows below index 307 use geometry files named by row index; the
    # remaining rows use files named after the molecule.
    if i < 307:
        filename = '../sensitivity_xyz/' + str(i) + '.xyz'
    else:
        filename = '../HM_all_xyz_files/' + refcode + '.xyz'

    this_Cmat_eigs, this_Cmat_as_vec = coulombmat_and_eigenvalues_as_vec(filename, max_atoms)
    #this_Cmat_unsorted_eigs, this_Cmat_as_vec = coulombmat_and_eigenvalues_as_vec(filename, max_atoms, sort=False)

    #summed_BoB_feature_names, summedBoB = summed_bag_of_bonds(filename)
    #X_summedBoB += [summedBoB]

    filename_list.append(filename)

    X_Cmat_eigs[i, :] = this_Cmat_eigs
    #X_Cmat_unsorted_eigs[i,:] = this_Cmat_eigs
    # X_Cmat_as_vec[i,:] = this_Cmat_as_vec

#X_summedBoB = np.array(X_summedBoB)

#BoB_feature_list, X_BoB = bag_of_bonds(filename_list, verbose=False)


#data['Oxygen Balance_100'] = data['Mols'].apply(oxygen_balance_100)
#data['modified OB'] = data['Mols'].apply(modified_oxy_balance)
#data['OB atom counts'] = data['Mols'].apply(return_atom_nums_modified_OB)
#data['combined_nums'] =  data['Mols'].apply(return_combined_nums)


#X_OB100 = np.array(list(data['Oxygen Balance_100'])).reshape(-1,1)     
#X_OB1600 = np.array(list(data['Oxygen Balance_1600'])).reshape(-1,1)     
#X_OBmod = np.array(list(data['modified OB'])).reshape(-1,1)   
#X_OB_atom_counts = np.array(list(data['OB atom counts']))
#X_combined = np.array(list(data['combined_nums']))

#X_Estate_combined = np.concatenate((X_Estate, X_combined), axis=1)
#X_Estate_combined_Cmat_eigs = np.concatenate((X_Estate_combined, X_Cmat_eigs), axis=1)
#X_Estate_combined_lit_BoB = Estate_CDS_LBoB_featurizer(list(data['Mols']))
#X_CustDesrip_lit_BoB = np.concatenate(( X_combined, X_LBoB), axis=1)

                             


In [13]:
# List the column labels of the loaded dataset (including the derived 'Mols' column).
data.columns

Index(['Name', 'short name', 'note', 'group', 'density g/cm3',
       'gas-phase formation enthalpy', 'sublimation enthalpy',
       'heat of explosion (kJ/g , but note sensitivity data did not report units, and values are different?) ',
       'detonation velocity (km/s)', 'detonation pressure (GPa)',
       'Gurney energy', 'h50 (obs)', 'Reference', 'SMILES', 'Mols'],
      dtype='object')

In [22]:
# Shape check for the Y1 target array. Y1 is assigned in the cell that follows
# this one, so that cell has to be executed first.
Y1.shape

(416, 1)

In [21]:
# Target columns as (n_samples, 1) column vectors:
# Y1 = detonation velocity, Y2 = detonation pressure, Y3 = density.
Y1, Y2, Y3 = (
    np.array(data[column]).reshape(-1, 1)
    for column in (
        'detonation velocity (km/s)',
        'detonation pressure (GPa)',
        'density g/cm3',
    )
)

In [20]:
# Shape check: one row per molecule, one column per padded Coulomb-matrix eigenvalue.
X_Cmat_eigs.shape

(416, 87)

In [23]:
# Write the Coulomb-matrix eigenvalue features followed by the three target
# columns (Y1, Y2, Y3) to a single comma-separated file.
features_and_targets = np.concatenate([X_Cmat_eigs, Y1, Y2, Y3], axis=1)
np.savetxt(
    'combined_dataset_Coulomb_matrix_eigenvalues_416_mols.csv',
    features_and_targets,
    delimiter=',',
)

In [None]:
# Maps a display label to the feature matrix to be benchmarked. Only "Estate"
# is active; the remaining entries are disabled alternatives.
# X_Estate is expected to come from truncated_Estate_featurizer (see the
# data-loading cell) — make sure that line has actually been run.
featurization_dict = {
                 "Estate": X_Estate,
                 #"Oxygen balance$_{100}$": X_OB100, 
                 #"Oxygen balance$_{1600}$": X_OB1600, 
                 #"Oxygen balance atom counts": X_OB_atom_counts,
                 #"CDS": X_combined,
                 #"SoB" : X_LBoB,
                 #'Estate+CDS':   X_Estate_combined,
                 #"Coulomb matrices as vec" :   X_Cmat_as_vec,
                 #"CM eigs": X_Cmat_eigs,
                 #"Bag of Bonds": X_BoB,
                 #"Summed Bag of Bonds (sBoB)": X_summedBoB, 
                 #"\\footnotesize{Estate+CDS+SoB}":X_Estate_combined_lit_BoB,
                 #"C.D.S + LBoB": X_CustDesrip_lit_BoB,
                 #"LBoB + OB100": np.concatenate(( X_LBoB, X_OB100), axis=1)
                }

# Names of the target properties to evaluate; only one is active.
# NOTE(review): 'Explosive energy (kj/cc)' does not appear in the data.columns
# output shown earlier in this notebook — confirm it exists in the loaded data.
targets = [
 #'Density (g/cm3)',
 #'Delta Hf solid (kj/mol)',
 'Explosive energy (kj/cc)',
 #'Shock velocity (km/s)',
 #'Particle velocity (km/s)',
 #'Speed of sound (km/s)',
 #'Pressure (Gpa)',
 #'T(K)',
 #'TNT Equiv (per cc)'
  ]


In [24]:
# Target vector for the single-target experiments below.
# NOTE(review): this column is not listed in the data.columns output shown
# earlier in this notebook — confirm it exists in the loaded dataset.
y = np.array(data['Explosive energy (kj/cc)'].tolist())


In [17]:

# Nested grid-search cross-validation of Ridge regression on the literal
# bag-of-bonds features, scanning 150 log-spaced values of alpha.
ridge_param_grid = {"alpha": np.logspace(-6, 6, 150)}
nested_grid_search_CV(
    X_LBoB,
    y,
    Ridge(),
    ridge_param_grid,
    name='',
    verbose=True,
    n_jobs=-1,
)

doing outer fold 1 of 20
(87,)
best params:  {'random_state': None, 'alpha': 17.714224633510817, 'copy_X': True, 'fit_intercept': True, 'normalize': False, 'solver': 'auto', 'tol': 0.001, 'max_iter': None}
doing outer fold 2 of 20
(87,)
best params:  {'random_state': None, 'alpha': 21.323527981697591, 'copy_X': True, 'fit_intercept': True, 'normalize': False, 'solver': 'auto', 'tol': 0.001, 'max_iter': None}
doing outer fold 3 of 20
(87,)
best params:  {'random_state': None, 'alpha': 21.323527981697591, 'copy_X': True, 'fit_intercept': True, 'normalize': False, 'solver': 'auto', 'tol': 0.001, 'max_iter': None}
doing outer fold 4 of 20
(87,)
best params:  {'random_state': None, 'alpha': 25.668233015746946, 'copy_X': True, 'fit_intercept': True, 'normalize': False, 'solver': 'auto', 'tol': 0.001, 'max_iter': None}
doing outer fold 5 of 20
(87,)
best params:  {'random_state': None, 'alpha': 14.715846019280558, 'copy_X': True, 'fit_intercept': True, 'normalize': False, 'solver': 'auto', 't

{'MAE': 0.3600940333069545,
 'MAE_std': 0.061267474783225237,
 'MAE_train': 0.3110925613307477,
 'MAPE': 10.821934462726896,
 'R2': 0.702264509845824,
 'RMSE': 0.4689342754460305,
 'rP': 0.75214479447882265,
 'rP_train': 0.80715726161527568}

In [None]:
# Nested grid-search cross-validation of RBF kernel ridge regression on the
# literal bag-of-bonds features.
krr_param_grid = {
    "alpha": np.logspace(-15, 2, 300),
    "gamma": np.logspace(-15, -2, 100),
    "kernel": ['rbf'],
}

# A fixed random_state makes the 20 random 80/20 outer splits reproducible
# across kernel restarts; without it every run scores different splits.
krr_outer_cv = ShuffleSplit(n_splits=20, test_size=0.2, random_state=0)

nested_grid_search_CV(
    X_LBoB,
    y,
    KernelRidge(),
    krr_param_grid,
    name='',
    outer_cv=krr_outer_cv,
    verbose=True,
    n_jobs=-1,
)

doing outer fold 1 of 20
best params:  {'alpha': 0.0009923286228832545, 'coef0': 1, 'degree': 3, 'gamma': 0.00019630406500402764, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 2 of 20
best params:  {'alpha': 0.0442063797202853, 'coef0': 1, 'degree': 3, 'gamma': 0.0008902150854450393, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 3 of 20
best params:  {'alpha': 1.2030053494233242e-14, 'coef0': 1, 'degree': 3, 'gamma': 1e-15, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 4 of 20
best params:  {'alpha': 4.221026320156903e-15, 'coef0': 1, 'degree': 3, 'gamma': 4.750810162102813e-12, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 5 of 20
best params:  {'alpha': 0.03402299765678064, 'coef0': 1, 'degree': 3, 'gamma': 0.000657933224657571, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 6 of 20
best params:  {'alpha': 0.009187782428863322, 'coef0': 1, 'degree': 3, 'gamma': 0.0012045035402587837, 'kernel': 'rbf', 'kernel_params': None}
doing o

In [18]:
from mmltoolkit.test_everything import * 
from sklearn.model_selection import ShuffleSplit 

# Run the benchmark over the configured featurizations and targets; returns
# the per-model scores (results) and the selected winners (best).
# Original run note: started 12:55 pm.
results, best = test_everything(
    data,
    featurization_dict,
    targets,
    verbose=True,
    normalize=True,
)

running target Explosive energy (kj/cc)
    testing featurization Estate
doing outer fold 1 of 20
best params:  {'alpha': 0.1259895277795026, 'coef0': 1, 'degree': 3, 'gamma': 0.000657933224657571, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 2 of 20
best params:  {'alpha': 0.08506793416364136, 'coef0': 1, 'degree': 3, 'gamma': 0.0012045035402587837, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 3 of 20
best params:  {'alpha': 0.2126959386668683, 'coef0': 1, 'degree': 3, 'gamma': 0.0008902150854450393, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 4 of 20
best params:  {'alpha': 0.029848079169243213, 'coef0': 1, 'degree': 3, 'gamma': 0.005462277217684359, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 5 of 20
best params:  {'alpha': 0.1259895277795026, 'coef0': 1, 'degree': 3, 'gamma': 0.0012045035402587837, 'kernel': 'rbf', 'kernel_params': None}
doing outer fold 6 of 20
best params:  {'alpha': 4.221026320156903e-15, 'coef0': 1, 'degree': 3,

In [19]:
# Display the nested scores mapping: target -> featurization -> model -> metric.
results

{'Explosive energy (kj/cc)': {'Estate': {'KRR': {'MAE': 0.6131341189081234,
    'MAE_std': 0.08252668296901972,
    'MAE_train': 0.48573988141301544,
    'MAPE': 21.51181007946688,
    'R2': 0.29733084993409253,
    'RMSE': 0.8006401004855188,
    'rP': 0.4293485513989098,
    'rP_train': 0.5237574495145655},
   'mean': {'MAE': 0.6618840909090908,
    'MAE_std': 0.1041197516691472,
    'MAE_train': 0.6854746730083235,
    'MAPE': 23.783918880880545,
    'R2': -0.05429922814186698,
    'RMSE': 0.8737481301849408,
    'rP': 3.2733838238459338e-49,
    'rP_train': 1.2234623058579694e-48}}}}

In [None]:
from pprint import PrettyPrinter

# Pretty-print the nested results dictionary with the default printer settings.
PrettyPrinter().pprint(results)


In [None]:
import pickle

# Persist the benchmark outputs. The context managers guarantee each file is
# flushed and closed, which the bare open(...) calls did not.
with open("test_all_results3.pkl", "wb") as results_file:
    pickle.dump(results, results_file)
with open("test_all_best3.pkl", "wb") as best_file:
    pickle.dump(best, best_file)

In [2]:
import pickle

# Reload previously saved benchmark outputs, closing each file after reading.
# NOTE(review): these file names ("test_all_results.pkl", "test_all_best.pkl")
# differ from the ones written by the saving cell ("...3.pkl") — confirm that
# loading the older files is intended.
# pickle.load can execute arbitrary code: only load files you created yourself.
with open("test_all_results.pkl", "rb") as results_file:
    results = pickle.load(results_file)
with open("test_all_best.pkl", "rb") as best_file:
    best = pickle.load(best_file)

In [9]:
# Display the [featurization, model] pair recorded in `best` for each target.
best

{'Delta Hf solid (kj/mol)': ['\\footnotesize{Estate+CDS+SoB}', 'KRR'],
 'Density (g/cm3)': ['\\footnotesize{Estate+CDS+SoB}', 'Ridge'],
 'Explosive energy (kj/cc)': ['\\footnotesize{Estate+CDS+SoB}', 'KRR'],
 'Particle velocity (km/s)': ['SoB', 'Ridge'],
 'Pressure (Gpa)': ['SoB', 'KRR'],
 'Shock velocity (km/s)': ['SoB', 'KRR'],
 'Speed of sound (km/s)': ['Bag of Bonds', 'Ridge'],
 'T(K)': ['SoB', 'KRR'],
 'TNT Equiv (per cc)': ['SoB', 'Ridge']}

In [14]:
# Benchmark restricted to a single oxygen-balance featurization.
# NOTE(review): X_OB1600 is not assigned by any uncommented line visible in
# this notebook (its only assignment is commented out) — it must be defined
# before this cell can run on a fresh kernel.
featurization_dictOB = {
                 "OB$_{1600}$": X_OB1600
}


# NOTE(review): every other cell imports from `mmltoolkit.<module>`; this one
# uses `mmltoolkit.mmltoolkit.<module>` — confirm which package path is current.
from mmltoolkit.mmltoolkit.test_everything import * 
# Overwrites `results` and `best` with the scores for this featurization only.
# The 5-fold split is shuffled without a random_state, so it varies between runs.
# NOTE(review): the other test_everything call in this notebook passes
# `normalize=` and no `cv=` — confirm this signature is still supported.
(results, best) = test_everything(data, featurization_dictOB, targets, cv=KFold(n_splits=5,shuffle=True), verbose=True )

running target Density (g/cm3)
    doing hyperparameter search for featurization OB$_{1600}$
running target Delta Hf solid (kj/mol)
    doing hyperparameter search for featurization OB$_{1600}$
running target Explosive energy (kj/cc)
    doing hyperparameter search for featurization OB$_{1600}$
running target Shock velocity (km/s)
    doing hyperparameter search for featurization OB$_{1600}$
running target Particle velocity (km/s)
    doing hyperparameter search for featurization OB$_{1600}$
running target Speed of sound (km/s)
    doing hyperparameter search for featurization OB$_{1600}$
running target Pressure (Gpa)
    doing hyperparameter search for featurization OB$_{1600}$
running target T(K)
    doing hyperparameter search for featurization OB$_{1600}$
running target TNT Equiv (per cc)
    doing hyperparameter search for featurization OB$_{1600}$


In [None]:
# Print a LaTeX table from the results dictionary

# With standard deviation values 

In [8]:
 

# Short LaTeX column headers for each target property. Raw strings keep every
# LaTeX backslash literal; the previous '\Delta' was an invalid Python escape
# sequence inside a normal string and triggers a warning on newer Pythons.
target_short_names = {
    'Density (g/cm3)': r'\footnotesize{$\rho ,\frac{\hbox{g}}{\hbox{cc}}$ }',
    'Delta Hf solid (kj/mol)': r'\footnotesize{$\Delta H_f^{\ff{s}} ,\frac{\hbox{kJ}}{\hbox{mol}}$ }',
    'Explosive energy (kj/cc)': r'\footnotesize{$E_{\ff{e}} ,\frac{\hbox{kJ}}{\hbox{cc}}$ }',
    'Shock velocity (km/s)': r'\footnotesize{$V_{\ff{s}} ,\frac{\hbox{km}}{\hbox{s}}$ }',
    'Particle velocity (km/s)': r'\footnotesize{$V_{\ff{p}},\frac{\hbox{km}}{\hbox{s}}$ }',
    'Speed of sound (km/s)': r'\footnotesize{$V_{\ff{snd}},\frac{\hbox{km}}{\hbox{s}}$ }',
    'Pressure (Gpa)': r'\footnotesize{$P$, GPa}',
    'T(K)': r'\footnotesize{$T$, K}',
    'TNT Equiv (per cc)': r'\footnotesize{$\frac{\hbox{TNT}_{\ff{equiv}}}{\hbox{cc}}$ }',
}

# Table preamble: two label columns (model, featurization) plus one per target.
print("\\begin{table*}")
print("\\begin{tabular}{cc" + "c" * len(targets) + "}")

# Header row: two empty label cells followed by the short target names.
print(" & " + "".join(" & " + target_short_names[target] for target in targets) + " \\\\")
print("\\hline")

# Featurization and model names are taken from the first target's entries.
featurizations = list(results[targets[0]].keys())
models = list(results[targets[0]][featurizations[0]].keys())

for model in models:
    for i, featurization in enumerate(featurizations):
        # Each cell shows the MAE with its standard deviation as a superscript;
        # the [featurization, model] pair recorded in `best` is set in bold.
        cells = []
        for target in targets:
            scores_dict = results[target][featurization][model]
            if [featurization, model] == best[target]:
                cell_format = "\\bf{%5.2f}$^{%4.2f}$"
            else:
                cell_format = "%5.2f$^{%4.2f}$"
            cells.append(cell_format % (scores_dict['MAE'], scores_dict['MAE_std']))

        # The model name is printed only on the first row of its group.
        model_label = model if i == 0 else ""
        print(model_label + " & " + featurization + " & " + " & ".join(cells) + "\\\\")

print("\\end{tabular}")
print("\\end{table*}")

\begin{table*}
\begin{tabular}{ccccccccccc}
 &  & \footnotesize{$\rho ,\frac{\hbox{g}}{\hbox{cc}}$ } & \footnotesize{$\Delta H_f^{\ff{s}} ,\frac{\hbox{kJ}}{\hbox{mol}}$ } & \footnotesize{$E_{\ff{e}} ,\frac{\hbox{kJ}}{\hbox{cc}}$ } & \footnotesize{$V_{\ff{s}} ,\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$V_{\ff{p}},\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$V_{\ff{snd}},\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$P$, GPa} & \footnotesize{$T$, K} & \footnotesize{$\frac{\hbox{TNT}_{\ff{equiv}}}{\hbox{cc}}$ } \\
\hline
KRR & Estate &  0.08$^{0.02}$ & 250.68$^{64.95}$ &  0.56$^{0.10}$ &  0.47$^{0.05}$ &  0.14$^{0.02}$ &  0.46$^{0.09}$ &  4.07$^{0.50}$ & 546.86$^{79.63}$ &  0.17$^{0.03}$\\
 & Oxygen balance$_{1600}$ &  0.08$^{0.01}$ & 1493.86$^{5055.91}$ &  0.53$^{0.09}$ &  0.61$^{0.11}$ &  0.13$^{0.02}$ &  0.53$^{0.08}$ &  4.47$^{0.67}$ & 458.29$^{75.84}$ &  0.20$^{0.03}$\\
 & CDS &  0.07$^{0.01}$ & 188.27$^{31.77}$ &  0.52$^{0.06}$ &  0.41$^{0.08}$ &  0.13$^{0.02}$ &  0.33$^{0.06}

# With confidence intervals from t-values

In [14]:
from scipy.stats import t

# Two-sided 95% interval of Student's t with 20 degrees of freedom; the upper
# bound is the critical value displayed as this cell's output.
lower_t_20, upper_t_20 = t.interval(0.95, 20)
upper_t_20

2.0859634472658364

In [15]:
from scipy.stats import t
from numpy import average, std
from math import sqrt

# Degrees of freedom used for the Student-t critical value.
num_DOF = 19

# Upper bound of the two-sided 95% t interval, used as the critical value.
t_value = t.interval(0.95, num_DOF)[1]

# Table preamble: two label columns (model, featurization) plus one per target.
print("\\begin{table*}")
print("\\begin{tabular}{cc" + "c" * len(targets) + "}")

# Header row: two empty label cells followed by the short target names.
print(" & " + "".join(" & " + target_short_names[target] for target in targets) + " \\\\")
print("\\hline")

# Featurization and model names are taken from the first target's entries.
featurizations = list(results[targets[0]].keys())
models = list(results[targets[0]][featurizations[0]].keys())

for model in models:
    for position, featurization in enumerate(featurizations):
        row_cells = []
        for target in targets:
            scores_dict = results[target][featurization][model]
            mean = scores_dict['MAE']
            stddev = scores_dict['MAE_std']

            # Half-width of the interval around the mean MAE.
            # NOTE(review): the denominator is sqrt(num_DOF); whether that is
            # the intended standard error depends on how MAE_std was computed
            # (population vs. sample standard deviation) — confirm.
            half_width = t_value*stddev/sqrt(num_DOF)
            upper = mean + half_width
            lower = mean - half_width

            # Upper bound as superscript, lower bound as subscript; the pair
            # recorded in `best` for this target is set in bold.
            template = "%5.2f$^{%4.2f}_{%4.2f}$"
            if [featurization, model] == best[target]:
                template = "\\bf{%5.2f}$^{%4.2f}_{%4.2f}$"
            row_cells.append(template % (mean, upper, lower))

        # The model name is printed only on the first row of its group.
        leading_label = model if position == 0 else ""
        print(leading_label + " & " + featurization + " & " + " & ".join(row_cells) + "\\\\")

print("\\end{tabular}")
print("\\end{table*}")

\begin{table*}
\begin{tabular}{ccccccccccc}
 &  & \footnotesize{$\rho ,\frac{\hbox{g}}{\hbox{cc}}$ } & \footnotesize{$\Delta H_f^{\ff{s}} ,\frac{\hbox{kJ}}{\hbox{mol}}$ } & \footnotesize{$E_{\ff{e}} ,\frac{\hbox{kJ}}{\hbox{cc}}$ } & \footnotesize{$V_{\ff{s}} ,\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$V_{\ff{p}},\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$V_{\ff{snd}},\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$P$, GPa} & \footnotesize{$T$, K} & \footnotesize{$\frac{\hbox{TNT}_{\ff{equiv}}}{\hbox{cc}}$ } \\
\hline
KRR & Estate &  0.08$^{0.09}_{0.08}$ & 250.68$^{281.86}_{219.49}$ &  0.56$^{0.60}_{0.51}$ &  0.47$^{0.50}_{0.45}$ &  0.14$^{0.15}_{0.13}$ &  0.46$^{0.51}_{0.42}$ &  4.07$^{4.31}_{3.83}$ & 546.86$^{585.10}_{508.63}$ &  0.17$^{0.19}_{0.16}$\\
 & Oxygen balance$_{1600}$ &  0.08$^{0.09}_{0.08}$ & 1493.86$^{3921.57}_{-933.85}$ &  0.53$^{0.58}_{0.49}$ &  0.61$^{0.66}_{0.56}$ &  0.13$^{0.14}_{0.12}$ &  0.53$^{0.57}_{0.49}$ &  4.47$^{4.80}_{4.15}$ & 458.29$^{494.70}_{421.87

In [135]:
num_targets = len(targets)

# Table preamble: two label columns, one column per target, and a final
# column holding the average over targets.
print("\\begin{tabular}{ccc" + "c" * num_targets + "}")

# Header row: two empty label cells, the short target names, then "avg".
print(" & " + "".join(" & " + target_short_names[target] for target in targets) + " & avg  \\\\")
print("\\hline")

# Featurization and model names are taken from the first target's entries.
featurizations = list(results[targets[0]].keys())
models = list(results[targets[0]][featurizations[0]].keys())

for model in models:
    for i, featurization in enumerate(featurizations):
        r2_values = []
        for target in targets:
            scores_dict = results[target][featurization][model]
            # The results displayed earlier in this notebook store the score
            # under 'R2'; older saved results used 'r2'. Accept either so the
            # table works with both instead of raising KeyError.
            if 'R2' in scores_dict:
                this_r2 = scores_dict['R2']
            else:
                this_r2 = scores_dict['r2']
            r2_values.append(this_r2)

        # Unweighted mean of the scores across all targets for this row.
        avg_over_targets = sum(r2_values)

        # The model name is printed only on the first row of its group.
        model_label = model if i == 0 else ""
        print(
            model_label + " & " + featurization + " & "
            + " & ".join("%4.2f" % value for value in r2_values)
            + " & %5.2f \\\\" % (avg_over_targets/num_targets)
        )

print("\\end{tabular}")

\begin{tabular}{cccccccccccc}
 &  & \footnotesize{$\rho ,\frac{\hbox{g}}{\hbox{cc}}$ } & \footnotesize{$\Delta H_f^{\ff{s}} ,\frac{\hbox{kJ}}{\hbox{mol}}$ } & \footnotesize{$E_{\ff{e}} ,\frac{\hbox{kJ}}{\hbox{cc}}$ } & \footnotesize{$V_{\ff{s}} ,\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$V_{\ff{p}},\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$V_{\ff{snd}},\frac{\hbox{km}}{\hbox{s}}$ } & \footnotesize{$P$ (GPa)} & \footnotesize{$T$ (K)} & \footnotesize{$\frac{\hbox{TNT}_{\ff{equiv}}}{\hbox{cc}}$ } & avg  \\
\hline
KR & Estate & 0.69 & 0.39 & 0.72 & 0.53 & 0.58 & 0.56 & 0.55 & 0.51 & 0.60 &  0.57 \\
 & CDS & 0.57 & 0.63 & 0.56 & 0.57 & 0.44 & 0.50 & 0.45 & 0.50 & 0.42 &  0.52 \\
 & SoB & 0.70 & 0.95 & 0.72 & 0.70 & 0.67 & 0.73 & 0.67 & 0.61 & 0.70 &  0.72 \\
 & CM eigs & 0.40 & 0.33 & 0.52 & 0.25 & 0.51 & 0.20 & 0.33 & 0.21 & 0.34 &  0.34 \\
 & Bag of Bonds & 0.61 & 0.78 & 0.72 & 0.73 & 0.56 & 0.75 & 0.61 & 0.46 & 0.49 &  0.63 \\
 & \footnotesize{Estate+CDS+SoB} & 0.74 & 0.94 & 0.7

In [131]:
# Display the list of target property names currently held in `targets`.
targets

['Density (g/cm3)',
 'Delta Hf solid (kj/mol)',
 'Explosive energy (kj/cc)',
 'Shock velocity (km/s)',
 'Particle velocity (km/s)',
 'Speed of sound (km/s)',
 'Pressure (Gpa)',
 'T(K)',
 'TNT Equiv (per cc)']

In [121]:
# Display the [featurization, model] pair recorded in `best` for each target.
best

{'Delta Hf solid (kj/mol)': ['Estate+CDS+SoB', 'mean'],
 'Density (g/cm3)': ['Estate+CDS+SoB', 'mean'],
 'Explosive energy (kj/cc)': ['Estate+CDS+SoB', 'mean'],
 'Particle velocity (km/s)': ['Estate+CDS+SoB', 'mean'],
 'Pressure (Gpa)': ['Estate+CDS+SoB', 'mean'],
 'Shock velocity (km/s)': ['Estate+CDS+SoB', 'mean'],
 'Speed of sound (km/s)': ['Estate+CDS+SoB', 'mean'],
 'T(K)': ['Estate+CDS+SoB', 'mean'],
 'TNT Equiv (per cc)': ['Estate+CDS+SoB', 'mean']}