In [1]:
import numpy as np
import pandas as pd

## Table S1, LaTeX source

In [2]:
import scipy.stats
mean = scipy.stats.mstats.gmean
title_mode = ["All", "Random (a)", "Random (b)", "Diagonal", "Edge"]
titles = ['Meta-learning', 'Langmuir', 'Sips', 'Quadratic', 'DSL', 'Best AIF']
names = ['nn', 'langmuir', 'sips', 'quadratic', 'dslangmuir','best']
for t, mode in zip(title_mode, ['regular', 'random8', 'random8_1', 'diagonal', 'edge']):
    line = t + " & "
    fits = {}
    for dataset in ['iza', 'pcod', 'hcp', 'mof']:
        fits[dataset] = {}
        fits[dataset]['nn'] = np.loadtxt('results/%s/nn-%s.csv' % (dataset, mode), delimiter=',')[:, -1]
        for x in names[1:-1]:
            fits[dataset][x] = np.loadtxt('results/%s/%s-fit-%s.csv' % (dataset, x, mode), delimiter=',')[:, -1]
        fits[dataset]['best'] = np.loadtxt('results/%s/best-fit-%s.csv' % (dataset, mode), delimiter=',')
    fits['all'] = {}
    for x in names:
        fits["all"][x] = np.concatenate([fits[y][x] for y in ['iza', 'pcod', 'hcp', 'mof']])
    for dataset in ['iza', 'pcod', 'hcp', 'mof', 'all']:
        line += dataset.upper()  
        #fits_all = np.vstack([fits[x] for x in names[1:-1]])
        #fits['best'] = np.min(fits_all, axis=1)
        vals = []
        for title, name in zip(titles, names):
            n = 1/1000
            mse_list = fits[dataset][name]
            mse_list = mse_list[np.logical_not(np.isnan(mse_list))]
            vals.append(mean(mse_list/n))
        max_col = np.argmin(vals)
        for i, v in enumerate(vals):
            if i == max_col:
                line += " & \\textbf{%.3f}" % v
            else:
                line += " & %.3f" % v
        print(line + "\\\\")
        line = "    & "
    print("\\hline")

All & IZA & \textbf{0.075} & 1.316 & 0.477 & 0.175 & 0.151 & 0.149\\
    & PCOD & \textbf{0.073} & 1.303 & 0.486 & 0.149 & 0.134 & 0.138\\
    & HCP & \textbf{0.064} & 1.677 & 0.629 & 0.106 & 0.168 & 0.137\\
    & MOF & \textbf{0.037} & 0.412 & 0.235 & 0.090 & 0.057 & 0.045\\
    & ALL & \textbf{0.056} & 0.867 & 0.375 & 0.128 & 0.104 & 0.093\\
\hline
Random (a) & IZA & \textbf{0.229} & 2.090 & 2.373 & 0.724 & 4.081 & 2.842\\
    & PCOD & \textbf{0.337} & 1.964 & 1.752 & 0.611 & 3.079 & 2.786\\
    & HCP & \textbf{0.217} & 2.121 & 1.012 & 3.734 & 1.456 & 1.046\\
    & MOF & 0.695 & 1.182 & 0.883 & \textbf{0.656} & 0.716 & 0.902\\
    & ALL & \textbf{0.365} & 1.675 & 1.458 & 0.777 & 2.025 & 1.690\\
\hline
Random (b) & IZA & \textbf{0.269} & 2.369 & 1.317 & 1.257 & 29.687 & 1.507\\
    & PCOD & \textbf{0.261} & 2.380 & 1.478 & 1.636 & 11.270 & 1.737\\
    & HCP & \textbf{0.140} & 2.875 & 1.294 & 0.793 & 6.997 & 1.646\\
    & MOF & \textbf{0.045} & 0.514 & 0.837 & 11.325 & 1.753 & 1.535\\


## Table S2
The numerical data for Table S2 can be found at `results/iza/temps-p02.71-p30-f0.7-swing0.csv` by sorting columns. The $T_\mathrm{val,max}$ column is Table S2 can be repdocuced by the cell for Figure 4/S5 in `figures.ipynb.`

## Table S3

In [3]:
zeolite_names = [x.split("-")[0] for x in np.genfromtxt("../data/iza/names.csv", dtype=str)]
fingerprints = np.loadtxt("results/iza/nn-regular.csv", delimiter=",")[:, :-1] # last column stored is mean square error
df_izasc = pd.read_csv("results/supplementary/IZA-parameters.csv", index_col=0).set_index("Code")
df_izasc["Max diffusion diameter"] = np.max(df_izasc.iloc[:, 4:7], axis=1)

cols = ["Property"] + ["Fingerprint %d" % x for x in range(fingerprints.shape[1])]
rows = []
for col in df_izasc.columns:
    prop = df_izasc.loc[zeolite_names][col].values
    rows.append([col] + [np.corrcoef(prop, fingerprints[:, x])[0, 1] for x in range(fingerprints.shape[1])])
df_correlation = pd.DataFrame(rows, columns=cols).set_index("Property")

def color_correlated(val):
    color = 'green' if abs(val) > 2/3 else 'black' if abs(val) > 1/3 else 'lightgrey'
    return 'color: %s' % color
df_correlation.style

df_correlation \
    .loc[["Framework density", "Max inclusion diameter", "Max diffusion diameter", "Accessible volume (%)"]] \
    .round(decimals=3) \
    .style.applymap(color_correlated)\

Unnamed: 0_level_0,Fingerprint 0,Fingerprint 1,Fingerprint 2,Fingerprint 3,Fingerprint 4
Property,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Framework density,-0.419,-0.69,-0.157,0.158,-0.377
Max inclusion diameter,0.472,0.772,0.084,0.035,0.395
Max diffusion diameter,0.361,0.679,0.202,-0.118,0.354
Accessible volume (%),0.463,0.789,0.227,-0.148,0.424


## Table S5, LaTeX source
**Note:** The exact coefficients and $R^2$ values may subject to change due to versioning of scipy/numpy packages.

In [4]:
!cd ../supp_info/; python3 ./compressibility.py

The system cannot find the path specified.
