### Abstract
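This notebook reproduces the benchmark results from Table 2 of the paper: linear models are fitted to predict model properties, once from the raw weights and once from layer-wise weight statistics, and their R^2 / accuracy is reported.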

In [None]:
# imports
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import numpy as np
import json
import pandas as pd
import tqdm

# Limit CPU usage by capping the thread counts of the numerical backends.
# Each entry is the in-process equivalent of `export NAME=value` in a shell.
# NOTE(review): these are assigned after torch/numpy were imported above; some
# backends may only read the variables when they are first loaded -- confirm
# that the caps actually take effect.
import os

os.environ.update(
    {
        "OMP_NUM_THREADS": "4",
        "OPENBLAS_NUM_THREADS": "4",
        "MKL_NUM_THREADS": "6",
        "VECLIB_MAXIMUM_THREADS": "4",
        "NUMEXPR_NUM_THREADS": "6",
    }
)

from checkpoints_to_datasets.dataset_base import ModelDatasetBase

from model_definitions.def_baseline_models import (
    IdentityModel,
    LayerQuintiles,
)
from model_definitions.def_downstream_module import (
    DownstreamTaskLearner,
)


In [None]:
def load_dataset_compute_benchmarks(dataset_path: Path, index_dict_path: Path):
    """
    Load a preprocessed dataset and evaluate both baseline models on it.

    Args:
        dataset_path: pathlib.Path to the preprocessed dataset.pt. The loaded
            object must provide the keys "trainset", "valset" and "testset".
        index_dict_path: pathlib.Path to the index dict (JSON) of the dataset.

    Returns:
        Tuple ``(results_weights, results_stats)``: the outputs of
        ``DownstreamTaskLearner.eval_dstasks`` for the IdentityModel (weights)
        and the LayerQuintiles (statistics) baseline, i.e. dictionaries with
        R^2/accuracy for each property on the _train, _val, _test splits of
        the zoo.
    """
    print('try to load dataset from file')
    # NOTE: torch.load unpickles the file, which can execute arbitrary code;
    # only load datasets obtained from a trusted source.
    dataset = torch.load(dataset_path)

    trainset = dataset["trainset"]
    valset = dataset["valset"]
    testset = dataset["testset"]

    # index_dict: read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector
    with index_dict_path.open("r") as index_dict_file:
        index_dict = json.load(index_dict_file)

    # instantiate baseline models
    im = IdentityModel()
    lq = LayerQuintiles(index_dict)
    # instantiate downstream task wrapper
    dtl = DownstreamTaskLearner()

    # weights baseline
    results_weights = dtl.eval_dstasks(
        model=im, trainset=trainset, testset=testset, valset=valset, batch_size=100
    )
    # statistics baseline
    results_stats = dtl.eval_dstasks(
        model=lq, trainset=trainset, testset=testset, valset=valset, batch_size=100
    )
    return results_weights, results_stats


In [None]:
# Placeholders: download one of the datasets together with its matching
# index_dict, then point both paths at the downloaded files.
index_dict_path = Path('./path/to/index_dict.json')
dataset_path = Path('./path/to/dataset.pt')

In [None]:
# Calling load_dataset_compute_benchmarks fits all linear models that predict
# the properties in the dataset; the resulting R^2 / accuracy values are
# returned as res_w (weights baseline) and res_stats (statistics baseline).
res_w, res_stats = load_dataset_compute_benchmarks(dataset_path, index_dict_path)

# Print both baselines side by side for every key, scaled by 100.
print("results:")
for key in res_w:
    print(f'{key} - weights: {res_w[key]*100:2.1f} - stats: {res_stats[key]*100:2.1f}')