In [None]:
import comet_ml
from comet_ml.api import API
from collections import defaultdict
import pandas as pd
import numpy as np
import os
import pickle
# Comet API client shared by the download helper in this notebook.
# NOTE(review): presumably picks up the Comet API key from the environment or
# the Comet config file -- confirm before running on a fresh machine.
comet_api = API()

# Type labels: used as the first element of the (type, energy) keys of the
# downloaded prediction tables, as "pred<type>" column suffixes and in the
# names of the output directories.
types = ["0", "3", "20", "23"]
# Energy labels: second element of the (type, energy) keys and stem of the
# "<energy>MeV.csv" output files. They are strings listed in lexicographic
# (not numeric) order, e.g. '10' comes right after '1'.
energies = [
    '0', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8',
    '0.9', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9'
]

In [None]:
def get_predictions_from_experiment(experiment_id):
    """Fetch the pickled prediction table and the training-set type of an experiment.

    Parameters
    ----------
    experiment_id : str
        Key of an experiment inside the ``schattengenie/juno`` Comet project.

    Returns
    -------
    tuple
        ``(train_type, d)``. ``train_type`` is the ``valueCurrent`` of the
        first logged parameter whose name contains ``"train_type"``, or
        ``"0"`` when the experiment has no such parameter. ``d`` is the
        unpickled content of the first asset whose file name contains
        ``"datatable_predictions"``.

    Raises
    ------
    IndexError
        If the experiment has no asset whose file name contains
        ``"datatable_predictions"``.
    """
    exp = comet_api.get("schattengenie/juno/{}".format(experiment_id))

    # get predictions: first asset whose file name mentions the prediction table
    asset = next(
        (a for a in exp.get_asset_list() if "datatable_predictions" in a["fileName"]),
        None,
    )
    if asset is None:
        # Same exception type as indexing an empty list, but with a message
        # that says which experiment is missing its predictions.
        raise IndexError(
            "experiment {!r} has no 'datatable_predictions' asset".format(experiment_id)
        )

    # NOTE(review): unpickling executes arbitrary code embedded in the payload;
    # only use this helper with experiments whose assets you trust.
    d = pickle.loads(exp.get_asset(asset["assetId"]), encoding='bytes')

    # first experiments were without train_type, for them default train dataset was 0
    train_type = next(
        (
            param["valueCurrent"]
            for param in exp.get_parameters_summary()
            if "train_type" in param["name"]
        ),
        "0",
    )
    return train_type, d

In [None]:
def preprocess_energy_predictions_from_comet_to_pandas(d):
    """Turn a downloaded prediction mapping into one DataFrame per energy.

    ``d`` is indexed by ``(type, energy)`` tuples and every value is sliced as
    a 2-D array: column 1 is used as the network prediction and column 0 as
    the true value.

    Returns a ``defaultdict(list)`` keyed by every label in the module-level
    ``energies``. Each value is a DataFrame with one ``"pred<type>"`` column
    per entry of the module-level ``types`` plus a ``"true"`` column copied
    from the ``("0", energy)`` entry of ``d``.
    """
    pred_columns = ["pred" + source for source in types]
    tables = defaultdict(list)

    for mev in energies:
        # one row per source type, transposed so that each source is a column
        stacked = np.array([d[(source, mev)][:, 1] for source in types]).T
        table = pd.DataFrame(stacked, columns=pred_columns)
        # true energy is taken from source "0" only; it is assumed (not
        # checked) to be the same for the other source types
        table["true"] = d[("0", mev)][:, 0]
        tables[mev] = table

    return tables

In [None]:
# Download the prediction table of this experiment and reshape it into
# per-energy DataFrames. The result is later registered under key "0" of
# predictions_by_train_dataset, so the printed train_type is the value to
# check against that key.
experiment_id = "fa5d69a09f5d43ad88f1be92f2352047"
train_type, d = get_predictions_from_experiment(experiment_id)
energy_predictions_0 = preprocess_energy_predictions_from_comet_to_pandas(d)
print(train_type)

In [None]:
# Download the prediction table of this experiment and reshape it into
# per-energy DataFrames. The result is later registered under key "23" of
# predictions_by_train_dataset, so the printed train_type is the value to
# check against that key.
experiment_id = "891b664b298144bebc183b6c6e66089e"
train_type, d = get_predictions_from_experiment(experiment_id)
energy_predictions_23 = preprocess_energy_predictions_from_comet_to_pandas(d)
print(train_type)

In [None]:
# Download the prediction table of this experiment and reshape it into
# per-energy DataFrames. The result is later registered under key "20" of
# predictions_by_train_dataset, so the printed train_type is the value to
# check against that key.
experiment_id = "a78d360794c74ccba1357532a047f9f2"
train_type, d = get_predictions_from_experiment(experiment_id)
energy_predictions_20 = preprocess_energy_predictions_from_comet_to_pandas(d)
print(train_type)

In [None]:
# Download the prediction table of this experiment and reshape it into
# per-energy DataFrames. The result is later registered under key "3" of
# predictions_by_train_dataset, so the printed train_type is the value to
# check against that key.
experiment_id = "36428e6694554bd9854b29562e766b55"
train_type, d = get_predictions_from_experiment(experiment_id)
energy_predictions_3 = preprocess_energy_predictions_from_comet_to_pandas(d)
print(train_type)

In [None]:
# Per-energy prediction tables, keyed by the same labels as `types`.
# NOTE(review): the key -> experiment pairing is hard-coded here; it is
# presumably the train_type printed by each download cell -- confirm, since
# nothing in the notebook enforces it.
predictions_by_train_dataset = {
    "0": energy_predictions_0,
    "23": energy_predictions_23,
    "20": energy_predictions_20,
    "3": energy_predictions_3
}

In [None]:
# save predictions for each net: one "net_trained_on_<type>" directory per
# entry of `types`, holding one "<energy>MeV.csv" file per entry of `energies`
for net_type in types:  # not named `type`: that would clobber the builtin for the whole kernel
    path = os.path.abspath("./net_trained_on_{}".format(net_type))
    # creates the directory only when missing, without a separate exists() check
    os.makedirs(path, exist_ok=True)
    for energy in energies:
        predictions_by_train_dataset[net_type][energy].to_csv(
            os.path.join(path, "{}MeV.csv".format(energy)),
            index=False,
        )

In [None]:
# save predictions by type: for every energy write "./predictions/<energy>MeV.csv"
# whose "pred<type>" column comes from the table registered under that same
# type in predictions_by_train_dataset
path = os.path.abspath("./predictions")
# creates the directory only when missing, without a separate exists() check
os.makedirs(path, exist_ok=True)

# column layout is identical for every energy, so build it once
columns = ["pred" + t for t in types] + ["true"]
# The "true" column is read from the table of the last entry in `types`,
# stated explicitly instead of relying on a loop variable leaking out of the
# loop. NOTE(review): assumes every table carries the same true values -- confirm.
true_source = types[-1]

for energy in energies:
    # append predictions
    predictions = [
        predictions_by_train_dataset[t][energy]["pred" + t].values
        for t in types
    ]
    # append true
    predictions.append(predictions_by_train_dataset[true_source][energy]["true"].values)
    df = pd.DataFrame(np.array(predictions).T, columns=columns)
    df.to_csv(os.path.join(path, "{}MeV.csv".format(energy)), index=False)