In [1]:
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA

from empiricalgalo import utils, models

import joblib

%load_ext autoreload
%autoreload 2

# Force CPU-only execution by hiding every GPU from TensorFlow.
try:
    tf.config.set_visible_devices([], 'GPU')
except (ValueError, RuntimeError) as err:
    # ValueError: invalid device specification.
    # RuntimeError: the TensorFlow runtime is already initialised, so device
    # visibility can no longer be modified. Best effort: report and carry on.
    print(f"Could not hide GPUs from TensorFlow: {err}")
else:
    # Verify outside the `try` so a failed check is not silently swallowed
    # (the previous bare `except:` also caught the AssertionError).
    visible_devices = tf.config.get_visible_devices()
    for device in visible_devices:
        assert device.device_type != 'GPU', f"GPU still visible: {device}"

In [2]:
# Load the catalogue from disk.
data = np.load("../data/HAGNmatch.npy")

# Keep only rows with H_level equal to 1, H_mhalo above 1e10 and G_mgal
# above 1e9; all three conditions must hold.
m = np.logical_and.reduce([
    data["H_level"] == 1,
    data["H_mhalo"] > 1e10,
    data["G_mgal"] > 1e9,
])
data = data[m]

# Input columns. Every feature is also listed in `log_features`
# (presumably the columns that DataFrameSelector log-transforms -- confirm).
features = ["H_mhalo", "H_concentration", "H_spin"]
log_features = list(features)

# Column the network is trained to predict.
target = "G_mgal"

In [3]:
# Split the filtered catalogue into train/test feature and target sets.
# NOTE(review): `target` is passed twice and the last argument is a bare
# `True`; presumably the second `target` is the column to stratify on and the
# flag toggles log-scaling of it -- confirm against utils.stratify_split.
Xtrain, Xtest, ytrain, ytest = utils.stratify_split(data, features, target, target, True)

In [4]:
from sklearn.preprocessing import StandardScaler

In [5]:
# Feature pipeline: pick the feature columns, then apply a whitening PCA that
# keeps as many components as there are features.
features_pipe = Pipeline(steps=[
    ("selector", utils.DataFrameSelector(features, log_features)),
    ("PCA", PCA(n_components=len(features), whiten=True)),
])
# Alternative kept for reference (standardise instead of whitening PCA):
# features_pipe = Pipeline(steps=[
#     ("selector", utils.DataFrameSelector(features, log_features)),
#     ("scaler", StandardScaler()),
# ])

# Target pipeline: column selection only.
target_pipe = Pipeline(steps=[
    ("selector", utils.DataFrameSelector(target, target)),
])

In [6]:
# Run the train/test splits through their preprocessing pipelines.
# NOTE(review): presumably the pipeline is fitted on the training split and
# then applied to both -- confirm against utils.apply_preprocess.
# The results overwrite the inputs, so this cell is not idempotent: running it
# a second time would feed already-preprocessed arrays back into the pipelines.
Xtrain, Xtest = utils.apply_preprocess(Xtrain, Xtest, features_pipe)
ytrain, ytest = utils.apply_preprocess(ytrain, ytest, target_pipe)

In [7]:
# Training configuration.
Nensemble = 2
batch_size = 500
checkpoint_dir = "./checkpoints"

# Cyclical learning rate oscillating between 1e-3 and 1e-2.
# `step_size` is floor(2 * len(ytrain) / batch_size) optimizer steps, and
# `scale_fn` halves the amplitude each time its argument grows by one
# (presumably the cycle index under the default scale mode -- confirm).
clr = tfa.optimizers.CyclicalLearningRate(
    initial_learning_rate=0.001,
    maximal_learning_rate=0.01,
    step_size=2 * len(ytrain) // batch_size,
    scale_fn=lambda x: 1 / (2.0 ** (x - 1)),
)

# Adamax optimizer driven by the schedule above.
optimizer = tf.optimizers.Adamax(learning_rate=clr)

In [33]:
# Per-member checkpoint locations and random seeds, indexed by member below.
# NOTE(review): presumably one directory under `checkpoint_dir` and one seed
# per ensemble member, with 42 as the master seed -- confirm against
# models.make_checkpoint_dirs / models.get_random_seeds.
cdirs = models.make_checkpoint_dirs(checkpoint_dir, Nensemble)
seeds = models.get_random_seeds(Nensemble, 42)

In [None]:
# Train every ensemble member with its own checkpoint directory and seed.
for i in range(Nensemble):
    # Give each member a fresh optimizer. Keras optimizers are stateful (step
    # counter and per-variable slots), so sharing one instance would let the
    # cyclical schedule continue from where the previous member stopped
    # instead of restarting. The schedule object `clr` is shared on the
    # assumption that it is a pure function of the step -- confirm.
    models.GaussianLossNN.fit_directly(
        Xtrain, ytrain, batch_size, cdirs[i], seeds[i],
        optimizer=tf.optimizers.Adamax(learning_rate=clr))

In [162]:
# Build the ensemble summary object from the checkpoint directory.
# NOTE(review): the saved output reports 8 models although Nensemble is 2, so
# the directory seems to contain checkpoints from earlier runs -- confirm that
# only the intended members are picked up.
ensemble = models.SummaryEnsembleGaussianLossNN(checkpoint_dir, optimizer)

Found 8 models in `./checkpoints`.


In [168]:
# Score the ensemble on the held-out test split (one value per loaded model in
# the saved output).
# NOTE(review): the saved scores alternate between just two values, which
# suggests duplicated members (possibly retrained with the same seeds) --
# verify.
ensemble.score_reduced_chi2(Xtest, ytest)

[1.0316183552719453,
 0.998103745227621,
 1.0316183552719453,
 0.998103745227621,
 1.0316183552719453,
 0.998103745227621,
 1.0316183552719453,
 0.998103745227621]