In [None]:
from importlib import reload
import model
model = reload(model)

# import tf_keras as keras
import keras
import pandas as pd
import tensorflow as tf
import crystal_loader

from tqdm import tqdm
import tqdm.keras

import numpy as np
from symmetry import *
import dill
import h5py
from sklearn.model_selection import train_test_split

from MLPtools import scale_ragged, atomic_MSE

from keras.losses import MeanSquaredError

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from scikeras.wrappers import KerasRegressor

import multiprocess as mp

# Keep tf.function-decorated code running in graph mode (not eagerly).
tf.config.run_functions_eagerly(False)

# Enable on-demand GPU memory allocation on the first GPU, if one is present.
physical_devices = tf.config.list_physical_devices('GPU')

if physical_devices:
  try:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
  except (RuntimeError, ValueError) as err:
    # set_memory_growth raises RuntimeError once the runtime is already
    # initialised (e.g. when this cell is re-run) and ValueError for an
    # invalid device. Report it instead of pretending there is no GPU.
    print("Could not enable GPU memory growth:", err)
  print("GPU:", physical_devices)
  print("Num GPUs:", len(physical_devices))
else:
  print("No GPU")

In [None]:
# Dataset identifier; both HDF5 files below are derived from it.
dset_name = "TiO2_2015_angfixed_x3"

features_path = f"../pickles/{dset_name}_features.h5"
labels_path = f"../pickles/{dset_name}_labeldata.h5"

# The feature file is read as consecutive datasets named array_0 ... array_{N-1};
# each one is loaded fully into memory ([:]) before the file is closed.
features = []
with h5py.File(features_path, "r") as f:
    for i in range(len(f)):
        features.append(f[f"array_{i}"][:])

# The label file holds two pandas objects under the keys "labels" and "n_atoms".
label_df = pd.read_hdf(labels_path, key="labels")
n_atoms = pd.read_hdf(labels_path, key="n_atoms")

# Show which label columns are available for selection.
print(label_df.columns)

In [None]:
# Regression target: the "cohesive_energy" column as an (N, 1) array.
labels = label_df["cohesive_energy"].to_numpy().reshape(-1, 1)

scaled_features = scale_ragged(features)

# First split: 80% train / 20% held out (features, labels and atom counts stay aligned).
Xtrain, Xtest, y_train, y_test, c_train, c_test = train_test_split(
    scaled_features, labels, n_atoms,
    test_size=0.2, shuffle=True, random_state=12,
)
# Second split: the held-out 20% is halved into validation and test sets.
Xval, Xtest, y_val, y_test, c_val, c_test = train_test_split(
    Xtest, y_test, c_test,
    test_size=0.5, shuffle=True, random_state=12,
)

# Convert each feature split to a RaggedTensor with one ragged dimension and
# a fixed innermost size of 70.
Xtrain, Xval, Xtest = (
    tf.ragged.constant(split, ragged_rank=1, inner_shape=(70,))
    for split in (Xtrain, Xval, Xtest)
)

In [None]:
def build_MLP(n_neurons=10, learning_rate=0.0004, atomic_loss=True, activation="relu"):
    """Build and compile a `model.MLPNet` with two equally sized hidden layers.

    Parameters
    ----------
    n_neurons : int
        Number of units in each of the two hidden `Dense` layers.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    atomic_loss : bool
        If True, a `MeanSquaredError` instance is handed to the network as
        `unitwise_loss_model` and `compile` receives `loss=None`.
        If False, no unit-wise loss model is used and `compile` receives
        `loss="mse"`.
    activation : str
        Activation used by both hidden layers.

    Returns
    -------
    The compiled `model.MLPNet` instance.
    """
    hidden_layers = [
        keras.layers.Dense(n_neurons, activation=activation) for _ in range(2)
    ]

    # Exactly one of the two loss hooks is active at a time.
    unitwise_loss = MeanSquaredError() if atomic_loss else None
    compile_loss = None if atomic_loss else "mse"

    network = model.MLPNet(
        layers=hidden_layers,
        N_features=70,
        ragged_processing=False,
        unitwise_loss_model=unitwise_loss,
    )
    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=compile_loss,
    )
    return network



# Parameter search 1
Grid search over the neuron count per hidden layer (the extreme ends of the aenet paper's parameters), several batch sizes, and the two loss options (`atomic_loss` on or off).

In [None]:
# scikeras wrapper around build_MLP carrying the default hyperparameters.
# NOTE(review): the manual grid loops further down rebind `MLP` to a plain
# model from build_MLP, so this wrapper looks unused there; confirm before removing.
MLP = KerasRegressor(
    build_MLP,
    n_neurons=10,
    learning_rate=0.0004,
    atomic_loss=True,
    activation="relu",
    batch_size=32,
    epochs=200,
)

# Round-1 search space: every combination of these values is trained once.
params = {
    "n_neurons": [10, 50],
    "learning_rate": [0.0004],
    "batch_size": [16, 32, 64],
    "atomic_loss": [True, False],
}

In [None]:
from itertools import product

# Split the search space into parallel lists of names and candidate values.
keys = list(params)
values = [params[name] for name in keys]

# Cartesian product of the value lists -> one config dict per combination.
search_grid = [dict(zip(keys, combo)) for combo in product(*values)]

# Show every configuration that will be trained.
for config in search_grid:
    print(config)

def without_keys(d, keys):
    """Return a shallow copy of `d` with the given key(s) removed.

    Parameters
    ----------
    d : dict
        Source mapping; it is not modified.
    keys : str, bytes, or iterable
        Key(s) to leave out. A single string (or bytes) is treated as ONE
        key rather than as a container of characters, so
        `without_keys(cfg, "batch_size")` removes only `"batch_size"` and not
        keys that happen to be substrings of it (e.g. `"batch"` or `"size"`).

    Returns
    -------
    dict
        New dict with the remaining items, in the original order.
    """
    # `x in "some string"` is a substring test, so wrap a lone string first.
    excluded = (keys,) if isinstance(keys, (str, bytes)) else tuple(keys)
    return {key: value for key, value in d.items() if key not in excluded}

In [None]:
# Per-configuration outputs, index-aligned with search_grid.
results, train_scores, test_scores = [], [], []

for config in search_grid:
    # batch_size is an argument of fit(), not of build_MLP, so strip it
    # from the build arguments.
    parameters = without_keys(config, "batch_size")
    MLP = build_MLP(**parameters)

    res = MLP.fit(Xtrain, y_train, batch_size=config["batch_size"], epochs=250, verbose=0)
    results.append(res)

    # Score on the training split first, then on the validation split.
    # Note that `test_score` is computed on (Xval, y_val), not on the test split.
    train_score = MLP.evaluate(Xtrain, y_train)
    test_score = MLP.evaluate(Xval, y_val)
    train_scores.append(train_score)
    test_scores.append(test_score)

In [None]:
# Print each round-1 configuration followed by its two scores and a blank line.
for config, train_score, test_score in zip(search_grid, train_scores, test_scores):
    print(
        config,
        f"train score: {train_score:.4f}, test score: {test_score:.4f}\n",
        sep="\n",
    )

Atomic loss doesn't seem to permit any learning to occur; losses in every trial bottomed out at 30 eV **per atom**, which is wildly higher than the non-atomic MSE loss (although one should note that the two losses are different and not directly comparable). From a single fold, the best model appears to be the one running with the 70-50-50-1 subnet architecture and a batch size of 32. I would run this with more folds to validate these results, but this puts my computer out of commission for a few days and I need to study...

## Round 2 of Hyperparameter Optimization

Testing different learning rates, as well as neuron counts again. I have reduced the total number of epochs as I am more interested in how quickly things converge.

In [None]:
# Round-2 search space: vary the learning rate and layer width; batch size and
# loss option are held fixed.
params_2 = {
    "n_neurons": [10, 50],
    "learning_rate": [0.0004, 0.004, 0.00004],
    "batch_size": [32],
    "atomic_loss": [False],
}

In [None]:
# Split the round-2 search space into parallel lists of names and candidate values.
keys = list(params_2)
values = [params_2[name] for name in keys]

# Cartesian product of the value lists -> one config dict per combination.
search_grid_2 = [dict(zip(keys, combo)) for combo in product(*values)]

In [None]:
# Per-configuration outputs, index-aligned with search_grid_2.
results_r2, train_scores_r2, test_scores_r2 = [], [], []

for config in search_grid_2:
    # batch_size is an argument of fit(), not of build_MLP, so strip it
    # from the build arguments.
    parameters = without_keys(config, "batch_size")
    MLP = build_MLP(**parameters)

    res = MLP.fit(Xtrain, y_train, batch_size=config["batch_size"], epochs=100, verbose=0)
    results_r2.append(res)

    # Score on the training split first, then on the validation split.
    # Note that `test_score` is computed on (Xval, y_val), not on the test split.
    train_score = MLP.evaluate(Xtrain, y_train)
    test_score = MLP.evaluate(Xval, y_val)
    train_scores_r2.append(train_score)
    test_scores_r2.append(test_score)

In [None]:
# Print each round-2 configuration followed by its two scores and a blank line.
for config, train_score, test_score in zip(search_grid_2, train_scores_r2, test_scores_r2):
    print(
        config,
        f"train score: {train_score:.4f}, test score: {test_score:.4f}\n",
        sep="\n",
    )

In [None]:
import matplotlib.pyplot as plt
# Training-loss history of every round-2 configuration on one shared axis.
fig, ax = plt.subplots(num=1, figsize=(10, 8))
for num, (result, sets) in enumerate(zip(results_r2, search_grid_2)):
    # One curve per configuration, labelled with its hyperparameter dict.
    ax.plot(result.epoch, result.history["loss"], label=str(sets))

# Log-scaled y axis for the loss curves.
ax.set(xlabel="Epoch", ylabel="MSE loss $(eV^2)$", yscale="log")
ax.legend()