In [None]:
from importlib import reload
import model
model = reload(model)

import tf_keras as keras
import pandas as pd
import tensorflow as tf
import crystal_loader
crystal_loader = reload(crystal_loader)
from tqdm import tqdm
import tqdm.keras
import numpy as np
from symmetry import *
import dill
from sklearn.preprocessing import StandardScaler
import h5py
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


import tensorflow as tf

In [None]:
# Locations of the stored descriptor arrays and of their target values.
features_path = "../pickles/TiO2_2015_corrected_features.h5"
labels_path = "../pickles/TiO2_2015_corrected_labels.h5"

# Both files hold datasets keyed "array_0", "array_1", ...; they are read back
# in index order so that entry k of `features` pairs with entry k of `labels`.
with h5py.File(features_path, "r") as f:
    features = [f[key][:] for key in map("array_{}".format, range(len(f)))]

with h5py.File(labels_path, "r") as f:
    labels = np.array([f[key][:] for key in map("array_{}".format, range(len(f)))])

def scale_ragged(features, scaler=None, return_scaler=False):
    """Standardize a ragged collection of 2-D feature arrays column by column.

    The rows of every entry are stacked into one 2-D array so that a single
    ``StandardScaler`` is fitted on all rows at once; each entry is then
    transformed on its own so the ragged (per-entry) layout is preserved.

    Parameters
    ----------
    features : sequence of 2-D arrays
        Arrays sharing the same number of columns. The sequence is iterated
        twice (once to fit, once to transform), so it must be re-iterable.
    scaler : fitted scaler, optional
        An already fitted object exposing ``transform``. When given, it is
        applied as-is and no new scaler is fitted, which lets new data be
        standardized with previously computed statistics.
    return_scaler : bool, default False
        When True, also return the scaler that was applied.

    Returns
    -------
    list of 2-D arrays, or ``(list, scaler)`` when ``return_scaler`` is True.
    """
    if scaler is None:
        # Fit on every row of every entry so all entries share one set of statistics.
        scaler = StandardScaler().fit(np.vstack(features))

    scaled_features = [scaler.transform(struct) for struct in features]

    if return_scaler:
        return scaled_features, scaler
    return scaled_features

# Standardize the descriptors (statistics are taken over the whole data set).
scaled_features = scale_ragged(features)

# First hold out 40 % of the samples, then cut that hold-out in half to obtain
# the validation and test parts. Both splits use the same fixed seed.
Xtrain, Xtest, y_train, y_test = train_test_split(
    scaled_features, labels, test_size=0.4, shuffle=True, random_state=12
)
Xval, Xtest, y_val, y_test = train_test_split(
    Xtest, y_test, test_size=0.5, shuffle=True, random_state=12
)

# Convert each split into a ragged tensor whose innermost dimension is the
# 70-wide feature vector.
Xtrain, Xval, Xtest = (
    tf.ragged.constant(split, ragged_rank=1, inner_shape=(70,))
    for split in (Xtrain, Xval, Xtest)
)

from sklearn.neighbors import KernelDensity

def inverse_density_weights(y, **kwargs):
    """Compute per-sample weights inversely proportional to the density of ``y``.

    A kernel density estimate is fitted on the target values; each sample's
    weight is the reciprocal of the estimated density at that sample, so
    targets in sparsely populated regions receive larger weights.

    Parameters
    ----------
    y : array-like
        Target values; flattened to a single column before fitting.
    **kwargs
        Forwarded to ``KernelDensity``. Defaults to ``kernel='gaussian'`` and
        ``bandwidth=5.0`` when not overridden.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 1)
        Weights normalized to have mean 1.
    """
    # Defaults reproduce the previously hard-coded estimator; caller keywords win.
    kde_options = {"kernel": "gaussian", "bandwidth": 5.0, **kwargs}

    # KernelDensity expects a 2-D (n_samples, n_features) array.
    samples = np.asarray(y).reshape(-1, 1)
    kde = KernelDensity(**kde_options).fit(samples)

    # score_samples returns log-densities; exponentiate to get densities.
    density = np.exp(kde.score_samples(samples))

    # The small constant keeps the reciprocal finite where the density underflows.
    weights = 1 / (density + 1e-6)
    weights /= np.mean(weights)  # normalize so the average weight is 1

    return weights.reshape(-1, 1)

# Inverse-density weights for the training and validation targets; each set
# gets its own density estimate.
y_train_weights, y_val_weights = map(inverse_density_weights, (y_train, y_val))

In [None]:
# Layer stack handed to model.MLPNet: two ReLU hidden layers (100 and 200
# units), each followed by 30 % dropout, and a final 70-unit ReLU layer.
# NOTE(review): the layers are created in this exact order; weight loading
# from the checkpoint may rely on that order / the auto-generated layer
# names, so do not reorder without checking.
layers = [keras.layers.Dense(100, activation="relu"),
          keras.layers.Dropout(0.3),
          keras.layers.Dense(200, activation="relu"),
          keras.layers.Dropout(0.3),
          keras.layers.Dense(70, activation="relu")]

# N_features matches the 70-wide feature vectors used for the ragged inputs.
MLP1 = model.MLPNet(layers=layers, N_features=70, ragged_processing=False)
# Flag the model as built by hand -- presumably so that load_weights can be
# called before the model has seen any data; TODO confirm against MLPNet.
MLP1.built = True

In [None]:
# Restore previously saved weights from the HDF5 checkpoint into MLP1.
MLP1.load_weights('./saved_models/v5_weighted.h5')

In [None]:
# Predictions for the held-out test split, plus the flattened true targets.
y_pred = MLP1.predict(Xtest)
y_exam = np.squeeze(y_test)

# Boolean masks selecting four energy windows of the true targets
# (both window edges are inclusive).
filt_80k, filt_40k, filt_20k, filt_10k = (
    (y_exam >= low) & (y_exam <= high)
    for low, high in ((-9e4, -7e4), (-5e4, -3e4), (-3e4, -1e4), (-1e4, 1e4))
)

# True targets falling inside each window.
yt_80k, yt_40k, yt_20k, yt_10k = (
    y_exam[mask] for mask in (filt_80k, filt_40k, filt_20k, filt_10k)
)

# Predictions for the same samples.
y_pred_80k, y_pred_40k, y_pred_20k, y_pred_10k = (
    y_pred[mask] for mask in (filt_80k, filt_40k, filt_20k, filt_10k)
)

In [None]:
# One histogram panel per energy window, comparing true and predicted values.
# clear=True wipes figure 2 if it already exists, so re-running the cell under
# an interactive backend does not stack new axes on top of the old ones.
panel_titles = ["-90k to -70k", "-50k to -30k", "-30k to -10k", "-10k to 10k"]
fig, ax = plt.subplots(1, 4, num=2, figsize=(20, 5), clear=True)
fig.suptitle("True vs predicted energy distributions per energy window")

for axes, ytest, ypred, title in zip(
    ax,
    [yt_80k, yt_40k, yt_20k, yt_10k],
    [y_pred_80k, y_pred_40k, y_pred_20k, y_pred_10k],
    panel_titles,
):
    # Sanity check: both selections come from the same mask, so sizes must agree.
    print(len(ytest), len(ypred))
    axes.hist(ytest, bins="auto", label="true", alpha=0.6)
    axes.hist(ypred, bins="auto", label="predicted", alpha=0.7)

    axes.set_title(title)
    axes.set_xlabel("energy (eV)")
    axes.set_ylabel("count")
    axes.legend()

In [None]:
from sklearn.metrics import r2_score, root_mean_squared_error

# Overview: every test sample, predicted against true.
# clear=True resets the figure if it is still open from an earlier run.
fig, ax = plt.subplots(num=3, figsize=(10, 6), clear=True)
ax.scatter(y_exam, y_pred)
ax.set_title("y pred vs y test overview")
ax.set_xlabel("true (eV)")
ax.set_ylabel("predicted (eV)")

# Identity line spanning the actual range of the true values instead of a
# fixed interval, so it covers every plotted sample.
x = np.linspace(np.min(y_exam), np.max(y_exam), 200)
ax.plot(x, x, c="r")

# Per-window panels. A dedicated figure number is used here: figure 2 already
# holds the histogram panels, and reusing it would draw on top of them.
fig, ax = plt.subplots(1, 4, num=4, figsize=(20, 5), clear=True)
fig.suptitle("Predicted vs Actual plots in various energy ranges")
print(len(y_exam))
for axes, ytest, ypred in zip(ax, [yt_80k, yt_40k, yt_20k, yt_10k], [y_pred_80k, y_pred_40k, y_pred_20k, y_pred_10k]):
    axes.set_xlabel(f"true (eV) (N={len(ytest)})")
    axes.set_ylabel("predicted (eV)")
    if np.size(ytest) == 0:
        # No samples in this window: min/max below would fail on empty input.
        continue
    # Colour encodes the sample's position within the window's selection.
    axes.scatter(ytest, ypred, lw=0, marker=".", c=np.arange(np.size(ytest)))
    axes.set_aspect("equal")
    # Shared limits on both axes so the identity line is the panel diagonal.
    axmin = np.min([np.min(ytest), np.min(ypred)])
    axmax = np.max([np.max(ytest), np.max(ypred)])
    x = np.linspace(axmin, axmax)
    axes.plot(x, x, c="r")
    axes.set_xlim(axmin, axmax)
    axes.set_ylim(axmin, axmax)

# Global fit quality on the whole test split.
print(r2_score(y_exam, y_pred))
print(root_mean_squared_error(y_exam, y_pred))

In [None]:
import pandas as pd
import crystal_loader

# Identity lattice matrix shared by every structure of the scan.
LV = np.eye(3, dtype=int)

# Three-atom Ti / O / Ti arrangement on the x axis: the outer Ti atoms stay at
# x = 0 and x = 1 while the O atom's x coordinate sweeps from 0.001 to 1.
structures = []
for d in np.linspace(0.001, 1, 1200):
    x = pd.DataFrame(
        {
            "Atom": ["Ti", "O", "Ti"],
            "x": [0, d, 1],
            "y": [0, 0, 0],
            "z": [0, 0, 0],
        }
    )
    structures.append(x)

# One copy of the lattice matrix per structure, stacked along a new first axis.
LVs = np.stack([LV for _ in structures])

# Show the last structure of the scan as a quick check.
print(x)

In [None]:
Rc = 6.5  # A
Rs = 0

# Radial basis: one parameter set per eta, all with the same Rs.
radial_etas = [0.003214, 0.214264, 0.035711, 0.357106, 0.071421, 0.714213, 0.124987, 1.428426]
params_radial = tuple({"Rs": Rs, "eta": eta} for eta in radial_etas)

# Angular basis: 18 parameter sets. The three etas cycle fastest, zeta steps
# through 1, 2, 4 every three entries, and lambda is -1 for the first nine
# entries and +1 for the last nine.
angular_etas = [0.000357, 0.028569, 0.089277] * 6
angular_lambdas = [-1.0] * 9 + [1.0] * 9
angular_zetas = [zeta for zeta in (1.0, 2.0, 4.0) for _ in range(3)] * 2

params_angular = tuple(
    dict(Rs=Rs, eta=eta, lmbda=lam, zeta=zeta)
    for eta, lam, zeta in zip(angular_etas, angular_lambdas, angular_zetas)
)

# Every element (pair) uses the same parameter tuples.
tio_params_ang = dict.fromkeys((("Ti", "Ti"), ("Ti", "O"), ("O", "O")), params_angular)
tio_params_rad = dict.fromkeys(("Ti", "O"), params_radial)

In [None]:
# features = crystal_loader.build_features(structures, LVs, Rc=6.5, params_rad=tio_params_rad, params_ang=tio_params_ang)
import symmetry

# features = symmetry.get_features(structures, LVs, Rc=6.5, params_rad=tio_params_rad, params_ang=tio_params_ang)

# Build descriptors for the synthetic scan structures. The cutoff is taken
# from the Rc constant defined with the basis parameters rather than being
# typed in again, so the two cannot drift apart.
# NOTE: this rebinds `features`, which earlier held the descriptors loaded
# from disk.
features = crystal_loader.build_features(
    structures, LVs, Rc=Rc, params_rad=tio_params_rad, params_ang=tio_params_ang
)

In [None]:
%matplotlib widget

# MLP1 is evaluated on standardized descriptors (the stored descriptors go
# through scale_ragged before being split), so the scan descriptors need the
# same standardization before prediction. `features` now refers to the scan
# descriptors, so the reference statistics are re-fitted from the stored file.
# NOTE(review): assumes build_features returns unscaled descriptors comparable
# to the ones stored at features_path -- confirm.
with h5py.File(features_path, "r") as f:
    reference_rows = np.vstack([f[f"array_{i}"][:] for i in range(len(f))])
scan_scaler = StandardScaler().fit(reference_rows)

feats = tf.ragged.constant(
    [scan_scaler.transform(struct) for struct in features],
    ragged_rank=1,
    inner_shape=(70,),
)
# NOTE: this rebinds `y_pred`, which earlier held the test-split predictions.
y_pred = MLP1.predict(feats)



In [None]:
# Drop any earlier copy of figure 15 so the plot starts from a fresh figure.
plt.close(15)
fig, ax = plt.subplots(num=15)

# Same grid of positions that was used to generate the scan structures.
d = np.linspace(0.001, 1, 1200)

# Predicted energy as a function of the middle atom's position.
ax.plot(d, y_pred)
ax.set_xlabel("middle atom position (angstroms)")
ax.set_ylabel("predicted E (eV)")


In [None]:
# Scratch checks of nested indexing.
deez = [[1, 2], [3, 4], [5, 6]]

# A slice keeps the outer list, so two more [0] lookups are needed to reach 1.
deez[0:1][0][0]

# First entry of the first (species, position) pair: ('Ti', [0.0, 0.0, 0.0]).
[
    (
        ("Ti", [0.0, 0.0, 0.0]),
        ("O", [0.001, 0.0, 0.0]),
    )
][0][0]