In [None]:
import os

import random

import pandas as pd
import tensorflow as tf
import numpy as np
# Load the dataset once; `df` is kept as the untouched raw reference frame.
df = pd.read_json(path_or_buf="mp.json")
# Independent deep copy, so column edits on `df2` never reach `df`.
df2 = df.copy(deep=True)

In [None]:
# Record the TensorFlow release this notebook is running against.
print(tf.version.VERSION)

In [None]:
# Build the feature frame from the untouched raw frame `df` instead of mutating
# `df2` in place: re-running this cell then gives the same result rather than
# raising KeyError because the columns have already been removed.
# The list covers every elastic-modulus column (including K_VRH) plus the
# composition, formation-energy and hardness columns.
df2 = df.drop(
    columns=[
        "G_VRH", "G_Reuss", "G_Voigt",
        "K_VRH", "K_Voigt", "K_Reuss",
        "composition", "formation_energy_per_atom", "hardness",
    ]
)

In [None]:
# Attach the K_VRH column from the raw frame to the right of the feature
# columns, aligned on the shared index.
df3 = df2.join(df.loc[:, 'K_VRH'], how='left')

In [None]:
# Display the assembled frame (`df2` columns followed by the K_VRH column).
df3

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix: all rows, every column from the third one up to (but not
# including) the last. The two leading columns are skipped -- presumably
# identifiers rather than features; TODO confirm against the dataset.
# Casting to float64 here avoids the object-dtype array that `.values` yields
# for a mixed-dtype frame, which the later tensor conversion may not accept.
x = df3.iloc[:, 2:-1].to_numpy(dtype=np.float64)
# Target vector: all rows of the last column (K_VRH).
y = df3.iloc[:, -1].to_numpy(dtype=np.float64)

print(f'features shape: {x.shape}, target shape: {y.shape}')

# Shuffled 85 % / 15 % split with a fixed random_state for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.85, test_size=0.15, shuffle=True, random_state=67
)

print(f'\nX_train shape: {x_train.shape}, y_train shape: {y_train.shape}\n',
      f'X_test shape: {x_test.shape}, y_test.shape:{y_test.shape}')

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
# From here on `x_train` / `x_test` refer to the *scaled* feature arrays.
x_train, x_test = np.array(x_train_scaled), np.array(x_test_scaled)
# The targets are re-wrapped as fresh ndarrays; their values are unchanged.
y_train, y_test = np.array(y_train), np.array(y_test)

In [None]:
# Convert all four splits to float64 tensors in one pass
# (same order as before: train features, train target, test features, test target).
x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor = (
    tf.convert_to_tensor(split, dtype=tf.float64)
    for split in (x_train, y_train, x_test, y_test)
)

In [None]:
def reset_random_seeds(seed=2):
    """Seed every random number generator this notebook relies on.

    Args:
        seed: Integer seed written to ``PYTHONHASHSEED`` and passed to
            TensorFlow, NumPy and the standard-library ``random`` module.
            Defaults to 2, the value that used to be hard-coded.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here likely only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
    random.seed(seed)


reset_random_seeds()

In [None]:
from keras.models import Sequential
from keras.layers import Dense, BatchNormalization
import keras_tuner as kt


def build_model(hp, input_dim=120):
    """Build and compile the MLP regressor for one Keras Tuner trial.

    Args:
        hp: Hyperparameter container supplied by the tuner. It is queried for
            the widths of the three hidden layers (``layer_1``, ``layer_2``,
            ``layer_3``) and for ``learning_rate``.
        input_dim: Number of input features. Defaults to 120, the width that
            used to be hard-coded on the first Dense layer.

    Returns:
        A compiled ``Sequential`` model: three ReLU hidden layers (batch
        normalisation after the first two) and one linear output unit,
        optimised with Adam on MSE loss and reporting MAE, R2Score and MSE.
    """
    # Re-seed before any layer is created so weight initialisation is
    # reproducible from one build to the next.
    reset_random_seeds()

    # Search space; the registration order is kept as layer_1, layer_2,
    # layer_3, learning_rate.
    hp_layer_1 = hp.Int('layer_1', min_value=101, max_value=241, step=10)
    hp_layer_2 = hp.Int('layer_2', min_value=51, max_value=241, step=10)
    hp_layer_3 = hp.Int('layer_3', min_value=11, max_value=91, step=10)
    learning_rates = hp.Choice("learning_rate", values=[0.001, 0.0015, 0.002])

    model = Sequential()
    # Declare the input shape with an explicit Input object instead of the
    # `input_dim` keyword on the first Dense layer.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(Dense(units=hp_layer_1, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(units=hp_layer_2, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(units=hp_layer_3, activation='relu'))

    # Single linear unit: regression output.
    model.add(Dense(1, activation='linear'))

    # 'mse' is listed as a metric as well as the loss so that it is reported
    # under its own name next to 'mae' and 'R2Score'.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rates),
        loss='mse',
        metrics=['mae', 'R2Score', 'mse'],
    )

    return model

In [None]:
# Grid search over the search space declared in `build_model`.
# Trials are ranked on the *validation* MSE: `validation_split` below holds out
# 10 % of the training data, and ranking on the training metric ("mse") would
# favour whichever configuration fits the training set most closely.
# NOTE(review): the directory / project names are unchanged. Keras Tuner is
# expected to resume from results already stored there unless `overwrite=True`
# is passed, so clear that directory if it holds trials scored with a
# different objective -- confirm.
tuner = kt.GridSearch(
    build_model,
    objective=kt.Objective('val_mse', direction='min'),
    max_trials=1000,  # Upper bound on the number of grid points evaluated
    executions_per_trial=1,
    directory='random_search3',
    project_name='bulk_modulus_random_search'
)

# Run the grid search
tuner.search(x=x_train_tensor,
             y=y_train_tensor,
             epochs=10,
             batch_size=32,
             validation_split=0.1)

# Keep the best hyperparameters as a HyperParameters object: that is what
# `tuner.hypermodel.build` needs, because `build_model` calls `hp.Int` /
# `hp.Choice` on it (a plain dict of values has no such methods).
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters: {best_hps.values}")

In [None]:
# Build the network from the best hyperparameters *before* it is used, and
# train it: `build` returns a freshly initialised model, so without a fit the
# evaluation, the predictions and the saved file would come from untrained
# weights. The hyperparameters are fetched from the tuner as a HyperParameters
# object because `build_model` calls `hp.Int` / `hp.Choice` on its argument.
best_trial_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = tuner.hypermodel.build(best_trial_hps)

# Same epochs and batch size as the tuner search, but fitted on the whole
# training split (no validation hold-out).
model.fit(x=x_train_tensor,
          y=y_train_tensor,
          epochs=10,
          batch_size=32)

# Loss and metrics on the held-out test split.
model.evaluate(x_test_tensor, y_test_tensor)

In [None]:
import matplotlib.pyplot as plt

# Predict the test-set targets with the model
y_pred = model.predict(x_test_tensor)

# Actual values go on the x-axis and predictions on the y-axis so that the
# plotted data matches the axis labels (the arguments used to be swapped).
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.5)
plt.title('MLP NN Bulk Modulus')
plt.xlabel('Actual Values (y_test)')
plt.ylabel('Predicted Values (y_pred)')
plt.grid(True)
plt.show()

In [None]:
from keras.saving import load_model

# Persist the model to a `.keras` file in the working directory; an existing
# file of the same name is replaced.
model.save(filepath="Bulk_model.keras", overwrite=True)