In [None]:
## This notebook loads simulation and experimental data
## from a database using PyMongo and trains a combined neural network on them
## Author : Revathi Jambunathan
## Date : January, 2025

%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt

from Neural_Net_Classes import CombinedNN as CombinedNN
import torch
from botorch.models.transforms.input import AffineInputTransform
import pymongo
import pandas as pd
import os
import re
import yaml

In [None]:
# Select experimental setup for which we are training a model.
# This string is used both as the name of the database collection that is
# read and as the top-level key looked up in config/variables.yml.
setup = "ip2"

In [None]:
# Read the credential file for the database.
# expanduser('~') also resolves the home directory when the HOME environment
# variable is unset, whereas os.getenv('HOME') would return None and make
# os.path.join raise a TypeError.
db_profile_path = os.path.join(os.path.expanduser('~'), 'db.profile')
with open(db_profile_path) as f:
    db_profile = f.read()

# Extract the read-only password and fail with an explicit message (instead
# of an opaque IndexError) when the profile does not define it.
password_match = re.search('SF_DB_READONLY_PASSWORD=(.+)', db_profile)
if password_match is None:
    raise RuntimeError(
        f"SF_DB_READONLY_PASSWORD is not defined in {db_profile_path}"
    )

# Connect to the MongoDB database with read-only access
db = pymongo.MongoClient(
    host="mongodb05.nersc.gov",
    username="bella_sf_ro",
    password=password_match.group(1),
    authSource="bella_sf")["bella_sf"]

In [None]:
# Pull the documents of the selected setup out of the database and split them
# into an experimental and a simulation set according to `experiment_flag`
ip2_collection = db[setup]

expt_documents = list(ip2_collection.find({"experiment_flag": 1}))
expt_training_set_df = pd.DataFrame(expt_documents)

sim_documents = list(ip2_collection.find({"experiment_flag": 0}))
sim_training_set_df = pd.DataFrame(sim_documents)

In [None]:
# Read the input/output variable definitions of this setup from the shared config file
with open("../../config/variables.yml") as f:
    yaml_dict = yaml.safe_load(f)

setup_config = yaml_dict[setup]
input_variables = setup_config["input_variables"]
output_variables = setup_config["output_variables"]

# Names of the inputs and outputs; they are used below to select dataframe columns
input_names = [variable['name'] for variable in input_variables.values()]
output_names = [variable['name'] for variable in output_variables.values()]

In [None]:
# Visualize the dimensional (un-normalized) data: first and last input
# against the first output, for both the experimental and the simulation set
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

ax.scatter(
    expt_training_set_df[input_names[0]],
    expt_training_set_df[input_names[-1]],
    expt_training_set_df[output_names[0]],
    c='b', alpha=0.3, label='experiment')
ax.scatter(
    sim_training_set_df[input_names[0]],
    sim_training_set_df[input_names[-1]],
    sim_training_set_df[output_names[0]],
    c='g', alpha=0.3, label='simulation')
ax.view_init(elev=40., azim=40, roll=0)

# Label through the axes object and include the z axis and a legend, so that
# the figure can be read on its own
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0])
ax.legend();

<h2> Normalize with Affine Input Transformer </h2>

In [None]:
# Define the input and output normalizations, based on the training set from experiments.
# Each quantity is shifted by its mean (offset) and scaled by its standard
# deviation (coefficient).

X = torch.tensor(
    expt_training_set_df[ input_names ].values,
    dtype=torch.float
)
X_mean, X_std = X.mean(dim=0), X.std(dim=0)
input_transform = AffineInputTransform(
    len(input_names),
    coefficient=X_std,
    offset=X_mean
)

# Keep y as (n_samples, n_outputs): reshaping it to (-1, 1) would merge all
# output columns into a single one and yield one mean/std, which no longer
# matches len(output_names) as soon as there is more than one output.
# For a single output the shape is already (n_samples, 1).
y = torch.tensor(
    expt_training_set_df[ output_names ].values,
    dtype=torch.float
)
y_mean, y_std = y.mean(dim=0), y.std(dim=0)
output_transform = AffineInputTransform(
    len(output_names),
    coefficient=y_std,
    offset=y_mean
)

# Normalizing divides by the coefficient, so a standard deviation that is zero
# (constant column) or NaN (fewer than two samples) produces NaN/inf values.
# A zero mean is harmless because the offset is only subtracted, so it is not
# reported.
for quantity, std in (("input", X_std), ("output", y_std)):
    if not bool((std > 0).all()):
        print(f"Standard deviation used for {quantity} normalization is 0 or NaN. This will lead to NaNs ", std)


In [None]:
# Normalize the inputs and outputs of the simulation training set,
# working on a copy so that the dimensional dataframe stays untouched
norm_sim_training_set_df = sim_training_set_df.copy()
for column_names, affine_transform in (
    (input_names, input_transform),
    (output_names, output_transform),
):
    raw_values = torch.tensor( sim_training_set_df[column_names].values )
    norm_sim_training_set_df[column_names] = affine_transform( raw_values )

In [None]:
# Normalize the inputs and outputs of the experimental training set,
# working on a copy so that the dimensional dataframe stays untouched
norm_expt_training_set_df = expt_training_set_df.copy()
for column_names, affine_transform in (
    (input_names, input_transform),
    (output_names, output_transform),
):
    raw_values = torch.tensor( expt_training_set_df[column_names].values )
    norm_expt_training_set_df[column_names] = affine_transform( raw_values )

In [None]:
# Visualize the normalized data: first and last input against the first
# output, for both the experimental and the simulation set
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

ax.scatter(
    norm_expt_training_set_df[input_names[0]],
    norm_expt_training_set_df[input_names[-1]],
    norm_expt_training_set_df[output_names[0]],
    c='b', alpha=0.3, label='experiment (normalized)')
ax.scatter(
    norm_sim_training_set_df[input_names[0]],
    norm_sim_training_set_df[input_names[-1]],
    norm_sim_training_set_df[output_names[0]],
    c='g', alpha=0.3, label='simulation (normalized)')
ax.view_init(elev=40., azim=40, roll=0)

# Label through the axes object and include the z axis and a legend, so that
# the figure can be read on its own
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0])
ax.legend();

In [None]:
def _to_float_tensor(frame, column_names):
    """Return the selected columns of `frame` as a torch.float tensor."""
    return torch.tensor( frame[column_names].values, dtype=torch.float)

# Normalized inputs and outputs of both data sets as tensors, as passed to the training below
norm_expt_inputs_training = _to_float_tensor(norm_expt_training_set_df, input_names)
norm_expt_outputs_training = _to_float_tensor(norm_expt_training_set_df, output_names)
norm_sim_inputs_training = _to_float_tensor(norm_sim_training_set_df, input_names)
norm_sim_outputs_training = _to_float_tensor(norm_sim_training_set_df, output_names)

<h2> Train combined NN </h2>

In [None]:
# Training hyper-parameters, gathered here so that they are easy to tune
SEED = 0
LEARNING_RATE = 0.0005
NUM_EPOCHS = 40000

# Seed torch's global random number generator before the network is created,
# so that a Restart & Run All reproduces the same training run.
# NOTE(review): assumes CombinedNN only draws randomness from torch - confirm.
torch.manual_seed(SEED)

calibrated_nn = CombinedNN(learning_rate=LEARNING_RATE)
calibrated_nn.train_model(
    norm_sim_inputs_training, norm_sim_outputs_training,
    norm_expt_inputs_training, norm_expt_outputs_training,
    num_epochs=NUM_EPOCHS)

# Show the training loss on a logarithmic scale
calibrated_nn.plot_loss()
plt.yscale('log')

In [None]:
# 3D comparison of the normalized simulation training data with the
# network's simulation predictions at the same input points
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

sim_x = norm_sim_training_set_df[input_names[0]]
sim_y = norm_sim_training_set_df[input_names[-1]]
sim_z = norm_sim_training_set_df[output_names[0]]

# Filled markers: simulation training data
ax.scatter(sim_x, sim_y, sim_z, label='sim training set', alpha=0.7)

# Hollow red markers: predictions for the same inputs
sim_train_predictions = calibrated_nn.predict_sim(norm_sim_inputs_training)
ax.scatter(
    sim_x, sim_y, sim_train_predictions.flatten(),
    label='predictions 1', s=50, facecolors='none', edgecolors='r')

ax.view_init(elev=40., azim=40)

# Title and axis labels
ax.set(
    title='Simulation Data v Predictions',
    xlabel=input_names[0],
    ylabel=input_names[-1],
    zlabel=output_names[0],
)

ax.legend()
plt.show()

<h2> Saving the Lume Model - TODO for combined NN </h2>

In [None]:
# lume_model classes used below to wrap and save the trained network.
# (The former `model = TorchModel` line only bound the class itself to `model`
# and was overwritten before any use, so it has been dropped.)
from lume_model.models import TorchModel
from lume_model.variables import ScalarVariable

In [None]:
# Express the learned simulation-to-experiment calibration (weight and bias of
# calibrated_nn.sim_to_exp_calibration) as an affine transform.
# detach() makes the copies plain constant tensors instead of tensors that are
# still attached to the autograd graph of the trained parameters.
# NOTE(review): assumes weight and bias are torch parameters - confirm in CombinedNN.
calibration_layer = calibrated_nn.sim_to_exp_calibration
calibration_transform = AffineInputTransform(
    len(output_names),
    coefficient=calibration_layer.weight.detach().clone(),
    offset=calibration_layer.bias.detach().clone() )

In [None]:


# Build one ScalarVariable per entry of the variable definitions read from the config
input_scalar_variables = [ ScalarVariable(**spec) for spec in input_variables.values() ]
output_scalar_variables = [ ScalarVariable(**spec) for spec in output_variables.values() ]

# Wrap the trained network together with its input/output transforms
model = TorchModel(
    model=calibrated_nn,
    input_variables=input_scalar_variables,
    output_variables=output_scalar_variables,
    input_transformers=[input_transform],
    output_transformers=[calibration_transform, output_transform] # saving calibration before normalization
)

# Save the model description; save_jit=True presumably also writes the .jit
# file that is loaded in the next section - TODO confirm
model.dump( file='base_simulation_model_with_transformers_new.yml', save_jit=True)

<h2> Load Model </h2>

In [None]:
# Load the saved TorchScript model and compare its calibrated predictions
# with the (normalized) experimental training data
loaded_model = torch.jit.load('base_simulation_model_with_transformers_new_model.jit')
loaded_model = loaded_model.to(torch.float)

# Create a new figure directly: calling plt.clf() first would clear whichever
# figure is current, i.e. the plot produced by a previous cell
fig, ax = plt.subplots()
ax.scatter(norm_expt_training_set_df['z_target_um'], norm_expt_training_set_df['n_protons'], label='Expt training set')

# Inference only, so no gradients are needed
with torch.no_grad():
    output = loaded_model.calibrate(loaded_model(norm_expt_inputs_training))
    numpy_array = output.numpy()
ax.scatter(norm_expt_training_set_df['z_target_um'], numpy_array, label=' expt train predictions', s=50, facecolors='none', edgecolors='m')

# NOTE: this notebook never creates an experimental test set
# (norm_expt_test_set_df / norm_expt_inputs_test), so the former test-set
# scatter plots raised a NameError on a fresh kernel and are not drawn here.
# Re-add them once a train/test split is defined.

ax.set_xlabel('z_target_um')
ax.set_ylabel('n_protons')
ax.legend()
plt.show()