In [None]:
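## This notebook loads simulation and experimental data from a MongoDB
## database (using PyMongo), trains a combined neural network on both
## data sets, and saves the calibrated model in the lume-model format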
## Author : Revathi Jambunathan
## Date : January, 2025

%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt

from Neural_Net_Classes import CombinedNN as CombinedNN
import torch
from botorch.models.transforms.input import AffineInputTransform
import pymongo
import pandas as pd
import os
import re
import yaml

In [None]:
# Select experimental setup for which we are training a model.
# This name is used as the MongoDB collection to read from, as the key into
# the dashboard's variables.yml, and as the base name of the saved model file.
setup = "ip2"

In [None]:
# Open credential file for database.
# os.path.expanduser('~') is used instead of os.getenv('HOME'): the latter
# returns None when HOME is unset, which makes os.path.join raise a TypeError.
with open(os.path.join(os.path.expanduser('~'), 'db.profile')) as f:
    db_profile = f.read()

# Extract the read-only password; fail with an explicit message (instead of a
# bare IndexError) when the expected entry is missing from the profile file
password_match = re.search('SF_DB_READONLY_PASSWORD=(.+)', db_profile)
if password_match is None:
    raise RuntimeError(
        "SF_DB_READONLY_PASSWORD=... was not found in ~/db.profile; "
        "cannot authenticate against the database")

# Connect to the MongoDB database with read-only access
db = pymongo.MongoClient(
    host="mongodb05.nersc.gov",
    username="bella_sf_ro",
    password=password_match.group(1),
    authSource="bella_sf")["bella_sf"]

# Extract data from the database as pandas dataframe
# (one row per document of the collection named after the selected setup)
collection = db[setup]
df = pd.DataFrame( list(collection.find()) )

In [None]:
# Read the dashboard configuration and keep, for the selected setup, the
# variable definitions together with the list of their names
with open("../../dashboard/config/variables.yml") as f:
    yaml_dict = yaml.safe_load(f)

input_variables = yaml_dict[setup]["input_variables"]
output_variables = yaml_dict[setup]["output_variables"]

input_names = [ definition['name'] for definition in input_variables.values() ]
output_names = [ definition['name'] for definition in output_variables.values() ]

In [None]:
# Visualize the dimensional (un-normalized) data:
# first input vs. last input vs. first output, colored by experiment_flag
ax = plt.figure().add_subplot(projection='3d')

ax.scatter(
    df[input_names[0]],
    df[input_names[-1]],
    df[output_names[0]],
    c=df.experiment_flag,
    alpha=0.3)

ax.view_init(elev=40., azim=40, roll=0)
# Label all three axes through the Axes object rather than the pyplot state
# machine, so the labels are attached to this figure explicitly
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0]);

<h2> Normalize with Affine Input Transformer </h2>

In [None]:
# Define the input and output normalizations: every variable is offset by its
# mean and scaled by its standard deviation, both computed over the full data set

X = torch.tensor( df[ input_names ].values, dtype=torch.float )
X_mean, X_std = X.mean(axis=0), X.std(axis=0)

y = torch.tensor( df[ output_names ].values, dtype=torch.float )
y_mean, y_std = y.mean(axis=0), y.std(axis=0)

# Sanity checks, performed before the transforms are built.
# `(t == 0).any()` flags a zero in ANY component; the previous `min(t) == 0`
# test missed a zero whenever another component was negative.
# NOTE(review): a zero mean only sets the offset to 0 and looks harmless if the
# transform computes (x - offset) / coefficient; only a zero coefficient would
# then divide by zero. Message kept as-is — confirm and reword if appropriate.
if (X_mean == 0).any():
    print("Mean value used for normalization is 0. This will lead to NaNs ", X_mean)
if (X_std == 0).any():
    print("RMS value used for normalization is 0. This will lead to NaNs ", X_std)
# The outputs are scaled by their standard deviation as well, so check them too
if (y_std == 0).any():
    print("RMS value used for output normalization is 0. This will lead to NaNs ", y_std)

input_transform = AffineInputTransform(
    len(input_names),
    coefficient=X_std,
    offset=X_mean
)

output_transform = AffineInputTransform(
    len(output_names),
    coefficient=y_std,
    offset=y_mean
)


In [None]:
# Build a normalized copy of the data set: the input and output columns are
# replaced by their transformed values, all other columns are left untouched
raw_inputs = torch.tensor( df[input_names].values )
raw_outputs = torch.tensor( df[output_names].values )

norm_df = df.copy()
norm_df[input_names] = input_transform( raw_inputs )
norm_df[output_names] = output_transform( raw_outputs )

In [None]:
# Visualize the normalized data (same view as for the dimensional data):
# first input vs. last input vs. first output, colored by experiment_flag
ax = plt.figure().add_subplot(projection='3d')

ax.scatter(
    norm_df[input_names[0]],
    norm_df[input_names[-1]],
    norm_df[output_names[0]],
    c=norm_df.experiment_flag,
    alpha=0.3)

ax.view_init(elev=40., azim=40, roll=0)
# Label all three axes through the Axes object rather than the pyplot state
# machine, so the labels are attached to this figure explicitly
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0]);

In [None]:
# Split the normalized data by experiment_flag (1: experiment, 0: simulation)
# and convert the input / output columns of each subset to float tensors
expt_rows = norm_df[norm_df.experiment_flag == 1]
sim_rows = norm_df[norm_df.experiment_flag == 0]

norm_expt_inputs_training = torch.tensor( expt_rows[input_names].values, dtype=torch.float )
norm_expt_outputs_training = torch.tensor( expt_rows[output_names].values, dtype=torch.float )
norm_sim_inputs_training = torch.tensor( sim_rows[input_names].values, dtype=torch.float )
norm_sim_outputs_training = torch.tensor( sim_rows[output_names].values, dtype=torch.float )

<h2> Train combined NN </h2>

In [None]:
# Fix the random seed so that the training gives the same model on every
# "Restart Kernel -> Run All".
# NOTE(review): assumes CombinedNN takes its randomness (e.g. weight
# initialization) from torch's global RNG — confirm in Neural_Net_Classes.
torch.manual_seed(0)

# Train a single network on the simulation data and the experimental data
calibrated_nn = CombinedNN( len(input_names), len(output_names), learning_rate=0.0005)
calibrated_nn.train_model(
    norm_sim_inputs_training, norm_sim_outputs_training,
    norm_expt_inputs_training, norm_expt_outputs_training,
    num_epochs=20000)

# Show the loss history on a logarithmic scale
calibrated_nn.plot_loss()
plt.yscale('log')

In [None]:
# Compare the network predictions with the normalized simulation training set

# Simulation rows (experiment_flag == 0), selected once and reused below
sim_rows = norm_df[norm_df.experiment_flag == 0]

# Create a 3D plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Scatter plot for simulation training set
ax.scatter(
    sim_rows[input_names[0]],
    sim_rows[input_names[-1]],
    sim_rows[output_names[0]],
    label='simulation data', alpha=0.7)

# Scatter plot for the predictions
sim_train_predictions = calibrated_nn.predict_sim(norm_sim_inputs_training)
# Only the first output is plotted. Selecting its column (instead of
# flattening everything) keeps one value per row when there are several outputs.
# NOTE(review): assumes predict_sim returns len(output_names) values per row — confirm.
first_output_predictions = sim_train_predictions.reshape(-1, len(output_names))[:, 0]
ax.scatter(
    sim_rows[input_names[0]],
    sim_rows[input_names[-1]],
    first_output_predictions,
    label='predictions 1', s=50, facecolors='none', edgecolors='r')

ax.view_init(elev=40., azim=40)
# Set labels and title
ax.set_title('Simulation Data v Predictions')
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0])

# Add legend
ax.legend()
# Show plot
plt.show()

<h2> Saving the Lume Model - TODO for combined NN </h2>

In [None]:
!pip uninstall --yes lume-model

In [None]:
!pip install git+https://github.com/slaclab/lume-model.git@0c62723b04b78fe858a9b349433b5aacfd267f50

In [None]:
from lume_model.models import TorchModel
from lume_model.variables import ScalarVariable

In [None]:
# Wrap the learned simulation-to-experiment calibration (weight and bias of
# calibrated_nn.sim_to_exp_calibration) in an affine transform.
# detach() drops the autograd history of the trained parameters: the copies
# stored in the transform are constants and must not require gradients.
calibration_transform = AffineInputTransform(
    len(output_names),
    coefficient=calibrated_nn.sim_to_exp_calibration.weight.detach().clone(),
    offset=calibrated_nn.sim_to_exp_calibration.bias.detach().clone() )

In [None]:
# Fix mismatch in name between the config file and the expected lume-model format:
# the key 'default' of every input variable is renamed to 'default_value'
for k in input_variables:
    print(input_variables[k])
    # Only rename when the old key is still present, so that re-running this
    # cell is harmless instead of raising a KeyError
    if 'default' in input_variables[k]:
        input_variables[k]['default_value'] = input_variables[k].pop('default')

In [None]:
# Bundle the trained network with its variable definitions and its
# input / output transformers
model = TorchModel(
    model=calibrated_nn,
    input_variables=[ ScalarVariable(**input_variables[k]) for k in input_variables.keys() ],
    output_variables=[ ScalarVariable(**output_variables[k]) for k in output_variables.keys() ],
    input_transformers=[input_transform],
    output_transformers=[calibration_transform,output_transform] # saving calibration before normalization
)

# Make sure the target directory exists before writing the model files
os.makedirs('saved_models', exist_ok=True)
model.dump( file=os.path.join('saved_models', setup+'.yml'), save_jit=True )

<h2> Testing loading the model, using the interface that the dashboard uses </h2>

In [None]:
# Load the model from inside 'saved_models', passing the YAML text to TorchModel.
# The working directory is restored afterwards so that this cell can be re-run
# and so that relative paths used by other cells keep working.
# NOTE(review): assumes TorchModel reads every file it needs during
# construction — confirm.
previous_cwd = os.getcwd()
os.chdir('saved_models')
try:
    with open(setup+'.yml') as f:
        loaded_model = TorchModel(f.read())
finally:
    os.chdir(previous_cwd)

In [None]:
# Visualize the dimensional experimental data together with the predictions
# of the reloaded model

# This name was not defined anywhere in the notebook, so the cell failed on a
# fresh kernel.
# NOTE(review): taken to be the experimental rows (experiment_flag == 1) of
# the un-normalized data — confirm this is the intended selection.
expt_training_set_df = df[df.experiment_flag == 1]

ax = plt.figure().add_subplot(projection='3d')

ax.scatter(
    expt_training_set_df[input_names[0]],
    expt_training_set_df[input_names[-1]],
    expt_training_set_df[output_names[0]], c='g',alpha=0.3)

# Evaluate the loaded model with one tensor per input variable, keyed by name
predictions = loaded_model.evaluate(
    { name: torch.tensor(expt_training_set_df[name].values) for name in input_names }
)
ax.scatter(
    expt_training_set_df[input_names[0]],
    expt_training_set_df[input_names[-1]],
    predictions[output_names[0]],
    label='predictions 1', s=50, facecolors='none', edgecolors='r')

ax.view_init(elev=40., azim=40, roll=0)
# Label all three axes through the Axes object rather than the pyplot state
# machine, so the labels are attached to this figure explicitly
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0]);