In [None]:
%matplotlib widget
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import pymongo
import os
import sys
import re
import time
import yaml

import botorch
from botorch.models.transforms.input import AffineInputTransform
from botorch.models import MultiTaskGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.kernels import ScaleKernel, RBFKernel

import gpytorch
from gpytorch.mlls import ExactMarginalLogLikelihood

In [None]:
# Identifier of the experimental setup for which a model is trained
setup: str = 'ip2'

In [None]:
# Open credential file for database.
# The home directory is resolved with expanduser rather than os.getenv('HOME'):
# getenv returns None when HOME is unset, which would make os.path.join
# fail with an unrelated TypeError.
db_profile_path = os.path.join(os.path.expanduser('~'), 'db.profile')
with open(db_profile_path) as f:
    db_profile = f.read()

# Extract the read-only password from the profile, failing with an explicit
# message (instead of a bare IndexError) when the entry is missing.
password_match = re.search('SF_DB_READONLY_PASSWORD=(.+)', db_profile)
if password_match is None:
    raise ValueError(
        f"No SF_DB_READONLY_PASSWORD entry found in {db_profile_path}"
    )

# Connect to the MongoDB database with read-only access
db = pymongo.MongoClient(
    host="mongodb05.nersc.gov",
    username="bella_sf_ro",
    password=password_match.group(1),
    authSource="bella_sf")["bella_sf"]

# Extract data from the database as pandas dataframe
# (one row per document of the collection named after the setup)
collection = db[setup]
df = pd.DataFrame( list(collection.find()) )

In [None]:
# Read the variable definitions of the selected setup from the YAML config
# and collect the 'name' entry of every input and every output variable
with open("../../config/variables.yml") as config_file:
    yaml_dict = yaml.safe_load(config_file.read())

setup_config = yaml_dict[setup]

input_variables = setup_config["input_variables"]
input_names = [spec['name'] for spec in input_variables.values()]

output_variables = setup_config["output_variables"]
output_names = [spec['name'] for spec in output_variables.values()]

In [None]:
# 3D scatter of the raw (dimensional) data: first input and last input
# against the first output, colored by the experiment flag
x_name, y_name = input_names[0], input_names[-1]

ax = plt.figure().add_subplot(projection='3d')
ax.scatter(
    df[x_name],
    df[y_name],
    df[output_names[0]],
    c=df["experiment_flag"],
    alpha=0.3,
)
ax.view_init(elev=40., azim=40, roll=0)

# Label the two horizontal axes of the 3D axes created above
ax.set_xlabel(x_name)
ax.set_ylabel(y_name)

<h2>Normalize with Affine Input Transform</h2>

In [None]:
# Define the input and output normalizations.
# AffineInputTransform maps x -> (x - offset) / coefficient, so using the
# per-column mean as offset and the per-column standard deviation as
# coefficient standardizes each column.

X = torch.tensor( df[ input_names ].values, dtype=torch.float )
X_mean, X_std = X.mean(axis=0), X.std(axis=0)
input_transform = AffineInputTransform(
    len(input_names),
    coefficient=X_std,
    offset=X_mean
)

y = torch.tensor( df[ output_names ].values, dtype=torch.float )
y_mean, y_std = y.mean(axis=0), y.std(axis=0)
output_transform = AffineInputTransform(
    len(output_names),
    coefficient=y_std,
    offset=y_mean
)

# Sanity checks on the divisors. Only the coefficient can break the
# normalization: a zero (constant column) or non-finite (e.g. a single row,
# or NaNs in the data) standard deviation yields NaN/inf after division.
# A zero mean is harmless because the offset is only subtracted, so no
# warning is emitted for it.
if (X_std == 0).any() or not torch.isfinite(X_std).all():
    print("RMS value used for input normalization is 0 or not finite. This will lead to NaNs ", X_std)
if (y_std == 0).any() or not torch.isfinite(y_std).all():
    print("RMS value used for output normalization is 0 or not finite. This will lead to NaNs ", y_std)

In [None]:
# Build a normalized copy of the data set: `df` itself is left untouched,
# only the input and output columns of the copy are replaced
raw_inputs = torch.tensor(df[input_names].values)
raw_outputs = torch.tensor(df[output_names].values)

norm_df = df.copy()
norm_df[input_names] = input_transform(raw_inputs)
norm_df[output_names] = output_transform(raw_outputs)

In [None]:
# Visualize the normalized data: same view as for the raw data, but taken
# from norm_df (first and last input against the first output)
first_input = input_names[0]
last_input = input_names[-1]

ax = plt.figure().add_subplot(projection='3d')
ax.scatter(
    norm_df[first_input],
    norm_df[last_input],
    norm_df[output_names[0]],
    c=norm_df["experiment_flag"],
    alpha=0.3,
)
ax.view_init(elev=40., azim=40, roll=0)

# Label the two horizontal axes of the 3D axes created above
ax.set_xlabel(first_input)
ax.set_ylabel(last_input)

# Define a multi-input multi-task GP model

In [None]:
%%time
# Training data: the task index (experiment_flag) goes in column 0, followed
# by the normalized inputs; the targets are the normalized outputs
train_X = torch.tensor( norm_df[['experiment_flag']+input_names].values )
train_Y = torch.tensor( norm_df[output_names].values )

model = MultiTaskGP(
    train_X,
    train_Y,
    task_feature=0,  # column of train_X that holds the task index
    covar_module=ScaleKernel(RBFKernel())
)

# Fit the model
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_mll(mll)

# Correlation between task 0 and task 1 from the learned task covariance.
# .item() has to wrap the whole ratio: applied to the sqrt alone (as before)
# the result stays a tensor attached to the autograd graph and prints as
# tensor(..., grad_fn=...) instead of a plain number.
cov = model.task_covar_module._eval_covar_matrix()
correlation = ( cov[1,0] / torch.sqrt(cov[0,0]*cov[1,1]) ).item()
print( 'Correlation: ', correlation )

In [None]:
exp_flag = 0

# Rows of the normalized data set that belong to the selected task
task_rows = norm_df[norm_df.experiment_flag==exp_flag]
x_values = task_rows[input_names[0]]
y_values = task_rows[input_names[-1]]

# Create a 3D plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Training points of the selected task (the simulation set for exp_flag = 0)
ax.scatter(x_values, y_values, task_rows[output_names[0]], alpha=0.7)

# Posterior mean of the model evaluated at the inputs of those same rows;
# the result has one column per task, so the task's column is picked below
predictions = model.posterior(
    torch.tensor(task_rows[input_names].values, dtype=torch.float)
).mean.detach()

# Predictions drawn as hollow red markers on top of the training points
ax.scatter(
    x_values,
    y_values,
    predictions[:,exp_flag],
    label='predictions', s=50, facecolors='none', edgecolors='r')

ax.view_init(elev=40., azim=40)

# Axis labels
ax.set_xlabel(input_names[0])
ax.set_ylabel(input_names[-1])
ax.set_zlabel(output_names[0])

# Legend, then render the figure
ax.legend()
plt.show()

In [None]:
fig, ax = plt.subplots()

# Data points with (normalized) TOD above 0.8 times its maximum, i.e. a
# slice at roughly fixed TOD, colored by the experiment flag
tod_max = norm_df['TOD_fs3'].max()
select = norm_df['TOD_fs3'] > 0.8*tod_max
high_tod_rows = norm_df[select]
ax.scatter(
    high_tod_rows[input_names[-1]],
    high_tod_rows[output_names[0]],
    c=high_tod_rows['experiment_flag'],
)

# Test inputs: scan z over its full range while TOD is held at its maximum
# and GVD at the constant 0.1
n_test = 100
zmin, zmax = norm_df['z_target_um'].min(), norm_df['z_target_um'].max()
z_test_array = torch.tensor(np.linspace(zmin, zmax, n_test), dtype=torch.float32).unsqueeze(-1)
TOD_test_array = torch.full((n_test, 1), float(tod_max), dtype=torch.float32)
GVD_test_array = torch.full((n_test, 1), 0.1, dtype=torch.float32)

# NOTE(review): the column order below assumes input_names is ordered
# (TOD, GVD, z) -- confirm against the variables config
test_inputs = torch.cat([TOD_test_array, GVD_test_array, z_test_array], dim=1)
predictions = model.posterior(test_inputs)
with torch.no_grad():
    m = predictions.mean
    l,u = predictions.mvn.confidence_region()

# Draw mean and confidence band of the task selected by exp_flag
exp_flag = 1
z_flat = z_test_array.numpy().flatten()
ax.plot(z_test_array.numpy(), m[:,exp_flag].detach().numpy())
ax.fill_between(z_flat, l[:,exp_flag], u[:,exp_flag], alpha = 0.25, lw = 0, color='C0')

# Visualize predictions along with confidence interval for the first task (simulation data)

In [None]:
# Training points of each task, taken from the normalized data set.
# These names were not defined in any earlier cell, so the cell failed with a
# NameError on a fresh kernel; they are now built here.
# NOTE(review): assumes experiment_flag == 0 marks simulation rows and == 1
# experimental rows, and that output_names[0] is the proton-number column
# -- confirm.
sim_rows = norm_df[norm_df['experiment_flag'] == 0]
exp_rows = norm_df[norm_df['experiment_flag'] == 1]
z_sim, protons_sim = sim_rows['z_target_um'], sim_rows[output_names[0]]
z_exp, protons_exp = exp_rows['z_target_um'], exp_rows[output_names[0]]

# Confidence band of the first task (column 0) of the posterior held in
# `predictions`, as returned by confidence_region()
with torch.no_grad():
    lower, upper = predictions.mvn.confidence_region()
lower_bound_sim = lower[:, 0].detach().numpy()
upper_bound_sim = upper[:, 0].detach().numpy()

fig, ax = plt.subplots(figsize=(12,9))

ax.scatter(z_exp, protons_exp, label='Experimental data set', alpha=0.6, color = 'blue')
ax.scatter(z_sim, protons_sim, label='Simulation data set', alpha=0.6, color = 'orange')

z_flat = z_test_array.numpy().flatten()
ax.plot(z_flat, predictions.mean[:,0].detach().numpy(), label='Predictions', c='r' )
# Label fixed: column 0 is the first task, not the second
ax.fill_between(z_flat, lower_bound_sim, upper_bound_sim, color='orange', alpha=0.1, label='Confidence interval for the first task (simulation data)')

ax.set_title("Normalized predictions of number of protons")
ax.set_xlabel('z_target_um')
ax.set_ylabel('n_protons')

# Add the legend before saving so that it is part of the written image, and
# use a task-specific file name so the figure is not overwritten by another
# figure saved under the generic name
ax.legend()
fig.savefig('./' + 'n_protons_predictions_split_zval__' + 'sim' + '.png')

# Visualize predictions along with confidence interval for the second task (experimental data)

In [None]:
# Training points of each task, taken from the normalized data set.
# These names were not defined in any earlier cell, so the cell failed with a
# NameError on a fresh kernel; they are now built here.
# NOTE(review): assumes experiment_flag == 0 marks simulation rows and == 1
# experimental rows, and that output_names[0] is the proton-number column
# -- confirm.
sim_rows = norm_df[norm_df['experiment_flag'] == 0]
exp_rows = norm_df[norm_df['experiment_flag'] == 1]
z_sim, protons_sim = sim_rows['z_target_um'], sim_rows[output_names[0]]
z_exp, protons_exp = exp_rows['z_target_um'], exp_rows[output_names[0]]

# Confidence band of the second task (column 1) of the posterior held in
# `predictions`, as returned by confidence_region()
with torch.no_grad():
    lower, upper = predictions.mvn.confidence_region()
lower_bound_exp = lower[:, 1].detach().numpy()
upper_bound_exp = upper[:, 1].detach().numpy()

fig, ax = plt.subplots(figsize=(12,9))

ax.scatter(z_exp, protons_exp, label='Experimental data set', alpha=0.6, color = 'blue')
ax.scatter(z_sim, protons_sim, label='Simulation data set', alpha=0.6, color = 'orange')

z_flat = z_test_array.numpy().flatten()
ax.plot(z_flat, predictions.mean[:,1].detach().numpy(), label='Predictions', c='r' )
ax.fill_between(z_flat, lower_bound_exp, upper_bound_exp, color='lightblue', alpha=0.25, label='Confidence interval for the second task (experimental data)')

ax.set_title("Normalized predictions of number of protons")
ax.set_xlabel('z_target_um')
ax.set_ylabel('n_protons')

# Add the legend before saving so that it is part of the written image
ax.legend()
fig.savefig('./' + 'n_protons_predictions_split_zval__' + '.png')