In [1]:
import pandas as pd
import numpy as np
import os
import GPy
import contextlib
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

### Read in synthetic data

In [2]:
folder_path = '/Users/pmzff/Documents/GitHub/ModularCircFF/Emulation'

In [3]:
# Read Input Data
df_x = pd.read_csv(f'{folder_path}/Input/input_100.csv')

# Select varying inputs only
#columns_with_multiple_values = df_x.nunique() > 1
#filtered_input = df_x.loc[:, columns_with_multiple_values]

# Select only first 5 inputs 
filtered_input = df_x.iloc[:,:5]

dataframes = {}

# Read PCA data
for i in range(3):
    df_name = f'y_PC{i+1}'  # Create the dataframe name
    dataframes[df_name] = pd.read_csv(f'{folder_path}/Outputs/Out_fixed/PCA/PC{i+1}.csv')  # Read and store the dataframe
    
PC1 = dataframes['y_PC1']
PC2 = dataframes['y_PC2']
PC3 = dataframes['y_PC3']

df_pressure = pd.read_csv(f'{folder_path}/Outputs/Out_fixed/pressure_traces_r_pat/all_pressure_traces.csv')

CO = df_pressure.iloc[:,100:101]
dt = df_pressure['dt']
mean_press = df_pressure.iloc[:,:100].mean(axis=1).to_frame(name='mean_press')
max_press = df_pressure.iloc[:,:100].max(axis=1).to_frame(name='max_press')
min_press = df_pressure.iloc[:,:100].min(axis=1).to_frame(name='min_press')


### Take a look and input true input X 

In [4]:
filtered_input

Unnamed: 0,# svn.c,pat.r,pat.c,rv.E_act,T
0,27.687002,0.443723,3.278327,1.623082,0.478369
1,10.935100,0.243385,4.286711,2.717826,0.805784
2,18.671167,0.356841,2.549143,0.822205,0.950488
3,24.992088,0.155291,5.101243,3.333386,0.614289
4,22.700741,0.326891,3.862239,2.779415,0.551895
...,...,...,...,...,...
95,27.406567,0.196824,3.333852,1.704984,0.646159
96,26.960391,0.395660,2.043640,0.645535,0.810764
97,10.379113,0.271468,5.547834,3.156299,0.473453
98,17.956148,0.327547,3.782205,1.440794,0.618088


### Create vector of observations, y

In [5]:
combined_outptut_data = pd.concat([PC1, PC2, PC3, CO, max_press], axis=1)
combined_outptut_data

Unnamed: 0,PC1,PC2,PC3,CO,max_press
0,14.672208,-1.820989,-0.242595,3.562928,31.023888
1,-4.957084,0.857850,-1.151066,3.927774,22.766915
2,1.729320,2.687650,3.080620,3.299307,28.992385
3,-17.616349,-1.624573,-0.841759,3.877907,14.946666
4,4.124239,-0.868110,-0.813737,3.838552,25.967403
...,...,...,...,...,...
95,-13.008117,0.123681,0.457627,3.777376,18.757382
96,4.072604,2.115061,3.988298,3.206075,30.445996
97,-0.066577,-2.343926,-1.035500,4.117032,22.751738
98,3.896077,-1.031787,0.695567,3.801754,26.389662


### Import linear models for each output we are calibrating

In [6]:
# Read in dataframe of fitted linear models
model_dataframes = pd.read_pickle(f'{folder_path}/Outputs/Emulators/linear_models_and_r2_scores.csv')

selected_rows = model_dataframes.loc[['y_PC1', 'y_PC2', 'y_PC3', 'CO', 'max_press']]
selected_rows

Unnamed: 0,R2_Score,Model
y_PC1,0.987922,LinearRegression()
y_PC2,0.882277,LinearRegression()
y_PC3,0.647736,LinearRegression()
CO,0.9409,LinearRegression()
max_press,0.980859,LinearRegression()


### Conduct the inverse problem i.e. calibrate the model for X
Remember this is the minimum norm solution of X

In [50]:
# Build beta matrix (d * p, where d is dimension of y_obs and p is dinemnsion of X)
beta_matrix = []
intercept = []

for index, row_entry in selected_rows.iterrows():
    model = row_entry['Model']
    coeffs = model.coef_[0]
    b0 = model.intercept_

    beta_matrix.append(coeffs)
    intercept.append(b0)

# Convert the list to a NumPy array
beta_matrix = np.array(beta_matrix)
intercept = np.array(intercept)

# Select observation and reshape to be (d, 1)
Y_obs = np.array(combined_outptut_data.T[0])
Y_obs = Y_obs.reshape(len(Y_obs), 1)

# Scale observation by intercepts of models
Y_scaled = Y_obs - intercept

# Compute the pseudo-inverse of the coefficient matrix
beta_inv = np.linalg.inv(beta_matrix.T @ beta_matrix) @ beta_matrix.T
x_hat = beta_inv @ Y_scaled


array([[-9.69911218e+01],
       [-2.57687627e-02],
       [ 7.14121553e+00],
       [-2.64981170e+00],
       [ 1.13181921e+00]])

In [16]:
# Step 1: Convert the NumPy array to a DataFrame
x_hat_df = pd.DataFrame(x_hat, index=filtered_input[:1].T.index)

# Step 2: Concatenate the two DataFrames horizontally
result = pd.concat([filtered_input[:1].T, x_hat_df], axis=1)

# Step 3: Rename the columns
result.columns = ['x_calibrated', 'x_true']

result


Unnamed: 0,x_calibrated,x_true
# svn.c,27.687002,-69.30412
pat.r,0.443723,0.417955
pat.c,3.278327,10.419542
rv.E_act,1.623082,-1.02673
T,0.478369,1.610189


In [17]:
result.to_csv('multiple_output_calibration_result_x.csv')

### Check that calibrated X can recover original observational data.  

In [None]:
# Feed calibrated x_hat back into linear model 
y_calibrated = (beta_matrix @ x_hat) + intercept 

y_compare = np.hstack([Y_obs, y_calibrated])
y_compare = pd.DataFrame(y_compare)
y_compare.columns = ("y_true", "y_calibrated")

y_compare.to_csv('multiple_output_calibration_result_y.csv', index=False)

In [None]:
y_compare

Calibrate multiple observations and calculate MSE

In [91]:
# Build beta matrix (d * p, where d is dimension of y_obs and p is dinemnsion of X)
beta_matrix = []
intercept = []

for index, row_entry in selected_rows.iterrows():
    model = row_entry['Model']
    coeffs = model.coef_[0]
    b0 = model.intercept_

    beta_matrix.append(coeffs)
    intercept.append(b0)

# Convert the list to a NumPy array
beta_matrix = np.array(beta_matrix)
intercept = np.array(intercept)

x_differences = []

for row in range(10):
 # Select observation and reshape to be (d, 1)
 Y_obs = np.array(combined_outptut_data.T[row])
 Y_obs = Y_obs.reshape(len(Y_obs), 1)

 # Scale observation by intercepts of models
 Y_scaled = Y_obs - intercept

 # Compute the pseudo-inverse of the coefficient matrix
 beta_inv = np.linalg.inv(beta_matrix.T @ beta_matrix) @ beta_matrix.T
 x_hat = beta_inv @ Y_scaled

 # Compute squared-diff between true and calibrated x
 true = np.array(filtered_input.loc[row].T)
 true = true.reshape(5,1)
 diff = (x_hat - true)**2

 # Append arrary
 x_differences.append(diff)

# Compute MSE
mse_x = np.mean(np.hstack(x_differences), axis=1)

mse_x_df = pd.DataFrame(mse_x)
mse_x_df.columns = ['MSE']
mse_x_df.index = filtered_input.columns
mse_x_df.to_csv('MSE_multi_output_x.csv')

mse_x_df

Unnamed: 0,MSE
# svn.c,4581.203286
pat.r,0.000582
pat.c,22.615799
rv.E_act,4.106932
T,0.663989
