In [1]:
import pandas as pd
import numpy as np
import os
import GPy
import contextlib
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Root directory of the emulation data; the input and output CSV paths used
# by the loading cell are built relative to it.
folder_path = "../Emulation"

In [3]:
# Load the emulator inputs and keep only the first five columns as features.
# (Alternative, not used here: keep every column that takes more than one
# value, i.e. df_x.loc[:, df_x.nunique() > 1].)
df_x = pd.read_csv(f'{folder_path}/Input/input_100.csv')
filtered_input = df_x.iloc[:, :5]

# Load the PCA tables for PC1-PC3. Three CSV variants are read per component:
# the plain file plus the ones prefixed "all_" and "CO_". Each table is stored
# under the key "<prefix>y_PC<k>".
pca_dir = f'{folder_path}/Outputs/Out_fixed/PCA'
dataframes = {}
for pc in (1, 2, 3):
    for prefix in ('', 'all_', 'CO_'):
        dataframes[f'{prefix}y_PC{pc}'] = pd.read_csv(f'{pca_dir}/{prefix}PC{pc}.csv')

# Expose every table under its own module-level name for the cells below.
y_PC1, y_PC2, y_PC3 = (dataframes[f'y_PC{pc}'] for pc in (1, 2, 3))
all_y_PC1, all_y_PC2, all_y_PC3 = (dataframes[f'all_y_PC{pc}'] for pc in (1, 2, 3))
CO_y_PC1, CO_y_PC2, CO_y_PC3 = (dataframes[f'CO_y_PC{pc}'] for pc in (1, 2, 3))

# Pressure-trace table: column 100 is kept as a one-column frame, and
# columns 0-99 are summarised row-wise into mean / max / min frames.
df_pressure = pd.read_csv(f'{folder_path}/Outputs/Out_fixed/pressure_traces_r_pat/all_pressure_traces.csv')
cardiac_output = df_pressure.iloc[:, 100:101]

pressure_trace = df_pressure.iloc[:, :100]
mean_press = pressure_trace.mean(axis=1).to_frame(name='mean_press')
max_press = pressure_trace.max(axis=1).to_frame(name='max_press')
min_press = pressure_trace.min(axis=1).to_frame(name='min_press')

### Fit the linear regression model to the data

In [4]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Features are the selected emulator inputs; the target is the table read
# from all_PC1.csv. Copies are taken so the loaded frames stay untouched.
X = filtered_input.copy()
Y = all_y_PC1.copy()

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Unfitted linear regression model; it is trained on the training split later.
model = LinearRegression()



In [5]:
# Display the regression target frame (long frames are shown truncated).
Y

Unnamed: 0,PC1
0,14.700361
1,-4.983016
2,1.709376
3,-17.609176
4,4.133606
...,...
95,-13.002847
96,4.074903
97,-0.057281
98,3.898398


In [6]:
# Train the linear emulator on the training split.
model.fit(X_train, y_train)

# Keep the weights of the first (only) target as a column vector, one row per
# input feature, together with the fitted intercept.
first_target_weights = model.coef_[0]
coeffs = first_target_weights[np.newaxis].T
intercept = model.intercept_

print(f"Coefficient: {coeffs}")
print(f"Intercept: {intercept}")

# Subtract the intercept (beta_0) from the held-out targets. Despite the
# variable name this is a shift, not a rescaling.
scale_y_obs = y_test - intercept

print("beta.shape:", coeffs.shape)
print(f"y shape: {y_test.shape}")

Coefficient: [[ -0.30368849]
 [101.86174671]
 [ -1.49538539]
 [  1.13034543]
 [-11.22474474]]
Intercept: [-14.25576199]
beta.shape: (5, 1)
y shape: (20, 1)


### $R^2$ of Linear Emulator



In [7]:
# Evaluate the fitted emulator on the held-out inputs.
y_pred = model.predict(X_test)

# Coefficient of determination between the held-out targets and the predictions.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print("R² score:", r2)



R² score: 0.9879708429634575


### Calculate $x^*$ using $$x^*=(y_{obs}-\beta_0)(\beta^T \beta)^{-1} \beta^T$$

In [8]:
# Left pseudo-inverse of beta: (beta^T beta)^{-1} beta^T.
# `prod` holds beta^T beta (1x1 for the (5, 1) coefficient vector); it is
# computed once and reused for the inverse instead of being recomputed.
prod = coeffs.T @ coeffs
beta_inv = np.linalg.inv(prod) @ coeffs.T

# Map every intercept-shifted observation back to an input vector x*
# (one row per observation, one column per input feature).
x_hat = scale_y_obs @ beta_inv


Remember that the pseudo-inverse will only find the minimum-norm solution.

### Print dataframe containing X and $x^*$ (minimum norm solution)

In [9]:
# Label the reconstructed inputs with the original feature names followed by
# "_" and a combining circumflex, so they can sit next to the true inputs.
x_hat_headers = [
    f'{feature}_\u0302'
    for feature in X_test.columns
]
x_hat.columns = x_hat_headers

# Side-by-side table: held-out inputs first, their reconstructions after.
df = pd.concat(objs=[X_test, x_hat], axis="columns")
df

Unnamed: 0,# svn.c,pat.r,pat.c,rv.E_act,T,# svn.c_̂,pat.r_̂,pat.c_̂,rv.E_act_̂,T_̂
83,24.3055,0.18222,1.987147,2.302793,0.71903,-1.7e-05,0.005671,-8.3e-05,6.3e-05,-0.000625
53,17.038387,0.181593,5.68716,0.760623,0.800493,4.9e-05,-0.016512,0.000242,-0.000183,0.00182
70,14.065468,0.350416,3.446892,1.257576,0.479755,-0.00067,0.224601,-0.003297,0.002492,-0.02475
45,20.214281,0.213802,2.072494,2.498254,0.666226,-0.000178,0.059666,-0.000876,0.000662,-0.006575
44,24.133311,0.336774,5.574529,1.393334,0.898538,-0.00033,0.110834,-0.001627,0.00123,-0.012213
39,30.071423,0.212066,4.76963,2.060813,0.483559,-8.1e-05,0.027199,-0.000399,0.000302,-0.002997
22,14.75206,0.376787,4.664729,1.479225,0.8645,-0.000533,0.178631,-0.002622,0.001982,-0.019684
80,26.757012,0.455507,4.017807,1.126331,0.863622,-0.00065,0.217996,-0.0032,0.002419,-0.024022
10,16.843858,0.447749,4.127918,1.859428,0.671982,-0.000829,0.278036,-0.004082,0.003085,-0.030638
0,27.687002,0.443723,3.278327,1.623082,0.478369,-0.000837,0.280762,-0.004122,0.003116,-0.030939


### Calculate output of emulator using $x^*$

$$ y_{obs} = X\beta + \beta_0 $$

In [10]:
# Push the reconstructed inputs back through the linear emulator:
# x* times the coefficient vector, plus the intercept.
y_calibrated = x_hat.dot(coeffs) + intercept

In [11]:
# Pair each held-out target with the emulator output at its reconstructed input.
y_compare = pd.concat(objs=[y_test, y_calibrated], axis="columns")
y_compare.columns = ["y_true", "y_calibrated"]
y_compare

Unnamed: 0,y_true,y_calibrated
83,-13.670844,-13.670844
53,-15.958746,-15.958746
70,8.90821,8.90821
45,-8.102175,-8.102175
44,-2.824968,-2.824968
39,-11.450656,-11.450656
22,4.167202,4.167202
80,8.227045,8.227045
10,14.419235,14.419235
0,14.700361,14.700361


### Mean Squared Error

In [12]:
# Mean of the squared differences between the calibrated and the true outputs.
calibration_error = y_compare['y_calibrated'] - y_compare['y_true']
np.mean(calibration_error ** 2)

3.540259831212172e-29