In [None]:
import pandas as pd
import numpy as np
import os
import GPy
import contextlib
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [None]:
# Root folder containing the Input/ and Outputs/ data read by the cells below.
# Set the MODULARCIRCFF_EMULATION_DIR environment variable to point the
# notebook at a different checkout; when it is unset the original location
# is used, so existing runs behave exactly as before.
folder_path = os.environ.get(
    'MODULARCIRCFF_EMULATION_DIR',
    '/Users/pmzff/Documents/GitHub/ModularCircFF/Emulation',
)

In [None]:
# Load the input table.
df_x = pd.read_csv(f'{folder_path}/Input/input_100.csv')

# Alternative selection, kept for reference: keep only the inputs that vary.
#columns_with_multiple_values = df_x.nunique() > 1
#filtered_input = df_x.loc[:, columns_with_multiple_values]

# Use the first five input columns as emulator features.
filtered_input = df_x.iloc[:, :5]

# Load the PCA CSVs for PC1..PC3. Each component has three files
# (PC<k>.csv, all_PC<k>.csv, CO_PC<k>.csv), stored under the keys
# 'y_PC<k>', 'all_y_PC<k>' and 'CO_y_PC<k>' respectively.
pca_dir = f'{folder_path}/Outputs/Out_fixed/PCA'
dataframes = {}
for i in range(3):
    df_name = f'y_PC{i+1}'
    for prefix in ('', 'all_', 'CO_'):
        dataframes[f'{prefix}{df_name}'] = pd.read_csv(f'{pca_dir}/{prefix}PC{i+1}.csv')

# Expose every loaded frame under its own module-level name.
y_PC1, y_PC2, y_PC3 = (dataframes[f'y_PC{n}'] for n in (1, 2, 3))
all_y_PC1, all_y_PC2, all_y_PC3 = (dataframes[f'all_y_PC{n}'] for n in (1, 2, 3))
CO_y_PC1, CO_y_PC2, CO_y_PC3 = (dataframes[f'CO_y_PC{n}'] for n in (1, 2, 3))

# Pressure-trace table. Column index 100 is kept as a one-column frame
# (presumably the cardiac output value, going by the name -- confirm).
df_pressure = pd.read_csv(f'{folder_path}/Outputs/Out_fixed/pressure_traces_r_pat/all_pressure_traces.csv')
cardiac_output = df_pressure.iloc[:, 100:101]

# Row-wise summary statistics over the first 100 columns
# (presumably the sampled pressure trace -- confirm).
trace_columns = df_pressure.iloc[:, :100]
mean_press = trace_columns.mean(axis=1).to_frame(name='mean_press')
max_press = trace_columns.max(axis=1).to_frame(name='max_press')
min_press = trace_columns.min(axis=1).to_frame(name='min_press')

### Fit the linear regression model to the data

In [None]:
# Features (X) and target (Y) for the linear emulator. Copies are taken so
# that later edits to X or Y leave filtered_input and all_y_PC1 untouched.
X = filtered_input.copy()
Y = all_y_PC1.copy()

# Linear regression model; it is fitted in a later cell.
model = LinearRegression()

# Hold out 20% of the rows for testing. The fixed random_state keeps the
# split identical across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)



In [None]:
# Display the target frame Y for inspection before the model is fitted.
Y

In [None]:
# Train the linear emulator on the training split.
model.fit(X_train, y_train)

# Coefficients of the first fitted target as a column vector (one row per
# input feature); the intercept is kept exactly as sklearn returns it.
coeffs = model.coef_[0].reshape(-1, 1)
intercept = model.intercept_

# Subtract the intercept (beta_0) from the held-out observations.
scale_y_obs = y_test - intercept

print(f"Coefficient: {coeffs}")
print(f"Intercept: {intercept}")
print("beta.shape:", coeffs.shape)
print(f"y shape: {y_test.shape}")

### $R^2$ of Linear Emulator



In [None]:
# Emulator predictions for the held-out inputs.
y_pred = model.predict(X_test)

# Coefficient of determination of those predictions against the held-out targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report only the score; uncomment the next line to also dump the raw predictions.
#print("Predictions on test set:", y_pred)
print("R² score:", r2)



### Calculate $x^*$ using $$x^*=(y_{obs}-\beta_0)(\beta^T \beta)^{-1} \beta^T$$

In [None]:
# beta^T beta; with coeffs stored as a single column this is a 1x1 matrix.
prod = coeffs.T @ coeffs

# Left pseudo-inverse of beta, (beta^T beta)^{-1} beta^T, built from the
# product above rather than recomputing it.
beta_inv = np.linalg.inv(prod) @ coeffs.T

# Map each intercept-shifted observation back to input space.
x_hat = scale_y_obs @ beta_inv


Remember that the pseudo-inverse only finds the minimum-norm solution.

### Print dataframe containing X and $x^*$ (minimum norm solution)

In [None]:
# Name the recovered inputs after the matching columns of X_test, appending
# '_' followed by U+0302 (combining circumflex) to mark them as estimates.
x_hat_headers = [f'{name}_\u0302' for name in X_test.columns]
x_hat.columns = x_hat_headers

# True test inputs and recovered inputs side by side, aligned on the row index.
df = pd.concat((X_test, x_hat), axis='columns')
df

### Calculate output of emulator using $x^*$

$$ y_{obs} = X\beta + \beta_0 $$

In [None]:
# Push the recovered inputs back through the fitted linear model:
# x_hat . beta + beta_0.
# NOTE(review): beta_inv @ coeffs equals (beta^T beta)^{-1} beta^T beta, i.e. the
# identity, so this should reproduce y_test up to floating-point round-off.
# It checks the algebra rather than the emulator's accuracy -- confirm intent.
y_calibrated = x_hat.dot(coeffs) + intercept

In [None]:
# Observed test outputs next to the outputs regenerated from x_hat.
y_compare = pd.concat((y_test, y_calibrated), axis='columns')
y_compare.columns = ("y_true", "y_calibrated")
y_compare

### Mean squared error

In [None]:
# Mean squared difference between the regenerated and the observed outputs.
calibration_error = y_compare['y_calibrated'] - y_compare['y_true']
np.mean(calibration_error**2)