In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from SALib import ProblemSpec
import contextlib

# Seed NumPy's global random state (reproducibility)
np.random.seed(42)

# Number of runs; it also selects which input/output files are read below
n_runs = 2000

# Names of the outputs (one Excel sheet each) that will be analysed
output_names = ['y_PC1', 'y_PC2', 'y_PC3', 'cardiac_output', 'EF', 'max_pressure']

# Read the input table
df_x = pd.read_csv(f'../ExploreModularCirc/Input/input_{n_runs}.csv')

# Keep every column up to and including 'T'.
# If there is no 'T' column, all columns are kept.
all_columns = list(df_x.columns)
if 'T' in all_columns:
    last_position = all_columns.index('T') + 1
else:
    last_position = len(all_columns)
relevant_columns = all_columns[:last_position]

# Restrict the inputs to the selected columns
filtered_input = df_x[relevant_columns]

# sheet_name=None loads every sheet of the workbook into a dict of DataFrames
sheets_dict = pd.read_excel(
    f"../ExploreModularCirc/Outputs/Output_{n_runs}/combined_all_outputdata_{n_runs}.xlsx",
    sheet_name=None,
)

# Make sure the directory that receives the result CSV files exists
output_dir = "RH_sensitivity_results"
os.makedirs(output_dir, exist_ok=True)

# Fit one linear emulator per output and run a Sobol sensitivity analysis on it.
#
# For every name in `output_names` the loop
#   1. fits a LinearRegression on an 80/20 train/test split and prints the
#      R² score obtained on the held-out part,
#   2. draws a Sobol sample inside the observed [min, max] range of each input,
#   3. evaluates the fitted emulator on that sample,
#   4. writes the total-order indices (ST, ST_conf), sorted by descending ST,
#      to `<output_dir>/sensitivity_<name>.csv`.

# The input ranges do not depend on the output being analysed, so they are
# computed once. Casting to float fails loudly on a non-numeric column
# instead of silently producing fewer bounds than parameter names.
input_bounds = np.column_stack([
    filtered_input[relevant_columns].min().to_numpy(dtype=float),
    filtered_input[relevant_columns].max().to_numpy(dtype=float),
])

for emulator_name in output_names:
    # Use the first column of the sheet as a 1-D target. This is the column
    # the R² score is evaluated on, and a 1-D target keeps the coefficients,
    # the predictions and the Sobol results consistently one-dimensional.
    Y = sheets_dict[emulator_name].iloc[:, 0]

    # Train the linear regression emulator
    X_train, X_test, y_train, y_test = train_test_split(
        filtered_input, Y, test_size=0.2, random_state=42
    )
    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)

    # Score the emulator on the held-out data
    y_pred = linear_model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    print(f"R² score for {emulator_name}: {r2}")

    # Define problem spec for sensitivity analysis (each spec gets its own
    # copy of the bounds so the specs never share an array)
    problem = ProblemSpec({
        'num_vars': len(relevant_columns),
        'names': relevant_columns,
        'bounds': input_bounds.copy(),
        "outputs": [emulator_name],
    })

    # Sample inputs
    # NOTE(review): the Sobol sampler may scramble with its own random source;
    # whether np.random.seed() governs it depends on the SALib/SciPy versions.
    # Consider passing an explicit seed here if exact reproducibility matters.
    problem.sample_sobol(1024)
    X_samples = problem.samples

    # Evaluate the emulator on all samples with one matrix-vector product.
    # The columns of X_samples follow `relevant_columns`, which is the feature
    # order the model was fitted with.
    coefficients = np.asarray(linear_model.coef_).reshape(-1)
    intercept = float(linear_model.intercept_)
    Y_samples = X_samples @ coefficients + intercept

    # Set and analyze results
    problem.set_results(Y_samples)
    sobol_indices = problem.analyze_sobol()

    print(sobol_indices)

    # Only the total-order table is exported below
    total, first, second = sobol_indices.to_df()

    # Sort the data in descending order of 'ST'
    total = total.sort_values('ST', ascending=False)

    # Move the parameter names from the index into a 'Parameter' column
    result_data = (
        total[["ST", "ST_conf"]]
        .rename_axis("Parameter")
        .reset_index()
    )

    # Save to CSV file
    result_file = os.path.join(output_dir, f"sensitivity_{emulator_name}.csv")
    result_data.to_csv(result_file, index=False)
    print(f"Saved results for {emulator_name} to {result_file}")

print("All analyses completed.")

