In [1]:
from prepare_data import split_patient_samples, extract_data_and_labels, preprocess_data
from select_features import forward_feature_selection
from train_model import train_model
from confidence_interval import bootstrap_confidence_interval
from save_model import save_model
from mse_with_constant_prediction import calculate_baseline_mse
from utils import update_mse_with_confidence_interval
from sklearn.metrics import mean_squared_error
from visualize_results import residuals_plot, actual_vs_predicted_plot

# End-to-end experiment cell: pick a model family, run forward feature
# selection, train the final model, evaluate it on X_test / y_test, and then
# persist and plot the results. Every step delegates to a project helper
# imported at the top of the file, so the statements below must run in order.

# Model selection
# model_type is passed on to feature selection, preprocessing, model saving,
# result logging and both plots, so changing it here switches the whole run.
model_type = 'random_forest'  # Options: 'cnn', 'lstm', 'cnn_lstm', 'random_forest', 'gradient_boosting'

# Load the dataset
# NOTE(review): the structure of data_splits is defined in prepare_data and is
# not visible here; it is only handed through to the helpers below.
data_splits = split_patient_samples()

# Forward feature selection
# Returns the chosen feature subset together with the best configuration found.
# NOTE(review): the captured output of this cell shows several Ray Tune runs;
# presumably they are launched from inside this call -- confirm in
# select_features.
# NOTE(review): selection receives the complete data_splits. If the samples
# that later end up in X_test are visible during selection, the MSE reported
# below is optimistically biased -- verify how select_features splits the data.
selected_features, best_overall_config = forward_feature_selection(data_splits, model_type)

# Final model training with selected features and best configuration
# Only the selected features are extracted; preprocess_data then produces the
# train/test arrays that are used for fitting and for all metrics below.
all_data, y = extract_data_and_labels(data_splits, features_to_include=selected_features)
X_train, X_test, y_train, y_test = preprocess_data(all_data, y, model_type)
final_model = train_model(best_overall_config, X_train, y_train)

# Save the final model
save_model(final_model, model_type)

# TODO: optionally reload the saved model here. Not implemented -- the
# predictions below use the in-memory final_model, not the saved artifact.

# Generate predictions
y_pred = final_model.predict(X_test)

# Calculate MSE and baseline MSE for comparison
# The baseline is labelled "predicting mean" in the print below; presumably it
# is the MSE of a constant prediction derived from y_train and scored on
# y_test -- confirm in mse_with_constant_prediction.
mse = mean_squared_error(y_test, y_pred)
baseline_mse = calculate_baseline_mse(y_train, y_test)

# Bootstrap confidence interval for the MSE
# NOTE(review): the helper gets the model plus the test arrays rather than
# y_pred, so it presumably re-predicts on resampled test data -- confirm the
# resampling scheme and confidence level in confidence_interval.
mse_ci = bootstrap_confidence_interval(final_model, X_test, y_test)

# Print the baseline MSE, MSE, and MSE confidence interval
print(f"Baseline MSE (predicting mean): {baseline_mse}")
print(f'MSE: {mse}')
print(f'MSE confidence interval: {mse_ci}')

# Save the results to a CSV file
# Only the model's MSE and its interval are passed on; baseline_mse is printed
# above but is not handed to this helper.
update_mse_with_confidence_interval(model_type, mse, mse_ci)

# Visualization of the results
# Both plots are built from the same test-set targets and predictions.
residuals_plot(y_test, y_pred, model_type)
actual_vs_predicted_plot(y_test, y_pred, model_type)

2023-11-25 20:48:59,771	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2023-11-25 20:48:59,878	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


X_train shape: (78, 256)
X_test shape: (20, 256)
y_train shape: 78
y_test shape: 20


2023-11-25 20:49:02,586	INFO worker.py:1673 -- Started a local Ray instance.
2023-11-25 20:49:03,774	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2023-11-25 20:49:03,776	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-11-25 20:49:14
Running for:,00:00:10.50
Memory:,14.2/63.9 GiB

Trial name,status,loc,max_depth,min_samples_leaf,min_samples_split,model_type,n_estimators,iter,total time (s),mean_squared_error
lambda_fa535_00000,TERMINATED,127.0.0.1:7028,20,2,10,random_forest,300,1,2.484,201.989
lambda_fa535_00001,TERMINATED,127.0.0.1:23788,20,1,5,random_forest,200,1,2.359,210.854
lambda_fa535_00002,TERMINATED,127.0.0.1:18184,10,2,10,random_forest,200,1,1.735,202.607
lambda_fa535_00003,TERMINATED,127.0.0.1:26096,30,2,2,random_forest,300,1,3.252,203.461
lambda_fa535_00004,TERMINATED,127.0.0.1:7120,20,4,2,random_forest,200,1,1.62,192.671
lambda_fa535_00005,TERMINATED,127.0.0.1:29444,30,2,2,random_forest,300,1,3.143,203.461
lambda_fa535_00006,TERMINATED,127.0.0.1:24580,20,4,10,random_forest,100,1,0.787,196.886
lambda_fa535_00007,TERMINATED,127.0.0.1:1524,20,2,10,random_forest,100,1,0.899001,207.612
lambda_fa535_00008,TERMINATED,127.0.0.1:33864,20,2,5,random_forest,200,1,2.121,204.215
lambda_fa535_00009,TERMINATED,127.0.0.1:34460,20,4,2,random_forest,300,1,2.392,193.288


Trial name,mean_squared_error
lambda_fa535_00000,201.989
lambda_fa535_00001,210.854
lambda_fa535_00002,202.607
lambda_fa535_00003,203.461
lambda_fa535_00004,192.671
lambda_fa535_00005,203.461
lambda_fa535_00006,196.886
lambda_fa535_00007,207.612
lambda_fa535_00008,204.215
lambda_fa535_00009,193.288


[36m(<lambda> pid=24580)[0m Average MSE: 196.88600472065937 +/- 189.89136071644


2023-11-25 20:49:14,313	INFO tune.py:1047 -- Total run time: 10.54 seconds (10.49 seconds for the tuning loop).
2023-11-25 20:49:14,649	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


X_train shape: (78, 256)
X_test shape: (20, 256)
y_train shape: 78
y_test shape: 20


0,1
Current time:,2023-11-25 20:49:28
Running for:,00:00:13.97
Memory:,14.1/63.9 GiB

Trial name,status,loc,max_depth,min_samples_leaf,min_samples_split,model_type,n_estimators,iter,total time (s),mean_squared_error
lambda_00ce2_00000,TERMINATED,127.0.0.1:9648,30,4,2,random_forest,100,1,1.79851,169.906
lambda_00ce2_00001,TERMINATED,127.0.0.1:27856,10,4,10,random_forest,100,1,1.84535,170.202
lambda_00ce2_00002,TERMINATED,127.0.0.1:25248,20,2,10,random_forest,300,1,5.76452,172.426
lambda_00ce2_00003,TERMINATED,127.0.0.1:8716,20,2,2,random_forest,200,1,5.00452,172.041
lambda_00ce2_00004,TERMINATED,127.0.0.1:8332,30,2,5,random_forest,300,1,6.76753,172.548
lambda_00ce2_00005,TERMINATED,127.0.0.1:32936,20,2,5,random_forest,300,1,6.71352,172.548
lambda_00ce2_00006,TERMINATED,127.0.0.1:17452,30,1,5,random_forest,200,1,5.36752,171.199
lambda_00ce2_00007,TERMINATED,127.0.0.1:25372,30,1,2,random_forest,200,1,6.74053,171.28
lambda_00ce2_00008,TERMINATED,127.0.0.1:26960,10,4,2,random_forest,200,1,3.53451,168.037
lambda_00ce2_00009,TERMINATED,127.0.0.1:31524,30,4,2,random_forest,200,1,3.61351,168.037


Trial name,mean_squared_error
lambda_00ce2_00000,169.906
lambda_00ce2_00001,170.202
lambda_00ce2_00002,172.426
lambda_00ce2_00003,172.041
lambda_00ce2_00004,172.548
lambda_00ce2_00005,172.548
lambda_00ce2_00006,171.199
lambda_00ce2_00007,171.28
lambda_00ce2_00008,168.037
lambda_00ce2_00009,168.037


[36m(<lambda> pid=27856)[0m Average MSE: 170.20249513960556 +/- 161.41568280211285[32m [repeated 10x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m


2023-11-25 20:49:28,634	INFO tune.py:1047 -- Total run time: 13.98 seconds (13.96 seconds for the tuning loop).
2023-11-25 20:49:29,402	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


X_train shape: (78, 256)
X_test shape: (20, 256)
y_train shape: 78
y_test shape: 20


0,1
Current time:,2023-11-25 20:49:46
Running for:,00:00:16.74
Memory:,13.9/63.9 GiB

Trial name,status,loc,max_depth,min_samples_leaf,min_samples_split,model_type,n_estimators,iter,total time (s),mean_squared_error
lambda_09995_00000,TERMINATED,127.0.0.1:9644,20,2,2,random_forest,200,1,5.00702,164.646
lambda_09995_00001,TERMINATED,127.0.0.1:4960,20,4,2,random_forest,100,1,1.98651,152.67
lambda_09995_00002,TERMINATED,127.0.0.1:27476,10,2,2,random_forest,100,1,2.88551,165.611
lambda_09995_00003,TERMINATED,127.0.0.1:13040,10,4,5,random_forest,200,1,3.44251,154.743
lambda_09995_00004,TERMINATED,127.0.0.1:5816,10,1,2,random_forest,300,1,9.7713,170.378
lambda_09995_00005,TERMINATED,127.0.0.1:26560,30,4,10,random_forest,100,1,1.712,152.887
lambda_09995_00006,TERMINATED,127.0.0.1:7452,30,4,10,random_forest,100,1,1.78951,152.887
lambda_09995_00007,TERMINATED,127.0.0.1:24168,10,1,10,random_forest,100,1,2.62851,163.443
lambda_09995_00008,TERMINATED,127.0.0.1:14068,20,1,10,random_forest,200,1,4.63602,164.423
lambda_09995_00009,TERMINATED,127.0.0.1:31604,10,2,5,random_forest,100,1,2.80851,164.987


Trial name,mean_squared_error
lambda_09995_00000,164.646
lambda_09995_00001,152.67
lambda_09995_00002,165.611
lambda_09995_00003,154.743
lambda_09995_00004,170.378
lambda_09995_00005,152.887
lambda_09995_00006,152.887
lambda_09995_00007,163.443
lambda_09995_00008,164.423
lambda_09995_00009,164.987


[36m(<lambda> pid=26560)[0m Average MSE: 152.8873344286403 +/- 149.40513110066524[32m [repeated 10x across cluster][0m


2023-11-25 20:49:46,150	INFO tune.py:1047 -- Total run time: 16.75 seconds (16.73 seconds for the tuning loop).


[36m(<lambda> pid=5816)[0m Average MSE: 170.37757425768325 +/- 146.40150846844097[32m [repeated 9x across cluster][0m


2023-11-25 20:49:46,568	INFO tune.py:586 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


X_train shape: (78, 256)
X_test shape: (20, 256)
y_train shape: 78
y_test shape: 20


0,1
Current time:,2023-11-25 20:50:01
Running for:,00:00:15.25
Memory:,14.9/63.9 GiB

Trial name,status,loc,max_depth,min_samples_leaf,min_samples_split,model_type,n_estimators,iter,total time (s),mean_squared_error
lambda_13d4b_00002,RUNNING,127.0.0.1:11428,20,1,2,random_forest,300,,,
lambda_13d4b_00005,RUNNING,127.0.0.1:27172,20,1,2,random_forest,300,,,
lambda_13d4b_00006,RUNNING,127.0.0.1:6108,10,1,2,random_forest,300,,,
lambda_13d4b_00008,RUNNING,127.0.0.1:26892,30,1,5,random_forest,300,,,
lambda_13d4b_00000,TERMINATED,127.0.0.1:34652,10,4,10,random_forest,300,1.0,5.204,165.168
lambda_13d4b_00001,TERMINATED,127.0.0.1:31720,30,2,2,random_forest,200,1.0,5.537,163.384
lambda_13d4b_00003,TERMINATED,127.0.0.1:24988,10,2,2,random_forest,100,1.0,3.037,162.571
lambda_13d4b_00004,TERMINATED,127.0.0.1:25964,20,1,2,random_forest,100,1.0,4.235,157.338
lambda_13d4b_00007,TERMINATED,127.0.0.1:20124,20,1,10,random_forest,100,1.0,2.764,153.914
lambda_13d4b_00009,TERMINATED,127.0.0.1:23336,30,4,5,random_forest,200,1.0,3.89,165.021


Trial name,mean_squared_error
lambda_13d4b_00000,165.168
lambda_13d4b_00001,163.384
lambda_13d4b_00003,162.571
lambda_13d4b_00004,157.338
lambda_13d4b_00007,153.914
lambda_13d4b_00009,165.021


[36m(<lambda> pid=20124)[0m Average MSE: 153.91406363338876 +/- 145.68796259504234
[36m(<lambda> pid=24988)[0m Average MSE: 162.57139066261024 +/- 150.36131112563322
