In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
import seaborn as sns
import pickle
import json
import sys
import argparse
import torch
import random
import os

from pathlib import Path
# Add the parent directory to the system path
sys.path.append(str(Path().resolve().parent))

from causal_meta_learners.causal_inference_modeling import *
from causal_meta_learners.experiment_setup import *
from causal_meta_learners.survival_models import *

## Initialize the Arguments

In [2]:
from datetime import datetime

# Timestamp that makes this session's default output file name unique.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")


def _str_to_bool(value):
    """Convert a command-line string such as "true"/"false" into a bool.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value -- including "False" -- is parsed as True. This converter
    interprets the text instead.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string is kept as False because it was the only value the old
    # ``type=bool`` parser mapped to False.
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


# Simulating command-line arguments in Jupyter Notebook
sys.argv = [
    "notebook", 
    "--data_address", "../data_splits/mental-health-full/people_dict_unfiltered_expanded.pickle", 
    "--dataframe_address", "../data_generation/adherence_export_expanded.csv", 
    "--output_address", f"./results_ablation_{current_datetime}",
    "--num_repeats", "5"
]

parser = argparse.ArgumentParser(description="Run experiments with causal inference models.")
parser.add_argument("--data_address", type=str, required=True, help="Path to the data pickle file.")
parser.add_argument("--dataframe_address", type=str, required=True, help="Path to the dataframe CSV file.")
# run_experiment appends '.pickle' to this value, so it is a path prefix rather than a JSON file.
parser.add_argument("--output_address", type=str, required=True, help="Path prefix of the output pickle file ('.pickle' is appended).")
parser.add_argument("--horizon", type=int, default=12, help="Horizon in months.")
parser.add_argument("--non_adherence_threshold", type=float, default=1./3, help="Non-adherence threshold.")
parser.add_argument("--minimum_num_time_steps", type=int, default=4, help="Minimum number of time steps.")
parser.add_argument("--low_occurrency_threshold", type=int, default=2, help="Low occurrence threshold.")
parser.add_argument("--experiment_task", type=str, default="survival", help="Experiment task type.")
parser.add_argument("--experiment_type", type=str, default="Composite Event", help="Experiment type.")
parser.add_argument("--experiment_num", type=str, default="SA", help="Experiment number.")
parser.add_argument("--handle_imbalance", type=_str_to_bool, default=True, help="Handle imbalance in the data.")
parser.add_argument("--num_repeats", type=int, default=5, help="Number of random seeds to use.")

# Parses the simulated sys.argv assigned above.
args = parser.parse_args()

print(args.output_address)

./results_ablation_20250130041810


In [3]:
# Generate random seeds
# Generate random seeds
# A fixed meta-seed makes the list of per-repeat seeds reproducible across sessions.
np.random.seed(0)
# Draw at least 10 candidates: for num_repeats <= 10 this is exactly the same draw as
# before. Drawing more when num_repeats > 10 avoids silently capping the repeats at 10.
random_seeds = np.random.randint(0, 10000, max(10, args.num_repeats)).tolist()
random_seeds = random_seeds[:args.num_repeats]
print(random_seeds)

[2732, 9845, 3264, 4859, 9225]


In [4]:
def set_all_seeds(seed):
    """Seed the random number generators used in this notebook with ``seed``.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (through both
    ``torch.manual_seed`` and ``torch.cuda.manual_seed``), and records the seed in
    the ``PYTHONHASHSEED`` environment variable.
    """
    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for apply_seed in seeders:
        apply_seed(seed)
    # NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
    # presumably only influences processes launched after this call -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

In [5]:
import pickle
import pandas as pd
import numpy as np
import os

def _atomic_pickle_dump(obj, path):
    """Pickle ``obj`` to ``path`` by writing ``path + '.tmp'`` and renaming it.

    The rename via ``os.replace`` means an interrupted write cannot leave a
    truncated file at ``path``.
    """
    temp_path = path + '.tmp'
    with open(temp_path, 'wb') as f:
        pickle.dump(obj, f)
    os.replace(temp_path, path)  # Atomically replace the old file with the new one


def run_experiment(
    data_address, 
    dataframe_address, 
    output_address,
    horizon=12, 
    non_adherence_threshold=1./3, 
    minimum_num_time_steps=4, 
    low_occurrency_threshold=2, 
    experiment_task="survival", 
    experiment_type="Composite Event", 
    experiment_num="SA", 
    handle_imbalance=True,
    continuous_covariates_lst=None, 
    post_hoc_covariates_lst=None,
    ML_models=None,
    num_matches_list=None,
    random_seeds=None
):
    """Run the meta-learner experiment for every (model, seed) pair and pickle the results.

    Results are written to ``output_address + '.pickle'`` after each completed run.
    If that file already exists it is loaded first, and any (model, seed) pair it
    already contains is skipped, so an interrupted call can be resumed.

    Args:
        data_address: Path of the pickle file that is loaded and passed to
            ``PatientData`` as its first argument.
        dataframe_address: Path of the CSV file read with ``pd.read_csv`` and
            passed to ``PatientData`` as its second argument.
        output_address: Output path without extension; ``'.pickle'`` is appended.
        horizon, non_adherence_threshold, minimum_num_time_steps,
        low_occurrency_threshold, experiment_type: Forwarded unchanged to
            ``PatientData``.
        experiment_task: Forwarded to ``PatientData`` and
            ``get_meta_learner_results`` as ``task``.
        experiment_num: Forwarded to
            ``PatientData.get_causal_data_setup_for_each_experiment`` and to
            ``get_meta_learner_results``.
        handle_imbalance: Forwarded to ``get_meta_learner_results``.
        continuous_covariates_lst: Defaults to ``['age']``.
        post_hoc_covariates_lst: Defaults to
            ``['covered_by', 'covered_by_injectable']``.
        ML_models: List of single-key dicts ``{model_name: params}``. Defaults to
            CoxPH, RandomSurvivalForest, DeepSurv and DeepHit.
        num_matches_list: K values passed to ``get_meta_learner_results``.
            Defaults to ``[1, 2, 5, 10, 20, 50, 100]``.
        random_seeds: Seeds to run for each model. Defaults to ``[42]``.

    Returns:
        None. Nothing is returned so that a notebook cell ending with this call
        does not echo the whole results dictionary.
    """
    # Set default covariates if not provided
    if continuous_covariates_lst is None:
        continuous_covariates_lst = ['age']
    if post_hoc_covariates_lst is None:
        post_hoc_covariates_lst = ['covered_by', 'covered_by_injectable']
    if ML_models is None:
        ML_models = [{"CoxPH": {'penalizer': 0.0001}}, {"RandomSurvivalForest": {}}, {"DeepSurv": {}}, {"DeepHit": {}}]
    # None sentinels replace mutable list defaults: these lists are stored in the
    # results and handed to other functions, so a shared default could be aliased.
    if num_matches_list is None:
        num_matches_list = [1, 2, 5, 10, 20, 50, 100]
    if random_seeds is None:
        random_seeds = [42]

    # Load data
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    with open(data_address, 'rb') as handle:
        people_dict = pickle.load(handle)
    data_df = pd.read_csv(dataframe_address)

    # Create the output directory up front, so a missing directory fails here
    # rather than after the first (expensive) run has finished.
    results_file = output_address + '.pickle'
    output_dir = os.path.dirname(results_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Initialize or load existing results
    if os.path.exists(results_file):
        with open(results_file, 'rb') as f:
            results = pickle.load(f)
    else:
        results = {}

    # Save hyperparameters
    # NOTE: when resuming, the stored hyper-parameters are replaced by the current
    # ones, while completed runs are recognised only by model name and seed.
    results["hyper_params"] = {
        "data_address": data_address,
        "dataframe_address": dataframe_address,
        "output_address": output_address,
        "horizon": horizon,
        "non_adherence_threshold": non_adherence_threshold,
        "minimum_num_time_steps": minimum_num_time_steps,
        "low_occurrency_threshold": low_occurrency_threshold,
        "experiment_task": experiment_task,
        "experiment_type": experiment_type,
        "experiment_num": experiment_num,
        "handle_imbalance": handle_imbalance,
        "continuous_covariates_lst": continuous_covariates_lst,
        "post_hoc_covariates_lst": post_hoc_covariates_lst,
        "ML_models": ML_models,
        "num_matches_list": num_matches_list,
        "random_seeds": random_seeds
    }

    # Run experiments for each model and seed
    for ML_model in ML_models:
        # Each entry is a single-key dict; the key is the model name.
        model_name = list(ML_model.keys())[0]
        if model_name not in results:
            results[model_name] = {}

        for random_seed in random_seeds:
            seed_key = str(random_seed)
            if seed_key in results[model_name]:
                print(f"Skipping {model_name} with random seed {random_seed} as it already exists.")
                continue

            set_all_seeds(random_seed)

            # Initialize patient data
            patient_data = PatientData(
                people_dict, data_df, 
                experiment_type=experiment_type,
                task=experiment_task, 
                horizon=horizon, 
                non_adherence_threshold=non_adherence_threshold, 
                minimum_num_time_steps=minimum_num_time_steps, 
                low_occurrency_threshold=low_occurrency_threshold,
                continuous_covariates_lst=continuous_covariates_lst,
                post_hoc_covariates_lst=post_hoc_covariates_lst,
                random_seed=random_seed
            )
            causal_data_dict = patient_data.get_causal_data_setup_for_each_experiment(experiment_num, random_state=random_seed)

            print(f"Running {model_name} with random seed {random_seed}...")
            results[model_name][seed_key] = get_meta_learner_results(
                causal_data_dict, ML_model, handle_imbalance, 
                ['t-learner', 's-learner', 'matching'], num_matches_list, 
                experiment_num, experiment_type, 
                task=experiment_task, 
                task_setup_dict={"metric": "mean", "max_time": np.inf, "extrapolate_median": True},
                is_trimmed=False, 
                plot_results=False, 
                print_results=False, 
                verbose_output=False
            )
            # Save the causal data dict for the run
            results[model_name][seed_key]['causal_data_dict'] = causal_data_dict
            print("-" * 100)

            # Save results incrementally so completed runs survive an interruption
            _atomic_pickle_dump(results, results_file)

    print(f"Results saved to {results_file}")

In [6]:
# Baseline run: this cell applies no overrides, so `args` is used exactly as it is currently set.
# To write to a fresh output file and pin the time-step threshold, uncomment the lines below.
# current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
# args.output_address = f"./results_{current_datetime}"
# args.minimum_num_time_steps = 4
# print(args.output_address)

# Forward the parsed arguments and the generated seeds to the experiment runner.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    random_seeds=random_seeds,
)

Running CoxPH with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running CoxPH with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running CoxPH with random seed 3264...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) 

In [None]:
# Ablation run with minimum_num_time_steps = 7, written to a freshly timestamped output file.
# Note that `args` is modified in place, so later cells see these values.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
args.minimum_num_time_steps = 7
args.output_address = f"./results_ablation_{current_datetime}"
print(args.output_address)

# Forward the (updated) arguments and the generated seeds to the experiment runner.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    random_seeds=random_seeds,
)

./results_20250130042515
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running CoxPH with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running CoxPH with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
--------------------------------------------------

  delta = solve(
  risk_set += np.exp(xw[k])
  risk_set2 += np.exp(xw[k])
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(
  delta = solve(


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:4859] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running CoxPH with random seed 4859...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running CoxPH with random seed 9225

In [None]:
# Ablation run with minimum_num_time_steps = 10, written to a freshly timestamped output file.
# Note that `args` is modified in place, so later cells see these values.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
args.minimum_num_time_steps = 10
args.output_address = f"./results_ablation_{current_datetime}"
print(args.output_address)

# Forward the (updated) arguments and the generated seeds to the experiment runner.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    random_seeds=random_seeds,
)

./results_20250130043044
Running CoxPH with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running CoxPH with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running CoxPH with random seed 3264...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...

  delta = solve(
  risk_set2 += np.exp(xw[k])


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running RandomSurvivalForest with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running RandomSurvivalForest with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ..

In [None]:
# Ablation run with minimum_num_time_steps = 13, written to a freshly timestamped output file.
# Note that `args` is modified in place, so later cells see these values.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
args.minimum_num_time_steps = 13
args.output_address = f"./results_ablation_{current_datetime}"
print(args.output_address)

# Only RandomSurvivalForest, DeepSurv and DeepHit are run here; the default model list
# in run_experiment additionally contains CoxPH.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    ML_models=[
        {"RandomSurvivalForest": {}},
        {"DeepSurv": {}},
        {"DeepHit": {}},
    ],
    random_seeds=random_seeds,
)

./results_20250130043637
[Random-Seed:2732] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Running RandomSurvivalForest with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:3264] Standard deviation of columns in

In [None]:
# Ablation run with minimum_num_time_steps = 19, written to a freshly timestamped output file.
# Note that `args` is modified in place, so later cells see these values.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
args.minimum_num_time_steps = 19
args.output_address = f"./results_ablation_{current_datetime}"
print(args.output_address)

# Only RandomSurvivalForest, DeepSurv and DeepHit are run here; the default model list
# in run_experiment additionally contains CoxPH.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    ML_models=[
        {"RandomSurvivalForest": {}},
        {"DeepSurv": {}},
        {"DeepHit": {}},
    ],
    random_seeds=random_seeds,
)

./results_20250130044059
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
--------------------

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 9225...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv wit

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random seed 9225...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 9225...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Results saved to ./results_20250130044059.pickle


In [None]:
# Ablation run with minimum_num_time_steps = 25, written to a freshly timestamped output file.
# Note that `args` is modified in place, so later cells see these values.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
args.minimum_num_time_steps = 25
args.output_address = f"./results_ablation_{current_datetime}"
print(args.output_address)

# Only RandomSurvivalForest, DeepSurv and DeepHit are run here; the default model list
# in run_experiment additionally contains CoxPH.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    ML_models=[
        {"RandomSurvivalForest": {}},
        {"DeepSurv": {}},
        {"DeepHit": {}},
    ],
    random_seeds=random_seeds,
)

./results_20250130044201
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 2732...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:3264] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurviv

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:4859] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 4859...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 9225...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random seed 2732...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:3264] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random see

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:4859] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random seed 4859...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random seed 9225...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:2732] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 2732...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:3264] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:4859] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 4859...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9225] Standard deviation of columns in the total set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 9225...
Running t-learner ...


  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Results saved to ./results_20250130044201.pickle


In [12]:
# Ablation run with minimum_num_time_steps = 37, written to a freshly timestamped output file.
# Note that `args` is modified in place, so later cells see these values.
current_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
args.minimum_num_time_steps = 37
args.output_address = f"./results_ablation_{current_datetime}"
print(args.output_address)

# Only RandomSurvivalForest, DeepSurv and DeepHit are run here; the default model list
# in run_experiment additionally contains CoxPH.
run_experiment(
    data_address=args.data_address,
    dataframe_address=args.dataframe_address,
    output_address=args.output_address,
    horizon=args.horizon,
    non_adherence_threshold=args.non_adherence_threshold,
    minimum_num_time_steps=args.minimum_num_time_steps,
    low_occurrency_threshold=args.low_occurrency_threshold,
    experiment_task=args.experiment_task,
    experiment_type=args.experiment_type,
    experiment_num=args.experiment_num,
    handle_imbalance=args.handle_imbalance,
    ML_models=[
        {"RandomSurvivalForest": {}},
        {"DeepSurv": {}},
        {"DeepHit": {}},
    ],
    random_seeds=random_seeds,
)

./results_20250130044258
[Random-Seed:2732] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running RandomSurvivalForest with random seed 9845...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
--------------

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:2732] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepSurv with rand

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:2732] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random seed 2732...
Running t-learner ...
Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
[Random-Seed:9845] Standard deviation of columns in the training set is 0 (for one of the treatment assignments). Rearranging the data...
Running DeepHit with random

  true_pos = cumsum_tp / cumsum_tp[-1]


Running s-learner ...
Running matching (K=1) ...
Running matching (K=2) ...
Running matching (K=5) ...
Running matching (K=10) ...
Running matching (K=20) ...
Running matching (K=50) ...
Running matching (K=100) ...
----------------------------------------------------------------------------------------------------
Results saved to ./results_20250130044258.pickle
