In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys
from scipy.stats import norm

sys.path.append('../model_generator')
sys.path.append('../patient_sample_generator')
sys.path.append('../ebm')

from model_generator.model_factory import ModelFactory
from patient_sample_generator.sample_generator import SampleGenerator
from ebm.ebm_transformer import EBMModel

In [4]:
# Function to run the experiment
def run_single_experiment(model_type, params, n_patients, random_state, use_prior,
                          *, dist_params=None, noise_std=0.15,
                          greedy_iters=10000, mcmc_iters=500000):
    """
    Runs a single experiment: builds a progression model, generates synthetic
    patients from its stage values, fits an EBM on them and collects the scores.

    Parameters:
        model_type (str): Model type forwarded to ModelFactory.create_model
            ('acp', 'logistic', 'transition').
        params (dict): Keyword arguments forwarded to ModelFactory.create_model.
        n_patients (int): Number of patients for sample generation.
        random_state (int): Seed passed to BOTH the SampleGenerator and the EBMModel.
        use_prior (bool): If True, the model's connectivity matrix is passed to
            the EBM as its prior; otherwise the prior is None.
        dist_params (dict, optional): Parameters passed to the SampleGenerator
            together with the `norm` distribution. Defaults to
            {'loc': 3, 'scale': 4}. A copy is passed on, so the caller's dict
            is never shared with the generator.
        noise_std (float): `noise_std` passed to the SampleGenerator (noise is
            always enabled via add_noise=True). Defaults to 0.15.
        greedy_iters (int): `greedy_iters` passed to the EBMModel. Defaults to 10000.
        mcmc_iters (int): `mcmc_iters` passed to the EBMModel. Defaults to 500000.
            Lower this (and greedy_iters) for quick smoke runs.

    Returns:
        dict: The run settings ('model_type', 'n_patients', 'random_state',
        'use_prior') plus 'spearman_score' (from ebm.score()), 'kendall_score'
        and 'best_order' (both read from ebm.results).
    """
    # Build a fresh dict per call so no state is shared between runs or with the caller.
    if dist_params is None:
        dist_params = {'loc': 3, 'scale': 4}
    else:
        dist_params = dict(dist_params)

    # Step 1: Create the disease progression model
    model = ModelFactory.create_model(model_type, **params)
    model.fit()

    # Step 2: Get the prior (connectivity matrix) if needed
    prior = model.get_connectivity_matrix() if use_prior else None

    # Step 3: Transform stage values
    stage_values = model.transform(X=None)

    # Step 4: Fit the SampleGenerator on the stage values
    sample_gen = SampleGenerator(n_patients=n_patients,
                                 distribution=norm,
                                 dist_params=dist_params,
                                 add_noise=True,
                                 noise_std=noise_std,
                                 random_state=random_state)
    sample_gen.fit(stage_values)

    # Generate synthetic patient data using the transform method
    X = sample_gen.transform(X=None)
    y = sample_gen.get_y()

    # Step 5: Run the Event-Based Model (EBM)
    ebm = EBMModel(prior=prior,
                   random_state=random_state,
                   greedy_iters=greedy_iters,
                   mcmc_iters=mcmc_iters)
    ebm.fit(X, y)

    # Collect results
    results = {
        'model_type': model_type,
        'n_patients': n_patients,
        'random_state': random_state,
        'use_prior': use_prior,
        'spearman_score': ebm.score(),
        'kendall_score': ebm.results['best_kendalltau'],
        'best_order': ebm.results['best_order']
    }

    return results


In [5]:
# Model configuration forwarded unchanged to every run_single_experiment call
params = dict(
    n_stages=10,
    coeff=0.55e-1,
    start_time=0,
    end_time=100,
    steps=100,
    convergence_threshold=1e-4,
)

# Experiment grid: cohort sizes x seeds. Only the 'transition' model is run in this cell.
n_patients_list = [100, 500, 1000]  # cohort sizes
random_states = range(1, 11)  # seeds 1 to 10
results = []

# Transition model experiments
for n_patients in n_patients_list:
    for random_state in random_states:
        # Same cohort size and seed: first with the prior, then without it
        for use_prior in (True, False):
            results.append(
                run_single_experiment('transition', params, n_patients, random_state, use_prior=use_prior)
            )

# One row per run; written to CSV only after the whole grid has finished
df_results = pd.DataFrame(results)
df_results.to_csv('transition_model_results.csv', index=False)
print("Transition model experiment complete. Results saved to 'transition_model_results.csv'.")

ValueError: setting an array element with a sequence.