# Example workflow
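This notebook walks through a complete example workflow: it generates a mock survival dataset, builds bootstrap resamples of it, and runs the backward pipeline with the Cox model on the result.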

In [None]:
# Install the third-party packages into the running kernel's environment
# (%pip targets the kernel's interpreter, unlike a bare `!pip`).
# pandas and numpy are used directly by the cells below; the remaining
# packages are presumably required by pipelineBackward — TODO confirm.
# NOTE(review): versions are unpinned, so results may drift across installs.
%pip install pandas numpy scikit-survival scikit-learn scipy

### Generate mock data

In [None]:
import os
import pandas as pd
import numpy as np

# Generate a small synthetic dataset and persist it to disk.
#
# Columns of the resulting table (one row per sample):
#   outcome       - binary outcome, drawn uniformly from {0, 1}
#   outcome_time  - survival time, exponential draw with scale 10
#   feature_1..N  - independent standard-normal feature columns
MOCK_DATA_SEED = 42  # Fixed seed so a fresh kernel regenerates the identical CSV
n_samples = 200  # Number of samples
n_features = 5   # Number of feature columns
n_bootstraps = 10  # Number of bootstrap iterations (not used in this cell)

# A dedicated Generator makes the draws reproducible without touching
# NumPy's global random state.
data_rng = np.random.default_rng(MOCK_DATA_SEED)

data = {
    "outcome": data_rng.choice([0, 1], size=n_samples),  # Binary outcome
    "outcome_time": data_rng.exponential(scale=10, size=n_samples),  # Survival times
}

# Add feature columns (feature_1 .. feature_<n_features>), in order
data.update(
    {f"feature_{i}": data_rng.normal(size=n_samples) for i in range(1, n_features + 1)}
)

# Create a DataFrame
mock_data = pd.DataFrame(data)

# Save to CSV for script compatibility
os.makedirs("dataset/mock", exist_ok=True)
mock_data.to_csv("dataset/mock/mock_data.csv", index=False)
print("Mock data created and saved as 'dataset/mock/mock_data.csv'.")


In [None]:
import pandas as pd
import numpy as np

# Build bootstrap resamples of the mock dataset and persist them to disk.
#
# For every iteration i = 1..n_bootstraps two groups of rows are emitted,
# distinguished by the "boot" column:
#   boot == 2*i - 1 -> rows drawn with replacement (duplicates are expected)
#   boot == 2*i     -> rows that were never drawn in iteration i
# NOTE(review): the odd/even encoding is presumably what the downstream
# pipeline expects — confirm against pipelineBackward.

# Load the mock data
mock_data = pd.read_csv("dataset/mock/mock_data.csv")

BOOTSTRAP_SEED = 43  # Fixed seed so the resamples are reproducible on re-run
n_bootstraps = 10  # Number of bootstrap iterations
n_samples = len(mock_data)

# A dedicated Generator keeps the resampling reproducible without relying on
# (or disturbing) NumPy's global random state.
boot_rng = np.random.default_rng(BOOTSTRAP_SEED)

# Create a list to store bootstrap results
bootstrap_results = []

for i in range(1, n_bootstraps + 1):
    # Random sampling with replacement
    sampled_indices = boot_rng.choice(
        mock_data.index.to_numpy(), size=n_samples, replace=True
    )
    # Index.difference returns the sorted, unique labels that were not drawn,
    # so the row order does not depend on set iteration order.
    not_sampled_indices = mock_data.index.difference(sampled_indices)

    # .loc with a label array returns a new frame; .assign adds the tag column
    # without mutating mock_data.
    sampled_data = mock_data.loc[sampled_indices].assign(boot=i * 2 - 1)
    not_sampled_data = mock_data.loc[not_sampled_indices].assign(boot=i * 2)

    # Append both sampled and not sampled data for this bootstrap iteration
    bootstrap_results.extend([sampled_data, not_sampled_data])

# Concatenate all bootstrap results into a single DataFrame
final_bootstrap_data = pd.concat(bootstrap_results, ignore_index=True)

# Save to CSV
final_bootstrap_data.to_csv("dataset/mock/mock_boots.csv", index=False)

print("Bootstrap data created and saved as 'dataset/mock/mock_boots.csv'.")

### Run Cox model on mock data

In [None]:
import importlib
import pipelineBackward
# Reload the module so that edits made to pipelineBackward since the kernel
# first imported it take effect without a kernel restart.
importlib.reload(pipelineBackward)

# Arguments for the pipeline run: the model name and the bootstrap CSV written
# by the bootstrap cell of this notebook.
model_name = "Cox"
boots_csv_path = "dataset/mock/mock_boots.csv"

pipelineBackward.run_pipeline_backward(model_name, boots_csv_path)
