#### Import the necessary files

In [1]:
import os
import pandas as pd
import numpy as np
from lifelines import CoxPHFitter
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt


#### Load and Prepare Data

In [2]:
class TargetTrialEmulator:
    def __init__(self, estimand="ITT"):
        self.estimand = estimand
        self.weights = None
        self.model = None
        self.expanded_data = None
        
    def prepare_data(self, data_path):
        self.data = pd.read_csv(data_path)
        self.data['age_s'] = self.data['age'] + self.data['period']/12
        return self

#### Modified Code segment added which helps to identify subgroups of patients with similar baseline characteristics and analyze treatment effects within clusters.

In [3]:
def cluster_patients(self, n_clusters=3):
        """Cluster patients based on baseline features."""
        from sklearn.cluster import KMeans
        
        # Extract baseline features (e.g., first observation per patient)
        baseline = self.data.groupby('id').first()[['age', 'x1', 'x2', 'x3']]
        
        # Fit KMeans
        kmeans = KMeans(n_clusters=n_clusters).fit(baseline)
        
        # Map cluster labels back to original data
        self.data['cluster'] = self.data['id'].map(
            pd.Series(kmeans.labels_, index=baseline.index)
        )
        return self    

In [4]:
def calculate_weights(self):
        # Treatment switching weights (PP only)
        if self.estimand == "PP":
            switch_num = LogisticRegression()
            switch_num.fit(self.data[['age']], self.data['treatment'])
            numer = switch_num.predict_proba(self.data[['age']])[:,1]
            
            switch_den = LogisticRegression()
            switch_den.fit(self.data[['age', 'x1', 'x3']], self.data['treatment'])
            denom = switch_den.predict_proba(self.data[['age', 'x1', 'x3']])[:,1]
            
            switch_weights = numer / denom
        else:
            switch_weights = np.ones(len(self.data))
            
        # Censoring weights
        censor_model = LogisticRegression()
        censor_model.fit(self.data[['x2', 'x1']], self.data['censored'])
        censor_probs = censor_model.predict_proba(self.data[['x2', 'x1']])[:,0]
        censor_weights = 1 / censor_probs
        
        # Combine weights
        self.weights = switch_weights * censor_weights
        return self
    
def expand_trials(self, chunk_size=500):
        # Simplified trial expansion (conceptual)
        expanded = []
        for period in self.data['period'].unique():
            period_data = self.data[self.data['period'] == period].copy()
            period_data['trial_period'] = period
            expanded.append(period_data)
        
        self.expanded_data = pd.concat(expanded)
        return self
    
def fit_msm(self):
        # Winsorize weights
        q99 = np.quantile(self.weights, 0.99)
        self.expanded_data['weights'] = np.minimum(self.weights, q99)
        
        # Fit Cox PH model
        self.model = CoxPHFitter()
        self.model.fit(self.expanded_data[['treatment', 'x2', 'period', 'outcome', 'weights']],
                      duration_col='period',
                      event_col='outcome',
                      weights_col='weights',
                      robust=True 
        )
        return self
    
def predict_survival(self, periods=10):
        prediction_data = self.expanded_data[self.expanded_data['period'] == 1].copy()
        return self.model.predict_survival_function(prediction_data)
    
def plot_survival(self):
        self.model.plot()
        plt.title(f"{self.estimand} Survival Curves")
        plt.show()

### Execution Workflow

In [None]:
print("Running ITT Analysis...")
emulator_itt = TargetTrialEmulator(estimand="ITT")
emulator_itt.prepare_data("data_censored.csv")
emulator_itt.cluster_patients(n_clusters=3)
emulator_itt.calculate_weights()
emulator_itt.expand_trials()
emulator_itt.fit_cluster_msm()

print("\nITT Model Summary:")
print(emulator_itt.model.print_summary())

# Initialize and run PP analysis
print("\nRunning PP Analysis...")
emulator_pp = TargetTrialEmulator(estimand="PP")
emulator_pp.prepare_data("data_censored.csv")
emulator_pp.calculate_weights()
emulator_pp.expand_trials()
emulator_pp.fit_msm()

print("\nPP Model Summary:")
print(emulator_pp.model.print_summary())

for cluster, model in emulator_itt.cluster_models.items():
    print(f"Cluster {cluster} Summary:")
    print(model.print_summary())

# Generate predictions and plots
print("\nGenerating Survival Predictions...")
emulator_itt.plot_survival()
emulator_pp.plot_survival()

# Save model outputs
emulator_itt.expanded_data.to_csv("itt_expanded_data.csv", index=False)
emulator_pp.expanded_data.to_csv("pp_expanded_data.csv", index=False)

Running ITT Analysis...


AttributeError: 'TargetTrialEmulator' object has no attribute 'calculate_weights'