In [115]:
import os
import pandas as pd
import statsmodels.api as sm

# Function to create directories
def create_trial_directory(dir_name):
    """Create ``dir_name`` under the current working directory if needed.

    Args:
        dir_name: Folder name (or path) joined onto ``os.getcwd()``.

    Returns:
        The joined path of the directory, which exists on return.
    """
    target = os.path.join(os.getcwd(), dir_name)
    if not os.path.isdir(target):
        # exist_ok keeps this safe if the folder appears between the check
        # and the call; a non-directory at that path still raises.
        os.makedirs(target, exist_ok=True)
    return target

# Make sure one output folder exists per estimand (PP first, then ITT)
trial_pp_dir, trial_itt_dir = [
    create_trial_directory(folder) for folder in ("trial_pp", "trial_itt")
]

# Read the censored dataset from disk and preview its first rows
data_censored = pd.read_csv(
    'c:/Users/USER/Documents/3rd year 2nd sem/Data Analytics/Assignments_Data_Analytics/Assignment_1_Clustering_Data_Analytics/data_censored.csv'
)
print(data_censored.head())

# Define logistic model fitting function
def fit_logistic_model(data, predictors, outcome):
    """Fit a logistic regression of ``outcome`` on ``predictors``.

    Args:
        data: Table indexable by column name (``data[predictors]`` and
            ``data[outcome]`` are both evaluated).
        predictors: Column names used as regressors; an intercept column is
            added via ``sm.add_constant``.
        outcome: Name of the response column.

    Returns:
        The result of ``sm.Logit(...).fit(disp=0)``.
    """
    design = sm.add_constant(data[predictors])
    response = data[outcome]
    # disp=0 silences the optimizer's convergence printout.
    return sm.Logit(response, design).fit(disp=0)

# Define the trial sequence class
class TrialSequence:
    """Container for one trial estimand ("PP" or "ITT") and its weight models.

    Attributes are plain and public, as in the original design:

    * ``estimand`` -- label given at construction; "PP" and "ITT" select
      different branches in the weight-model methods.
    * ``data`` -- the working table (set directly or through ``set_data``).
    * ``switch_weights`` -- table of switch weights, or ``None`` until
      ``set_switch_weight_model`` has run for a PP trial.
    * ``censor_weight_model`` -- dict with keys ``censor_event``,
      ``numerator`` and ``denominator``, or ``None`` until configured.
    * ``weights`` -- fitted censoring models keyed by ``numerator``,
      ``denominator`` (PP) or ``denominator_d0`` / ``denominator_d1`` (ITT).
    """

    # Columns that set_data always keeps in addition to the five role columns.
    _EXTRA_COLUMNS = ('x1', 'x2', 'x3', 'x4', 'age', 'age_s', 'censored')

    # Fallback censoring specification, used when fit_censoring_models is
    # called without a prior set_censor_weight_model.
    _DEFAULT_CENSOR_EVENT = 'censored'
    _DEFAULT_NUMERATOR = ('x2',)
    _DEFAULT_DENOMINATOR = ('x2', 'x1')

    def __init__(self, estimand, data=None):
        """Store the estimand label and optional data; nothing is fitted."""
        self.estimand = estimand
        self.data = data
        self.switch_weights = None
        self.weights = {}
        # Initialised here so calculate_weights() can be called before a
        # censoring model has been configured without raising AttributeError.
        self.censor_weight_model = None
        # Role columns; overwritten by set_data with the caller's names.
        self._id_col = "id"
        self._period_col = "period"
        self._treatment_col = "treatment"

    def _require_data(self):
        """Return ``self.data`` or raise a clear error when it is unset."""
        if self.data is None:
            raise ValueError("No data available. Use set_data() first.")
        return self.data

    def set_data(self, data, id_col, period_col, treatment_col, outcome_col, eligible_col):
        """Keep a private copy of the role columns plus the fixed covariates.

        Args:
            data: Source table; must contain the five named columns and
                'x1', 'x2', 'x3', 'x4', 'age', 'age_s', 'censored'.
            id_col, period_col, treatment_col, outcome_col, eligible_col:
                Names of the corresponding columns in ``data``.
        """
        selected = [id_col, period_col, treatment_col, outcome_col, eligible_col]
        selected.extend(self._EXTRA_COLUMNS)
        self.data = data[selected].copy()
        # Remember the role names so later steps do not assume literal
        # "id" / "period" / "treatment" column names.
        self._id_col = id_col
        self._period_col = period_col
        self._treatment_col = treatment_col

    def set_switch_weight_model(self, numerator_formula, denominator_formula):
        """Fit treatment models and store their predicted-probability ratio.

        Only applies to the "PP" estimand; for any other estimand a message
        is printed and nothing changes.

        Args:
            numerator_formula: Predictor column names for the numerator model.
            denominator_formula: Predictor column names for the denominator
                model.

        Raises:
            ValueError: If no data has been set.
        """
        if self.estimand != "PP":
            print("Switch weight model is only applicable to PP estimand.")
            return
        data = self._require_data().copy()
        treatment = self._treatment_col
        numerator_model = fit_logistic_model(data, numerator_formula, outcome=treatment)
        denominator_model = fit_logistic_model(data, denominator_formula, outcome=treatment)

        # Row-wise ratio of the two models' in-sample predictions.
        data["switch_weight"] = numerator_model.predict() / denominator_model.predict()
        self.switch_weights = data[[self._id_col, self._period_col, treatment, "switch_weight"]]

    def set_censor_weight_model(self, censor_event, numerator, denominator):
        """Record the censoring-model specification used by calculate_weights.

        Args:
            censor_event: Name of the censoring indicator column.
            numerator: Predictor column names for the numerator model.
            denominator: Predictor column names for the denominator model(s).
        """
        self.censor_weight_model = {'censor_event': censor_event, 'numerator': numerator, 'denominator': denominator}

    def calculate_weights(self):
        """Fit the censoring models if a specification has been configured."""
        if self.censor_weight_model:
            self.fit_censoring_models()
        print(f"Weight models calculated for {self.estimand}.")

    def fit_censoring_models(self):
        """Fit numerator and denominator censoring models into ``self.weights``.

        Uses the specification stored by ``set_censor_weight_model``; when
        none was stored it falls back to outcome 'censored', numerator
        ['x2'] and denominator ['x2', 'x1'].

        * "PP": one denominator model on all rows (key ``denominator``).
        * "ITT": one denominator model per treatment value 0 and 1
          (keys ``denominator_d0`` and ``denominator_d1``).

        Raises:
            ValueError: If no data has been set.
        """
        data = self._require_data()
        spec = self.censor_weight_model or {}
        censor_event = spec.get('censor_event', self._DEFAULT_CENSOR_EVENT)
        numerator = list(spec.get('numerator', self._DEFAULT_NUMERATOR))
        denominator = list(spec.get('denominator', self._DEFAULT_DENOMINATOR))

        numerator_model = fit_logistic_model(data, numerator, outcome=censor_event)

        if self.estimand == "PP":
            self.weights['denominator'] = fit_logistic_model(data, denominator, outcome=censor_event)

        elif self.estimand == "ITT":
            # NOTE(review): the strata are described as "previous treatment"
            # but are selected on the current treatment column -- confirm
            # whether a lagged treatment was intended.
            treatment = data[self._treatment_col]
            data_d0 = data[treatment == 0].copy()
            data_d1 = data[treatment == 1].copy()
            denominator_model_d0 = fit_logistic_model(data_d0, denominator, outcome=censor_event)
            denominator_model_d1 = fit_logistic_model(data_d1, denominator, outcome=censor_event)

            self.weights['denominator_d0'] = denominator_model_d0
            self.weights['denominator_d1'] = denominator_model_d1

        self.weights['numerator'] = numerator_model

    def show_weight_models(self):
        """Print the summary of every fitted censoring model.

        Prints a hint and returns when nothing has been fitted yet.
        """
        if not self.weights:
            print("Weight models not fitted. Use calculate_weights()")
            return

        # NOTE(review): the labels below say "censor_event = 0", while the
        # models are fitted with the censoring indicator itself as the
        # response -- confirm which direction is intended.
        print(f"\n## Weight Models for Informative Censoring ({self.estimand})")
        print("## ---------------------------------------")
        print("\n[[n]]")
        print("Model: P(censor_event = 0 | X) for numerator")
        print(self.weights['numerator'].summary())

        # The PP branch stores a single pooled denominator model; show it too.
        if 'denominator' in self.weights:
            print("\n[[d]]")
            print("Model: P(censor_event = 0 | X) for denominator")
            print(self.weights['denominator'].summary())

        if 'denominator_d0' in self.weights:
            print("\n[[d0]]")
            print("Model: P(censor_event = 0 | X, previous treatment = 0) for denominator")
            print(self.weights['denominator_d0'].summary())

        if 'denominator_d1' in self.weights:
            print("\n[[d1]]")
            print("Model: P(censor_event = 0 | X, previous treatment = 1) for denominator")
            print(self.weights['denominator_d1'].summary())

# ITT trial: load the data, configure the censoring model, fit, and report
trial_itt = TrialSequence(estimand="ITT")
trial_itt.set_data(
    data_censored,
    id_col="id",
    period_col="period",
    treatment_col="treatment",
    outcome_col="outcome",
    eligible_col="eligible",
)
trial_itt.set_censor_weight_model(
    censor_event="censored",
    numerator=["x2"],
    denominator=["x2", "x1"],
)
trial_itt.calculate_weights()
trial_itt.show_weight_models()

# PP trial: same steps, plus the switch-weight model that only PP supports
trial_pp = TrialSequence(estimand="PP")
trial_pp.set_data(
    data_censored,
    id_col="id",
    period_col="period",
    treatment_col="treatment",
    outcome_col="outcome",
    eligible_col="eligible",
)
trial_pp.set_switch_weight_model(
    numerator_formula=["age"],
    denominator_formula=["age", "x1", "x3"],
)
trial_pp.set_censor_weight_model(
    censor_event="censored",
    numerator=["x2"],
    denominator=["x2", "x1"],
)
trial_pp.calculate_weights()
trial_pp.show_weight_models()


   id  period  treatment  x1        x2  x3        x4  age     age_s  outcome  \
0   1       0          1   1  1.146148   0  0.734203   36  0.083333        0   
1   1       1          1   1  0.002200   0  0.734203   37  0.166667        0   
2   1       2          1   0 -0.481762   0  0.734203   38  0.250000        0   
3   1       3          1   0  0.007872   0  0.734203   39  0.333333        0   
4   1       4          1   1  0.216054   0  0.734203   40  0.416667        0   

   censored  eligible  
0         0         1  
1         0         0  
2         0         0  
3         0         0  
4         0         0  
Weight models calculated for ITT.

## Weight Models for Informative Censoring (ITT)
## ---------------------------------------

[[n]]
Model: P(censor_event = 0 | X) for numerator
                           Logit Regression Results                           
Dep. Variable:               censored   No. Observations:                  725
Model:                          Logit 