In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from scipy.stats import norm
from statsmodels.stats.weightstats import DescrStatsW


In [23]:

# Function to generate synthetic data for testing IPSW.
def generate_ipsw_test_data(n_samples=1000, seed=42, treatment_effect=2.5):
    """
    Generates a synthetic dataset suitable for testing Inverse Propensity Score Weighting (IPSW).
    
    Args:
        n_samples (int): Number of samples.
        seed (int): Random seed.
        treatment_effect (float): The treatment effect to apply.
        
    Returns:
        pd.DataFrame: Synthetic dataset.
    """
    np.random.seed(seed)
    # Generate covariates
    covariate_1 = np.random.normal(0, 1, n_samples)
    covariate_2 = np.random.uniform(-2, 2, n_samples)
    covariate_3 = np.random.binomial(1, 0.4, n_samples)  # Binary covariate

    # Simulate treatment assignment using a logistic model on the covariates.
    propensity_score = 1 / (1 + np.exp(-(0.5 * covariate_1 + 0.8 * covariate_2 + 1.2 * covariate_3)))
    treatment = np.random.binomial(1, propensity_score, n_samples)

    # Simulate outcome. Here the outcome depends on the covariates, treatment, and added noise.
    outcome = (1.5 * covariate_1 + 2 * covariate_2 - 1 * covariate_3 +
               treatment_effect * treatment + np.random.normal(0, 2, n_samples))

    data = pd.DataFrame({
        'covariate_1': covariate_1,
        'covariate_2': covariate_2,
        'covariate_3': covariate_3,
        'treated': treatment,
        'outcome': outcome,
    })
    data['ID'] = range(n_samples)
    return data


In [24]:

def run_ipsw_no_streamlit(df, treatment_col='treated', outcome_col='outcome',
                           ipsw_predictors=['covariate_1', 'covariate_2', 'covariate_3'],
                           epsilon=1e-3):
    """
    Runs the IPSW analysis on the given DataFrame without using Streamlit.
    
    Returns:
        tuple: (ATE, ci_lower, ci_upper)
    """
    # Prepare variables
    X = df[ipsw_predictors]
    T = df[treatment_col].astype(int)  # Ensure binary treatment indicator
    Y = df[outcome_col]
    
    # Fit a logistic regression model for propensity score estimation
    propensity_model = LogisticRegression(random_state=42, max_iter=1000)
    propensity_model.fit(X, T)
    propensity_scores = propensity_model.predict_proba(X)[:, 1]
    
    # Apply dynamic clipping threshold
    propensity_scores = np.clip(propensity_scores, epsilon, 1 - epsilon)
    
    # Calculate weights
    weights = np.zeros_like(T, dtype=float)
    weights[T == 1] = 1 / propensity_scores[T == 1]
    weights[T == 0] = 1 / (1 - propensity_scores[T == 0])
    
    # Compute weighted means for treated and control groups
    treated_mean = np.average(Y[T == 1], weights=weights[T == 1])
    control_mean = np.average(Y[T == 0], weights=weights[T == 0])
    ATE = treated_mean - control_mean
    
    # Function to compute weighted standard error of the mean
    def weighted_se(y, weights):
        weighted_stats = DescrStatsW(y, weights=weights, ddof=1)
        return weighted_stats.std_mean
    
    se_treated = weighted_se(Y[T == 1], weights[T == 1])
    se_control = weighted_se(Y[T == 0], weights[T == 0])
    se_ATE = np.sqrt(se_treated**2 + se_control**2)
    
    # Calculate 95% confidence interval for ATE
    z_critical = norm.ppf(0.975)
    ci_lower = ATE - z_critical * se_ATE
    ci_upper = ATE + z_critical * se_ATE
    
    return ATE, ci_lower, ci_upper


In [25]:
from tqdm import tqdm


# Simulation settings
n_simulations = 2000  # Number of simulated experiments
n_samples = 3000      # Samples per experiment
false_positive_count = 0

# Run the simulation with treatment_effect set to 0 (i.e., no true effect)
for i in tqdm(range(n_simulations)):
    df_test = generate_ipsw_test_data(n_samples=n_samples, seed=42 + i, treatment_effect=0)
    ATE, ci_lower, ci_upper = run_ipsw_no_streamlit(df_test)
    # If the 95% confidence interval does not include 0, consider it a false positive
    if ci_lower > 0 or ci_upper < 0:
        false_positive_count += 1

# Calculate the percentage of false detections
false_positive_percentage = 100 * false_positive_count / n_simulations
print(f"False positive rate: {false_positive_percentage:.2f}% out of {n_simulations} simulations")


100%|██████████| 2000/2000 [00:05<00:00, 342.98it/s]

False positive rate: 11.30% out of 2000 simulations



