In [None]:
import numpy as np
from scipy import stats
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots
from statsmodels.api import OLS
import sklearn.model_selection as skm
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from ISLP import load_data
from ISLP.models import ModelSpec as MS
from functools import partial
import statsmodels.api as sm
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.tree import (DecisionTreeClassifier as DTC,
                          DecisionTreeRegressor as DTR,
                          plot_tree,
                          export_text)
from sklearn.metrics import (accuracy_score,
                             log_loss)
from sklearn.ensemble import \
     (RandomForestRegressor as RF,
      GradientBoostingRegressor as GB, 
    GradientBoostingClassifier as GC)
from ISLP.bart import BART
import sklearn.model_selection as skm
import seaborn as sns
from sklearn.metrics import log_loss
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
import networkx as nx
import graphviz
from networkx.drawing.nx_agraph import graphviz_layout
import dowhy
from dowhy import CausalModel

In [None]:
# Load the pickled object stored at the relative path "data.pkl" into `data`
# (presumably a DataFrame, given how it is indexed later — confirm).
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
data = pd.read_pickle("data.pkl")

In [None]:
# Names of the three-way interaction columns
triple_interactions = [
    "type_1_female_public", "type_2_female_public",
    "type_3_female_public", "type_4_female_public",
]

# Response column
y = data["callback"]

# Complete regressor list, grouped by order of interaction
X_vars = [
    # main effects
    "female", "public_facing",
    "type_1", "type_2", "type_3", "type_4",
    # two-way interactions
    "female_public",
    "type_1_female", "type_2_female", "type_3_female", "type_4_female",
    "type_1_public", "type_2_public", "type_3_public", "type_4_public",
    # three-way interactions (same names, same order as triple_interactions)
    *triple_interactions,
]

# Function to assign significance stars
def significance_stars(p):
    """Return the significance marker for a p-value.

    Parameters
    ----------
    p : float
        The p-value to classify.

    Returns
    -------
    str
        ``"***"`` if ``p < 0.01``, ``"**"`` if ``p < 0.05``, ``"*"`` if
        ``p < 0.1``, otherwise the empty string.
    """
    # Thresholds are checked from strictest to loosest; the first hit wins.
    cutoffs = ((0.01, "***"), (0.05, "**"), (0.1, "*"))
    for cutoff, marker in cutoffs:
        if p < cutoff:
            return marker
    return ""

# Full sample logit
X_full = sm.add_constant(data[X_vars])
model_full = sm.Logit(y, X_full).fit(disp=0)
# Marginal effects with statsmodels' default get_margeff() settings
margeffs_full = model_full.get_margeff()

# Subset refutation logit: refit the same specification on a fixed-seed
# 40% random subsample so the two sets of estimates can be compared.
subset = data.sample(frac=0.4, random_state=1)
X_sub = sm.add_constant(subset[X_vars])
y_sub = subset["callback"]
model_sub = sm.Logit(y_sub, X_sub).fit(disp=0)
margeffs_sub = model_sub.get_margeff()

# Build each marginal-effects table once up front; the values do not change
# between loop iterations, so there is no reason to rebuild them per row.
ame_table_full = margeffs_full.summary_frame()
ame_table_sub = margeffs_sub.summary_frame()

# Prepare table with Logit and AME
results = []
for t in triple_interactions:
    # Logit coefficients
    coef_full = model_full.params[t]
    se_full = model_full.bse[t]
    p_full = model_full.pvalues[t]

    coef_sub = model_sub.params[t]
    se_sub = model_sub.bse[t]
    p_sub = model_sub.pvalues[t]

    stars_full = significance_stars(p_full)
    stars_sub = significance_stars(p_sub)

    # AMEs
    ame_full = ame_table_full.loc[t, "dy/dx"]
    ame_se_full = ame_table_full.loc[t, "Std. Err."]

    ame_sub = ame_table_sub.loc[t, "dy/dx"]
    ame_se_sub = ame_table_sub.loc[t, "Std. Err."]

    # Each cell is "estimate[stars]\n(standard error)".
    results.append({
        "Treatment": t,
        "Logit (Full)": f"{coef_full:.3f}{stars_full}\n({se_full:.3f})",
        "Logit (Subset)": f"{coef_sub:.3f}{stars_sub}\n({se_sub:.3f})",
        "AME (Full)": f"{ame_full:.3f}\n({ame_se_full:.3f})",
        "AME (Subset)": f"{ame_sub:.3f}\n({ame_se_sub:.3f})"
    })

# Convert to DataFrame
results_df = pd.DataFrame(results)

# Save to LaTeX
# escape=False writes cell text verbatim, and a bare "_" in the treatment
# labels is a LaTeX error in text mode, so escape underscores in the exported
# copy only (results_df itself is left untouched).
# NOTE(review): the "\n" inside each cell is plain whitespace to LaTeX, so the
# standard error will render on the same line as the estimate unless the
# document wraps cells in something like \makecell — confirm intended layout.
latex_df = results_df.assign(
    Treatment=results_df["Treatment"].str.replace("_", r"\_", regex=False)
)
latex_df.to_latex("triple_interactions_logit_ame.tex", index=False, escape=False)

print(results_df)