In [23]:
import pandas as pd
# Absolute Windows path to the processed workbook used as the analysis input.
# NOTE(review): machine-specific location — anyone re-running this notebook
# elsewhere must point `path` at their own copy of complete_rank.xlsx.
path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank.xlsx"
# Read the workbook into `df`, the working frame the following cells filter and model.
df = pd.read_excel(path)

In [24]:
import pandas as pd
import statsmodels.formula.api as smf

# Restrict the sample to rows with bio == 1 and build the regressor of
# interest: the product of the ytp_0 and top250 columns. Because the sample is
# already conditioned on bio, "double" is a two-way interaction term here.
# NOTE(review): assumes ytp_0 and top250 are numeric 0/1 indicators — confirm;
# coerce them with pd.to_numeric(..., errors="coerce") first if they are not.
df = (
    df.loc[df["bio"] == 1]
    .assign(double=lambda frame: frame["ytp_0"] * frame["top250"])
    # Keep only rows where every model input is present.
    .dropna(subset=["indication_count", "appyear", "top250", "ytp_0", "double"])
)

# --- 2) Application-year dummies + interaction term ---
formula = "indication_count ~ double + C(appyear)  + C(top250) + C(ytp_0)"

# Standard errors are clustered on appyear (swap the groups column to cluster
# on a different variable).
res = smf.ols(formula=formula, data=df).fit(
    cov_type="cluster",
    cov_kwds=dict(groups=df["appyear"]),
)


In [25]:
# Full regression table for the model fitted in the previous cell.
print(res.summary())

# --- 3) Pull the interaction estimate neatly ---
# "double" is the ytp_0 * top250 product term, estimated on the bio == 1
# subsample, so the header below names those variables (the earlier label
# referred to ytp_3 / top150 / a triple interaction, none of which are in
# this model).
coef = res.params["double"]
se   = res.bse["double"]
pval = res.pvalues["double"]
# conf_int() returns one row per coefficient with the lower and upper bounds.
ci_l, ci_u = res.conf_int().loc["double"].tolist()

print("\n=== Interaction (ytp_0 × top250, bio == 1 subsample) ===")
print(f"Coef: {coef:,.6f}")
print(f"SE  : {se:,.6f}")
print(f"95% CI: [{ci_l:,.6f}, {ci_u:,.6f}]")
print(f"P-value: {pval:,.6g}")

                            OLS Regression Results                            
Dep. Variable:       indication_count   R-squared:                       0.090
Model:                            OLS   Adj. R-squared:                  0.082
Method:                 Least Squares   F-statistic:                     51.17
Date:                Wed, 13 Aug 2025   Prob (F-statistic):           8.17e-11
Time:                        02:01:23   Log-Likelihood:                -8021.5
No. Observations:                3281   AIC:                         1.610e+04
Df Residuals:                    3252   BIC:                         1.628e+04
Df Model:                          28                                         
Covariance Type:              cluster                                         
                         coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------
Intercept              1.1415      0



In [1]:
import pandas as pd
import statsmodels.formula.api as smf
# Absolute Windows path to the processed workbook used as the analysis input.
path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank.xlsx"

# Build the estimation sample in one pass:
#   1. load the workbook,
#   2. drop rows whose ApplType is "ANDA",
#   3. add nonbio = 1 where ApplType is "NDA", else 0
#      (not referenced by the formula below),
#   4. drop rows missing any model input.
# NOTE(review): assumes bio, ytp_2 and top50 are 0/1 indicators — confirm;
# coerce them with pd.to_numeric(..., errors="coerce") first if they are not.
df = (
    pd.read_excel(path)
    .loc[lambda frame: frame["ApplType"] != "ANDA"]
    .assign(nonbio=lambda frame: (frame["ApplType"] == "NDA").astype(int))
    .dropna(subset=["indication_count", "appyear", "bio", "top50", "ytp_2"])
)

# --- 2) Application-year dummies + full bio x ytp_2 x top50 interaction ---
# The `*` operator expands to all main effects, two-way and three-way terms.
formula = "indication_count ~ C(bio) * C(ytp_2) * C(top50) + C(appyear)"

# Standard errors are clustered on appyear (swap the groups column to cluster
# on a different variable).
res = smf.ols(formula=formula, data=df).fit(
    cov_type="cluster",
    cov_kwds=dict(groups=df["appyear"]),
)


In [2]:
# Print the full text summary table of the model fitted in the previous cell (`res`).
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:       indication_count   R-squared:                       0.041
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     252.0
Date:                Wed, 13 Aug 2025   Prob (F-statistic):           4.15e-21
Time:                        02:37:34   Log-Likelihood:                -69130.
No. Observations:               35046   AIC:                         1.383e+05
Df Residuals:                   35013   BIC:                         1.386e+05
Df Model:                          32                                         
Covariance Type:              cluster                                         
                                              coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------------------

