In [1]:
import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import pandas as pd
import numpy as np
import statsmodels.api as sm
from tqdm import trange
from sklearn.linear_model import Ridge  
import warnings, numpy as np, pandas as pd

In [2]:
df_raw = pd.read_csv("Trump_First_Term.csv")

# --- monthly aggregation ---
agg = {
    "Approve":               "mean",
    "Disapprove":            "mean",
    "No_Oppinion":           "mean",
    # macro variables are identical within a month → mean == first value
    "Unemployment_rate":     "mean",
    "Consumer_Index_Sentiment": "mean",
    "Real_Broad_Dollar_Index":  "mean"
}

df_month = (df_raw
            .groupby(["Year", "Month"], as_index=False)
            .agg(agg)
            .sort_values(["Year", "Month"]))

In [3]:
df_month["t"] = (df_month["Year"] - 2017) * 12 + (df_month["Month"] - 1)
df_month["months_into_term"] = np.where(df_month["Year"] < 2025,
                                        df_month["t"],
                                        df_month["t"] - ((2025-2017)*12))

# seasonality
df_month["sin_month"] = np.sin(2*np.pi*df_month["Month"]/12)
df_month["cos_month"] = np.cos(2*np.pi*df_month["Month"]/12)

# term dummy
df_month["term"] = (df_month["Year"] >= 2025).astype(int)

# lag features (need shift after aggregation)
df_month["approve_lag1"]  = df_month["Approve"].shift(1)
df_month["approve_ma3"]   = df_month["Approve"].rolling(3, min_periods=1).mean()

# macro differentials
for col in ["Unemployment_rate", "Consumer_Index_Sentiment", "Real_Broad_Dollar_Index"]:
    df_month[f"{col}_diff1"] = df_month[col].diff(1)

# economical anxiety composite
z = df_month[["Unemployment_rate","Consumer_Index_Sentiment"]].apply(
        lambda x: (x - x.mean())/x.std())
df_month["econ_anxiety"] = z["Unemployment_rate"] - z["Consumer_Index_Sentiment"]

In [4]:
feature_cols = [c for c in df_month.columns if c not in
                ["Approve", "Disapprove", "No_Oppinion"]]

# training through May‑2025
train_mask = (df_month["Year"] < 2025) | ((df_month["Year"] == 2025) & (df_month["Month"] <= 5))
df_train   = df_month.loc[train_mask].copy()

X_train = sm.add_constant(df_train[feature_cols])
y_train = df_train["Approve"]

X_train = X_train.apply(pd.to_numeric, errors="coerce").fillna(0)

glm = sm.GLM(y_train, X_train, family=sm.families.Gaussian()).fit()
print(glm.summary())


                 Generalized Linear Model Regression Results                  
Dep. Variable:                Approve   No. Observations:                   54
Model:                            GLM   Df Residuals:                       40
Model Family:                Gaussian   Df Model:                           13
Link Function:               Identity   Scale:                          3.1399
Method:                          IRLS   Log-Likelihood:                -99.413
Date:                Mon, 05 May 2025   Deviance:                       125.60
Time:                        19:42:30   Pearson chi2:                     126.
No. Iterations:                     3   Pseudo R-squ. (CS):             0.9611
Covariance Type:            nonrobust                                         
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
const       

In [None]:
# -------------------------------------------------
# Clone May‑2025 as a *DataFrame*  (note the double brackets)
# -------------------------------------------------
june = df_month[(df_month["Year"] == 2025) & (df_month["Month"] == 5)].iloc[[0]].copy()

# --- update calendar fields ---
june["Month"]            = 6
june["t"]                += 1
june["months_into_term"] += 1
june["sin_month"]         = np.sin(2*np.pi*6/12)
june["cos_month"]         = np.cos(2*np.pi*6/12)

# --- compute lag features BEFORE add_constant / fillna ---
june["approve_lag1"] = df_month.loc[(df_month["Year"] == 2025) & (df_month["Month"] == 5), "Approve"].values[0]
june["approve_ma3"]  = df_month.tail(3)["Approve"].mean()    # Mar–May avg

# -------------------------------------------------
# Build design matrix with identical columns
# -------------------------------------------------
X_june = sm.add_constant(june[feature_cols], has_constant="add")
X_june = X_june.reindex(columns=X_train.columns, fill_value=0)   # keep order

# -------------------------------------------------
# Predict + 80 % CI
# -------------------------------------------------
pred_res = glm.get_prediction(X_june)
pt       = float(pred_res.predicted_mean[0])
lo, hi   = pred_res.conf_int(alpha=0.10)[0]

print(f"June‑2025 approval (aggregated polls): {pt:.2f}%   "
      f"(90 % CI: {lo:.2f} – {hi:.2f}%)")

June‑2025 approval (aggregated polls): 45.64%   (90 % CI: 44.02 – 47.25%)
