In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS, RandomEffects
from scipy.stats import chi2
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
# Simulated firm-year panel for demonstration; no external data is loaded.
SEED = 42          # fixed so the simulated values are reproducible on every run
START_YEAR = 2010  # first year of the panel
n_firms = 50
n_years = 10

np.random.seed(SEED)

# Balanced panel: every firm appears once in every year.
index = pd.MultiIndex.from_product(
    [range(n_firms), range(START_YEAR, START_YEAR + n_years)],
    names=["firm_id", "year"],
)
n_obs = len(index)

# NOTE: the order of the random draws below determines the simulated values;
# reordering these lines changes every downstream result.
panel_data = pd.DataFrame(index=index)
panel_data["salary"] = np.random.normal(5, 1, n_obs)
panel_data["bonus"] = np.random.normal(2, 0.5, n_obs)
panel_data["equity"] = np.random.normal(8, 2, n_obs)
panel_data["firm_size"] = np.random.normal(10, 1, n_obs)
panel_data["leverage"] = np.random.normal(0.5, 0.2, n_obs)
panel_data["ceo_tenure"] = np.random.randint(1, 15, n_obs)  # upper bound is exclusive: 1..14

# Outcomes are linear in the pay components plus Gaussian noise.
panel_data["roe"] = (
    0.3 * panel_data["equity"]
    + 0.1 * panel_data["bonus"]
    + np.random.normal(0, 2, n_obs)
)
panel_data["stock_return"] = (
    0.25 * panel_data["equity"]
    + 0.2 * panel_data["salary"]
    + np.random.normal(0, 3, n_obs)
)

# The frame is already indexed by (firm_id, year), so no reset/set_index round-trip is needed.
panel_data.head()

In [None]:
# Summary statistics for every numeric column of the panel.
summary_stats = panel_data.describe()
summary_stats

In [None]:
# Baseline specification: stock return on pay components and controls,
# with firm (entity) and year (time) fixed effects.
baseline_regressors = ["salary", "bonus", "equity", "firm_size", "leverage", "ceo_tenure"]
baseline_formula = (
    "stock_return ~ 1 + "
    + " + ".join(baseline_regressors)
    + " + EntityEffects + TimeEffects"
)

model = PanelOLS.from_formula(baseline_formula, data=panel_data)
results = model.fit(cov_type="robust")
print(results.summary)

In [None]:
# Two-period lag of equity pay, shifted within each firm so that one firm's
# values never spill into another's; the first two rows of each firm are NaN.
panel_data["equity_lag2"] = panel_data["equity"].groupby(level="firm_id").shift(2)

# ROE on lagged equity pay and controls, with firm and year fixed effects.
lagged_formula = "roe ~ 1 + equity_lag2 + firm_size + leverage + EntityEffects + TimeEffects"
lagged_spec = PanelOLS.from_formula(lagged_formula, data=panel_data)
lagged_model = lagged_spec.fit(cov_type="robust")

print(lagged_model.summary)

In [None]:
# Variance inflation factors for the main regressors.
X = panel_data[["salary","bonus","equity","firm_size","leverage"]].dropna()

# variance_inflation_factor regresses each column on the remaining columns
# exactly as given, without adding an intercept. The design matrix therefore
# needs an explicit constant; otherwise the auxiliary R^2 is uncentered and the
# VIFs are inflated for variables whose mean is far from zero.
X_design = sm.add_constant(X)

vif_data = pd.DataFrame()
vif_data["Variable"] = X.columns
# Report VIFs for the regressors only; the constant's own VIF is not meaningful.
vif_data["VIF"] = [
    variance_inflation_factor(X_design.values, X_design.columns.get_loc(col))
    for col in X.columns
]
vif_data

In [None]:
# Hausman specification test: fixed effects (consistent under both hypotheses)
# versus random effects (efficient only if the effects are uncorrelated with
# the regressors). Both models use the same regressors so the coefficient
# vectors line up by name.
fe = PanelOLS.from_formula("roe ~ bonus + equity + EntityEffects", data=panel_data).fit()
re = RandomEffects.from_formula("roe ~ bonus + equity", data=panel_data).fit()

b = fe.params
B = re.params
v_b = fe.cov
v_B = re.cov
diff = b - B

# H = (b - B)' [Var(b) - Var(B)]^{-1} (b - B), chi-squared under H0 with one
# degree of freedom per compared coefficient.
# In finite samples Var(b) - Var(B) need not be positive definite, in which
# case the statistic can be negative and the test is uninformative.
stat = np.dot(diff.T, np.linalg.inv(v_b - v_B)).dot(diff)
dof = len(diff)
p_value = chi2.sf(stat, dof)

print("Hausman test statistic:", float(stat))
print("Degrees of freedom:", dof)
print("p-value:", float(p_value))

In [None]:
# Yearly cross-firm average of each pay component.
# Several columns must be selected from a GroupBy with a list; the tuple form
# groupby(...)["a", "b"] raises in pandas >= 2.0.
pay_components = ["salary", "bonus", "equity"]
yearly_means = panel_data.groupby("year")[pay_components].mean()

ax = yearly_means.plot(kind="line", marker="o")
ax.set_title("Trends in CEO Pay Components")
ax.set_ylabel("USD (millions)")
plt.show()

In [None]:
# Scatter of ROE against equity pay two years earlier.
# Requires the equity_lag2 column created in the lagged-model cell.
fig, ax = plt.subplots()
ax.scatter(panel_data["equity_lag2"], panel_data["roe"], alpha=0.5)
ax.set_title("Equity Pay (Lag 2) vs ROE")
ax.set_xlabel("Equity (t-2)")
ax.set_ylabel("ROE")
plt.show()

In [None]:
# Quadratic term lets the effect of equity pay on stock return vary with its level.
panel_data["equity_sq"] = panel_data["equity"].pow(2)

nonlinear_formula = "stock_return ~ equity + equity_sq + EntityEffects + TimeEffects"
nonlinear_spec = PanelOLS.from_formula(nonlinear_formula, data=panel_data)
nonlinear_model = nonlinear_spec.fit(cov_type="robust")
print(nonlinear_model.summary)