In [97]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

In [98]:
# Load the project CSV into `final_merged_df`.
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is pointed at a local copy of finaldata_p2.csv.
finaldata_csv_path = '/Users/christopher/Desktop/Seminar-Advanced-Finance/finaldata_p2.csv'
final_merged_df = pd.read_csv(finaldata_csv_path)

In [100]:
# Add a leverage column: total debt divided by total assets, row by row.
total_debt = final_merged_df["Debt - Total"]
total_assets = final_merged_df["Total Assets"]
final_merged_df["debt-to-assets ratio"] = total_debt.div(total_assets)

In [108]:
# Average debt-to-assets ratio by GICS sector and calendar year.
#
# 'Period End Date' is parsed inside this cell rather than relying on another
# cell having converted it first: the CSV is read without date parsing, so on a
# fresh kernel the `.dt` accessor would raise on the raw column. pd.to_datetime
# leaves an already-converted column unchanged, so the cell can be re-run safely.
# Values that cannot be parsed become NaT (errors='coerce'); their Year is NaN
# and groupby drops those rows from the averages.
sector_debt = (
    final_merged_df
      .assign(
          Year=lambda d: pd.to_datetime(d['Period End Date'], errors='coerce').dt.year
      )                                                              # extract calendar year
      .groupby(['GICS Sector', 'Year'], sort=True)['debt-to-assets ratio']
      .mean()                                                        # sector-year average
      .rename('debtassets')                                          # tidy column name
      .reset_index()                                                 # back to a flat table
)

# Reshape to one row per year and one column per sector for display.
sector_debt_wide = sector_debt.pivot(index='Year',
                                     columns='GICS Sector',
                                     values='debtassets')

sector_debt_wide

GICS Sector,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Health Care,Industrials,Information Technology,Materials,Real Estate,Utilities
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000,0.301184,0.287255,0.319626,0.274105,0.206493,0.308666,0.174242,0.306755,0.307436,0.412766
2001,0.32956,0.292639,0.324432,0.271596,0.215448,0.299424,0.200124,0.300719,0.366175,0.415579
2002,0.330568,0.270475,0.337209,0.28658,0.210232,0.303907,0.209243,0.278696,0.524722,0.417619
2003,0.306864,0.249841,0.328295,0.268098,0.199992,0.292891,0.185331,0.292742,0.530094,0.411778
2004,0.291761,0.230899,0.301183,0.228864,0.183142,0.268892,0.160674,0.264023,0.535003,0.380354
2005,0.280453,0.216552,0.30915,0.183276,0.187801,0.246718,0.154644,0.234248,0.471772,0.357647
2006,0.257726,0.228892,0.292868,0.18474,0.225511,0.234282,0.137813,0.216679,0.45215,0.355651
2007,0.267766,0.236888,0.304288,0.178392,0.226747,0.232261,0.175648,0.224953,0.480834,0.351497
2008,0.288029,0.266044,0.324831,0.180334,0.249844,0.241304,0.197816,0.275777,0.499845,0.360118
2009,0.308613,0.263928,0.325469,0.200667,0.249186,0.263087,0.185014,0.298863,0.489698,0.361686


# Debt Ratio analysis:

In [107]:
from linearmodels.panel import RandomEffects

# Parse the period-end dates in place; values that cannot be parsed become NaT.
final_merged_df['Period End Date'] = pd.to_datetime(
    final_merged_df['Period End Date'], errors='coerce'
)

# Panel index: 'Instrument' identifies the entity, 'Period End Date' the time period.
panel_data = final_merged_df.set_index(['Instrument', 'Period End Date'])

# Dependent variable.
y = panel_data['Log_TobinsQ']

# Regressors for the debt-ratio specification (level, square and EBITDA interaction
# of the debt ratio, plus the control columns listed below).
debt_ratio_regressors = [
    'Debt Ratio', 'DebtRatio_sq', 'Debt_EBITDA_Interaction',
    'ROE', 'ROE_sq',
    'Asset Tangibility', 'Industry_Median_Leverage',
    'Size', 'Size_sq',
    'DPR', 'Log_NetIncome_Shifted',
    'GDP_Growth_pct', 'VIX_Growth',
    'RnDintensity', 'PEratio', 'Interest Rate', 'EBITDA',
]

# Design matrix with an added intercept column.
X = sm.add_constant(panel_data[debt_ratio_regressors])

# entity_effects=False: no entity fixed effects are included in this fit.
# Standard errors are clustered by entity.
# NOTE(review): the other regressions in this notebook use entity_effects=True and the
# exported table is captioned "Fixed Effects" - confirm that False is intended here.
mod = PanelOLS(y, X, entity_effects=False, check_rank=False)
res = mod.fit(cov_type='clustered', cluster_entity=True)

# Show the estimation summary.
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.3440
Estimator:                   PanelOLS   R-squared (Between):              0.4056
No. Observations:               33430   R-squared (Within):               0.0088
Date:                Wed, Apr 30 2025   R-squared (Overall):              0.3440
Time:                        15:49:02   Log-likelihood                -1.756e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      1030.8
Entities:                         594   P-value                           0.0000
Avg Obs:                       56.279   Distribution:                F(17,33412)
Min Obs:                       1.0000                                           
Max Obs:                       101.00   F-statistic (robust):             213.17
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [49]:
import pandas as pd
from io import StringIO

# Build a LaTeX table of coefficients and p-values from the most recent fit (`res`).
# The parameter table of the summary is rendered to HTML and parsed back into a
# DataFrame. Passing the raw HTML string to read_html is deprecated (pandas emits a
# FutureWarning), so the markup is wrapped in a StringIO file-like object.
coef_table = res.summary.tables[1].as_html()
df_coef = pd.read_html(StringIO(coef_table), header=0, index_col=0)[0]

# Keep only the estimate and p-value columns and relabel the estimate column.
# rename() returns a new frame, avoiding assignment to `.columns` on a selection
# taken from df_coef.
df_selected = df_coef[['Parameter', 'P-value']].rename(columns={'Parameter': 'Coefficient'})

# Export to LaTeX.
# NOTE(review): the caption says "Fixed Effects"; confirm that `res` comes from a fit
# with entity_effects=True before using this table under that caption.
print(df_selected.to_latex(float_format="%.4f", caption="Fixed Effects Regression Results", label="tab:regression"))

\begin{table}
\caption{Fixed Effects Regression Results}
\label{tab:regression}
\begin{tabular}{lrr}
\toprule
 & Coefficient & P-value \\
\midrule
const & -14.8380 & 0.0177 \\
Debt Ratio & -0.9990 & 0.0000 \\
DebtRatio_sq & 0.4859 & 0.0001 \\
Debt_EBITDA_Interaction & 0.0001 & 0.0615 \\
ROE & 0.0012 & 0.5982 \\
ROE_sq & 0.0000 & 0.0691 \\
Asset Tangibility & -0.0898 & 0.0384 \\
Industry_Median_Leverage & -0.0751 & 0.5840 \\
Size & -0.1478 & 0.3824 \\
Size_sq & -0.0009 & 0.9207 \\
DPR & -0.0088 & 0.0000 \\
Log_NetIncome_Shifted & 1.6448 & 0.0063 \\
GDP_Growth_pct & 0.0161 & 0.0000 \\
VIX_Growth & -0.0080 & 0.0032 \\
RnDintensity & 11.3760 & 0.0000 \\
PEratio & 0.0004 & 0.0000 \\
Interest Rate & 0.0115 & 0.0000 \\
EBITDA & 0.0000 & 0.7711 \\
\bottomrule
\end{tabular}
\end{table}



  df_coef = pd.read_html(coef_table, header=0, index_col=0)[0]


# Market Timing Theory analysis:

In [50]:
# Parse the period-end dates in place; values that cannot be parsed become NaT.
final_merged_df['Period End Date'] = pd.to_datetime(
    final_merged_df['Period End Date'], errors='coerce'
)

# Panel index: 'Instrument' identifies the entity, 'Period End Date' the time period.
panel_data = final_merged_df.set_index(['Instrument', 'Period End Date'])

# Dependent variable.
y = panel_data['Log_TobinsQ']

# Regressors for the market-timing specification: the debt ratio, its interactions
# with the current and lagged P/E columns, and the control columns listed below.
market_timing_regressors = [
    'Debt Ratio', 'Debt_PE_interaction', 'Debt_PE_lagged_interaction',
    'ROE', 'ROE_sq',
    'Asset Tangibility', 'Industry_Median_Leverage',
    'Size', 'Size_sq',
    'DPR', 'Log_NetIncome_Shifted',
    'GDP_Growth_pct', 'VIX_Growth',
    'RnDintensity', 'PEratio', 'Interest Rate', 'EBITDA',
]

# Design matrix with an added intercept column.
X = sm.add_constant(panel_data[market_timing_regressors])

# Entity fixed effects (entity_effects=True) with standard errors clustered by entity.
mod = PanelOLS(y, X, entity_effects=True, check_rank=False)
res = mod.fit(cov_type='clustered', cluster_entity=True)


# Show the estimation summary.
print(res.summary)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.0808
Estimator:                   PanelOLS   R-squared (Between):              0.1956
No. Observations:               32782   R-squared (Within):               0.0808
Date:                Tue, Apr 29 2025   R-squared (Overall):              0.1872
Time:                        14:48:33   Log-likelihood                    2196.6
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      166.40
Entities:                         584   P-value                           0.0000
Avg Obs:                       56.134   Distribution:                F(17,32181)
Min Obs:                       1.0000                                           
Max Obs:                      100.000   F-statistic (robust):             1597.1
                            

In [51]:
import pandas as pd
from io import StringIO

# Build a LaTeX table of coefficients and p-values from the most recent fit (`res`).
# The parameter table of the summary is rendered to HTML and parsed back into a
# DataFrame. Passing the raw HTML string to read_html is deprecated (pandas emits a
# FutureWarning), so the markup is wrapped in a StringIO file-like object.
coef_table = res.summary.tables[1].as_html()
df_coef = pd.read_html(StringIO(coef_table), header=0, index_col=0)[0]

# Keep only the estimate and p-value columns and relabel the estimate column.
# rename() returns a new frame, avoiding assignment to `.columns` on a selection
# taken from df_coef.
df_selected = df_coef[['Parameter', 'P-value']].rename(columns={'Parameter': 'Coefficient'})

# Export to LaTeX.
print(df_selected.to_latex(float_format="%.4f", caption="Fixed Effects Regression Results", label="tab:regression"))

\begin{table}
\caption{Fixed Effects Regression Results}
\label{tab:regression}
\begin{tabular}{lrr}
\toprule
 & Coefficient & P-value \\
\midrule
const & -8.2333 & 0.0033 \\
Debt Ratio & 0.0162 & 0.8866 \\
Debt_PE_interaction & 0.4219 & 0.0070 \\
Debt_PE_lagged_interaction & 0.0210 & 0.0491 \\
ROE & 0.0017 & 0.0067 \\
ROE_sq & 0.0000 & 0.0444 \\
Asset Tangibility & 0.0166 & 0.3331 \\
Industry_Median_Leverage & -0.0675 & 0.4931 \\
Size & 0.0799 & 0.6588 \\
Size_sq & -0.0080 & 0.3821 \\
DPR & -0.0032 & 0.1411 \\
Log_NetIncome_Shifted & 0.8456 & 0.0006 \\
GDP_Growth_pct & 0.0122 & 0.0000 \\
VIX_Growth & -0.0175 & 0.0000 \\
RnDintensity & 5.1664 & 0.0028 \\
PEratio & -0.2875 & 0.0070 \\
Interest Rate & 0.0153 & 0.0000 \\
EBITDA & 0.0000 & 0.0329 \\
\bottomrule
\end{tabular}
\end{table}



  df_coef = pd.read_html(coef_table, header=0, index_col=0)[0]


# Sector analysis:

### Information Technology:

In [52]:
# 1. Keep only the Information Technology rows (copied so the edits below do not
#    touch final_merged_df).
is_info_tech = final_merged_df['GICS Sector'] == 'Information Technology'
it_df = final_merged_df.loc[is_info_tech].copy()

# 2. Parse the 'Date' column; values that cannot be parsed become NaT.
it_df['Date'] = pd.to_datetime(it_df['Date'], errors='coerce')

# 3. Panel index: 'Instrument' identifies the entity, 'Date' the time period.
panel_data = it_df.set_index(['Instrument', 'Date'])

# 4. Dependent variable.
y = panel_data['Log_TobinsQ']

# 5. Regressors: debt ratio (level, square, EBITDA interaction) plus controls.
it_regressors = [
    'Debt Ratio', 'DebtRatio_sq', 'Debt_EBITDA_Interaction',
    'ROE', 'ROE_sq',
    'Asset Tangibility', 'Industry_Median_Leverage',
    'Size', 'Size_sq',
    'DPR', 'Log_NetIncome_Shifted',
    'GDP_Growth_pct', 'VIX_Growth',
    'RnDintensity', 'PEratio', 'Interest Rate', 'EBITDA',
]

# 6. Design matrix with an added intercept column.
X = sm.add_constant(panel_data[it_regressors])

# 7. Entity fixed effects (entity_effects=True), standard errors clustered by entity.
mod = PanelOLS(y, X, entity_effects=True, check_rank=False)
res = mod.fit(cov_type='clustered', cluster_entity=True)

# 8. Show the estimation summary.
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.2499
Estimator:                   PanelOLS   R-squared (Between):              0.3186
No. Observations:                4461   R-squared (Within):               0.2499
Date:                Tue, Apr 29 2025   R-squared (Overall):              0.2723
Time:                        14:51:53   Log-likelihood                   -1079.1
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      85.425
Entities:                          86   P-value                           0.0000
Avg Obs:                       51.872   Distribution:                 F(17,4358)
Min Obs:                       1.0000                                           
Max Obs:                      100.000   F-statistic (robust):             32.811
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [53]:
import pandas as pd
from io import StringIO

# Build a LaTeX table of coefficients and p-values from the most recent fit (`res`).
# The parameter table of the summary is rendered to HTML and parsed back into a
# DataFrame. Passing the raw HTML string to read_html is deprecated (pandas emits a
# FutureWarning), so the markup is wrapped in a StringIO file-like object.
coef_table = res.summary.tables[1].as_html()
df_coef = pd.read_html(StringIO(coef_table), header=0, index_col=0)[0]

# Keep only the estimate and p-value columns and relabel the estimate column.
# rename() returns a new frame, avoiding assignment to `.columns` on a selection
# taken from df_coef.
df_selected = df_coef[['Parameter', 'P-value']].rename(columns={'Parameter': 'Coefficient'})

# Export to LaTeX.
print(df_selected.to_latex(float_format="%.4f", caption="Fixed Effects Regression Results", label="tab:regression"))

\begin{table}
\caption{Fixed Effects Regression Results}
\label{tab:regression}
\begin{tabular}{lrr}
\toprule
 & Coefficient & P-value \\
\midrule
const & -2.4653 & 0.4086 \\
Debt Ratio & -1.0978 & 0.0034 \\
DebtRatio_sq & 0.8540 & 0.0000 \\
Debt_EBITDA_Interaction & -0.0000 & 0.3120 \\
ROE & 0.0125 & 0.4097 \\
ROE_sq & -0.0003 & 0.6743 \\
Asset Tangibility & -0.2682 & 0.2203 \\
Industry_Median_Leverage & 0.1518 & 0.5652 \\
Size & 0.8129 & 0.0723 \\
Size_sq & -0.0482 & 0.0411 \\
DPR & -0.0246 & 0.0525 \\
Log_NetIncome_Shifted & 0.0298 & 0.8778 \\
GDP_Growth_pct & 0.0179 & 0.0000 \\
VIX_Growth & -0.0147 & 0.0299 \\
RnDintensity & 6.5319 & 0.0966 \\
PEratio & 0.1885 & 0.0165 \\
Interest Rate & 0.0465 & 0.0000 \\
EBITDA & 0.0001 & 0.0165 \\
\bottomrule
\end{tabular}
\end{table}



  df_coef = pd.read_html(coef_table, header=0, index_col=0)[0]


### Industrials:

In [65]:
# 1. Keep only the Industrials rows (copied so the edits below do not touch
#    final_merged_df).
is_industrials = final_merged_df['GICS Sector'] == 'Industrials'
industrials_df = final_merged_df.loc[is_industrials].copy()

# 2. Parse the 'Date' column; values that cannot be parsed become NaT.
industrials_df['Date'] = pd.to_datetime(industrials_df['Date'], errors='coerce')

# 3. Panel index: 'Instrument' identifies the entity, 'Date' the time period.
panel_data = industrials_df.set_index(['Instrument', 'Date'])

# 4. Dependent variable and regressors (debt ratio level, square and EBITDA
#    interaction, plus controls).
y = panel_data['Log_TobinsQ']
industrials_regressors = [
    'Debt Ratio', 'DebtRatio_sq', 'Debt_EBITDA_Interaction',
    'ROE', 'ROE_sq',
    'Asset Tangibility', 'Industry_Median_Leverage',
    'Size', 'Size_sq',
    'DPR', 'Log_NetIncome_Shifted',
    'GDP_Growth_pct', 'VIX_Growth',
    'RnDintensity', 'PEratio', 'Interest Rate', 'EBITDA',
]

# 5. Design matrix with an added intercept column.
X = sm.add_constant(panel_data[industrials_regressors])

# 6. Entity fixed effects (entity_effects=True), standard errors clustered by entity.
mod = PanelOLS(y, X, entity_effects=True, check_rank=False)
res = mod.fit(cov_type='clustered', cluster_entity=True)

# 7. Show the estimation summary.
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.0906
Estimator:                   PanelOLS   R-squared (Between):             -0.1140
No. Observations:                5707   R-squared (Within):               0.0906
Date:                Tue, Apr 29 2025   R-squared (Overall):             -0.1221
Time:                        14:59:12   Log-likelihood                    905.89
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      32.747
Entities:                         103   P-value                           0.0000
Avg Obs:                       55.408   Distribution:                 F(17,5587)
Min Obs:                       1.0000                                           
Max Obs:                       101.00   F-statistic (robust):             36.128
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [66]:
import pandas as pd
from io import StringIO

# Build a LaTeX table of coefficients and p-values from the most recent fit (`res`).
# The parameter table of the summary is rendered to HTML and parsed back into a
# DataFrame. Passing the raw HTML string to read_html is deprecated (pandas emits a
# FutureWarning), so the markup is wrapped in a StringIO file-like object.
coef_table = res.summary.tables[1].as_html()
df_coef = pd.read_html(StringIO(coef_table), header=0, index_col=0)[0]

# Keep only the estimate and p-value columns and relabel the estimate column.
# rename() returns a new frame, avoiding assignment to `.columns` on a selection
# taken from df_coef.
df_selected = df_coef[['Parameter', 'P-value']].rename(columns={'Parameter': 'Coefficient'})

# Export to LaTeX.
print(df_selected.to_latex(float_format="%.4f", caption="Fixed Effects Regression Results", label="tab:regression"))

\begin{table}
\caption{Fixed Effects Regression Results}
\label{tab:regression}
\begin{tabular}{lrr}
\toprule
 & Coefficient & P-value \\
\midrule
const & -12.3870 & 0.0729 \\
Debt Ratio & -0.1493 & 0.8027 \\
DebtRatio_sq & 0.2954 & 0.4651 \\
Debt_EBITDA_Interaction & -0.0003 & 0.0218 \\
ROE & 0.0053 & 0.5130 \\
ROE_sq & 0.0000 & 0.6320 \\
Asset Tangibility & 0.1586 & 0.2001 \\
Industry_Median_Leverage & 0.4458 & 0.2001 \\
Size & -0.0469 & 0.8937 \\
Size_sq & 0.0024 & 0.8923 \\
DPR & -0.0081 & 0.2709 \\
Log_NetIncome_Shifted & 1.2174 & 0.0813 \\
GDP_Growth_pct & 0.0132 & 0.0000 \\
VIX_Growth & -0.0235 & 0.0000 \\
RnDintensity & 5.1676 & 0.1318 \\
PEratio & -0.0049 & 0.4514 \\
Interest Rate & 0.0123 & 0.0027 \\
EBITDA & 0.0003 & 0.0183 \\
\bottomrule
\end{tabular}
\end{table}



  df_coef = pd.read_html(coef_table, header=0, index_col=0)[0]
