In [149]:
import os
from io import StringIO

import numpy as np
import pandas as pd
import statsmodels.api as sm
from linearmodels.panel import PanelOLS

In [150]:
# Location of the merged panel dataset. Set the FINALDATA_CSV environment
# variable to point at a different copy of the file; when it is unset the
# original location is used, so existing runs behave exactly as before.
DATA_PATH = os.environ.get(
    'FINALDATA_CSV',
    '/Users/christopher/Desktop/Seminar-Advanced-Finance/finaldata.csv',
)
final_merged_df = pd.read_csv(DATA_PATH)

# Debt Ratio analysis:

In [None]:
# Explanatory variables of the debt-ratio specification, in model order.
debt_ratio_regressors = [
    'Debt Ratio',
    'DebtRatio_sq',
    'Debt_EBITDA_Interaction',
    'ROE',
    'ROE_sq',
    'Asset Tangibility',
    'Industry_Median_Leverage',
    'Size',
    'Size_sq',
    'DPR',
    'Log_NetIncome_Shifted',
    'GDP_Growth_pct',
    'VIX_Growth',
    'RnDintensity',
    'PEratio',
    'Interest Rate',
    'EBITDA',
]

# Parse 'Period End Date' to datetime in place; unparseable values become NaT.
period_end_dates = pd.to_datetime(final_merged_df['Period End Date'], errors='coerce')
final_merged_df['Period End Date'] = period_end_dates

# Two-level panel index: 'Instrument' first, 'Period End Date' second.
panel_data = final_merged_df.set_index(['Instrument', 'Period End Date'])

# Dependent variable is the 'Log_TobinsQ' column.
y = panel_data['Log_TobinsQ']

# Regressor matrix with an added constant column.
X = sm.add_constant(panel_data[debt_ratio_regressors])

# entity_effects=False: this specification is estimated WITHOUT entity fixed
# effects. The rank check is switched off via check_rank=False.
# NOTE(review): the other models in this notebook use entity_effects=True;
# confirm that leaving entity effects out here is intentional.
mod = PanelOLS(y, X, entity_effects=False, check_rank=False)

# Clustered covariance estimator, clustering on the entity.
res = mod.fit(cov_type='clustered', cluster_entity=True)

# Show the estimation summary.
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.3444
Estimator:                   PanelOLS   R-squared (Between):              0.4875
No. Observations:               21894   R-squared (Within):               0.0026
Date:                Sun, Apr 13 2025   R-squared (Overall):              0.3444
Time:                        12:58:55   Log-likelihood                -1.275e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      675.85
Entities:                         362   P-value                           0.0000
Avg Obs:                       60.481   Distribution:                F(17,21876)
Min Obs:                       1.0000                                           
Max Obs:                       101.00   F-statistic (robust):             48.950
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


In [154]:
# Render the coefficient table of the most recently fitted model (`res`) as
# HTML and parse it back into a DataFrame. Passing a literal HTML string to
# pd.read_html is deprecated, so the markup is wrapped in a StringIO buffer.
coef_table = res.summary.tables[1].as_html()
df_coef = pd.read_html(StringIO(coef_table), header=0, index_col=0)[0]

# Keep only the point estimates and p-values, relabelled for the report.
df_selected = df_coef[['Parameter', 'P-value']].rename(columns={'Parameter': 'Coefficient'})

# Export to LaTeX.
# NOTE(review): the caption says "Fixed Effects", but the debt-ratio model in
# the preceding cell is fitted with entity_effects=False -- confirm the caption.
# NOTE(review): the label 'tab:regression' is reused by the other LaTeX export
# cell in this notebook; give each table a unique label if both end up in the
# same LaTeX document.
print(df_selected.to_latex(float_format="%.4f", caption="Fixed Effects Regression Results", label="tab:regression"))

\begin{table}
\caption{Fixed Effects Regression Results}
\label{tab:regression}
\begin{tabular}{lrr}
\toprule
 & Coefficient & P-value \\
\midrule
const & -18.2210 & 0.0000 \\
Debt Ratio & -0.6783 & 0.0001 \\
DebtRatio_sq & 0.2284 & 0.0000 \\
Debt_EBITDA_Interaction & 0.0001 & 0.0504 \\
ROE & -0.0014 & 0.5443 \\
ROE_sq & 0.0000 & 0.0136 \\
Asset Tangibility & -0.0715 & 0.1526 \\
Industry_Median_Leverage & 0.0827 & 0.5728 \\
Size & 0.2357 & 0.0100 \\
Size_sq & -0.0202 & 0.0001 \\
DPR & -0.0307 & 0.0000 \\
Log_NetIncome_Shifted & 1.8336 & 0.0000 \\
GDP_Growth_pct & 0.0116 & 0.0000 \\
VIX_Growth & -0.0042 & 0.1678 \\
RnDintensity & 10.5510 & 0.0000 \\
PEratio & 0.0000 & 0.0871 \\
Interest Rate & 0.0098 & 0.0015 \\
EBITDA & -0.0000 & 0.6236 \\
\bottomrule
\end{tabular}
\end{table}



  df_coef = pd.read_html(coef_table, header=0, index_col=0)[0]


# Market Timing Theory analysis:

In [172]:
# Explanatory variables of the market-timing specification, in model order.
market_timing_regressors = [
    'Debt Ratio',
    'Debt_PE_interaction',
    'Debt_PE_lagged_interaction',
    'ROE',
    'ROE_sq',
    'Asset Tangibility',
    'Industry_Median_Leverage',
    'Size',
    'Size_sq',
    'DPR',
    'Log_NetIncome_Shifted',
    'GDP_Growth_pct',
    'VIX_Growth',
    'RnDintensity',
    'PEratio',
    'Interest Rate',
    'EBITDA',
]

# Parse 'Period End Date' to datetime in place; unparseable values become NaT.
period_end_dates = pd.to_datetime(final_merged_df['Period End Date'], errors='coerce')
final_merged_df['Period End Date'] = period_end_dates

# Two-level panel index: 'Instrument' first, 'Period End Date' second.
panel_data = final_merged_df.set_index(['Instrument', 'Period End Date'])

# Dependent variable is the 'Log_TobinsQ' column.
y = panel_data['Log_TobinsQ']

# Regressor matrix with an added constant column.
X = sm.add_constant(panel_data[market_timing_regressors])

# Entity fixed effects are switched on; the rank check is switched off.
# An alternative that was tried and left disabled also passed
# time_effects=True and drop_absorbed=True (and no check_rank argument).
mod = PanelOLS(y, X, entity_effects=True, check_rank=False)

# Clustered covariance estimator, clustering on the entity.
res = mod.fit(cov_type='clustered', cluster_entity=True)

# Show the estimation summary.
print(res.summary)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.1319
Estimator:                   PanelOLS   R-squared (Between):              0.2395
No. Observations:               21349   R-squared (Within):               0.1319
Date:                Sun, Apr 27 2025   R-squared (Overall):              0.1934
Time:                        22:14:51   Log-likelihood                   -1098.9
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      187.37
Entities:                         362   P-value                           0.0000
Avg Obs:                       58.975   Distribution:                F(17,20970)
Min Obs:                       1.0000                                           
Max Obs:                      100.000   F-statistic (robust):             101.09
                            

In [173]:
# Render the coefficient table of the most recently fitted model (`res`) as
# HTML and parse it back into a DataFrame. Passing a literal HTML string to
# pd.read_html is deprecated, so the markup is wrapped in a StringIO buffer.
coef_table = res.summary.tables[1].as_html()
df_coef = pd.read_html(StringIO(coef_table), header=0, index_col=0)[0]

# Keep only the point estimates and p-values, relabelled for the report.
df_selected = df_coef[['Parameter', 'P-value']].rename(columns={'Parameter': 'Coefficient'})

# Export to LaTeX.
# NOTE(review): the label 'tab:regression' is reused by the other LaTeX export
# cell in this notebook; give each table a unique label if both end up in the
# same LaTeX document.
print(df_selected.to_latex(float_format="%.4f", caption="Fixed Effects Regression Results", label="tab:regression"))

\begin{table}
\caption{Fixed Effects Regression Results}
\label{tab:regression}
\begin{tabular}{lrr}
\toprule
 & Coefficient & P-value \\
\midrule
const & -10.6210 & 0.0000 \\
Debt Ratio & -0.2591 & 0.0145 \\
Debt_PE_interaction & -0.0017 & 0.0336 \\
Debt_PE_lagged_interaction & 0.0000 & 0.0014 \\
ROE & 0.0003 & 0.7420 \\
ROE_sq & 0.0000 & 0.0002 \\
Asset Tangibility & 0.0351 & 0.2427 \\
Industry_Median_Leverage & 0.0678 & 0.3714 \\
Size & 0.4848 & 0.0000 \\
Size_sq & -0.0274 & 0.0000 \\
DPR & -0.0318 & 0.0000 \\
Log_NetIncome_Shifted & 0.9204 & 0.0000 \\
GDP_Growth_pct & 0.0102 & 0.0000 \\
VIX_Growth & -0.0110 & 0.0000 \\
RnDintensity & 11.0300 & 0.0000 \\
PEratio & 0.0021 & 0.0324 \\
Interest Rate & 0.0144 & 0.0000 \\
EBITDA & 0.0000 & 0.0611 \\
\bottomrule
\end{tabular}
\end{table}



  df_coef = pd.read_html(coef_table, header=0, index_col=0)[0]


# Sector analysis:

### Information Technology:

In [174]:
# Explanatory variables used for the sector-level model, in model order.
sector_regressors = [
    'Debt Ratio',
    'DebtRatio_sq',
    'Debt_EBITDA_Interaction',
    'ROE',
    'ROE_sq',
    'Asset Tangibility',
    'Industry_Median_Leverage',
    'Size',
    'Size_sq',
    'DPR',
    'Log_NetIncome_Shifted',
    'GDP_Growth_pct',
    'VIX_Growth',
    'RnDintensity',
    'PEratio',
    'Interest Rate',
    'EBITDA',
]

# Work on an independent copy of the Information Technology rows only.
is_it_row = final_merged_df['GICS Sector'] == 'Information Technology'
it_df = final_merged_df[is_it_row].copy()

# Parse 'Date' to datetime on the copy; unparseable values become NaT.
it_df['Date'] = pd.to_datetime(it_df['Date'], errors='coerce')

# Two-level panel index: 'Ticker' first, 'Date' second.
# NOTE(review): the full-sample models index on ('Instrument',
# 'Period End Date') instead -- confirm the two keys are meant to differ.
panel_data = it_df.set_index(['Ticker', 'Date'])

# Dependent variable is the 'Log_TobinsQ' column.
y = panel_data['Log_TobinsQ']

# Regressor matrix with an added constant column.
X = sm.add_constant(panel_data[sector_regressors])

# Entity fixed effects, clustered covariance with clustering on the entity.
mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type='clustered', cluster_entity=True)

# Show the estimation summary.
print(res.summary)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.2779
Estimator:                   PanelOLS   R-squared (Between):              0.1599
No. Observations:                3211   R-squared (Within):               0.2779
Date:                Sun, Apr 27 2025   R-squared (Overall):              0.1378
Time:                        22:14:51   Log-likelihood                   -999.26
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      71.115
Entities:                          52   P-value                           0.0000
Avg Obs:                       61.750   Distribution:                 F(17,3142)
Min Obs:                       1.0000                                           
Max Obs:                      100.000   F-statistic (robust):             71.105
                            

### Industrials:

In [175]:
# Explanatory variables used for the sector-level model, in model order.
sector_regressors = [
    'Debt Ratio',
    'DebtRatio_sq',
    'Debt_EBITDA_Interaction',
    'ROE',
    'ROE_sq',
    'Asset Tangibility',
    'Industry_Median_Leverage',
    'Size',
    'Size_sq',
    'DPR',
    'Log_NetIncome_Shifted',
    'GDP_Growth_pct',
    'VIX_Growth',
    'RnDintensity',
    'PEratio',
    'Interest Rate',
    'EBITDA',
]

# Work on an independent copy of the Industrials rows only.
is_industrials_row = final_merged_df['GICS Sector'] == 'Industrials'
industrials_df = final_merged_df[is_industrials_row].copy()

# Parse 'Date' to datetime on the copy; unparseable values become NaT.
industrials_df['Date'] = pd.to_datetime(industrials_df['Date'], errors='coerce')

# Two-level panel index: 'Ticker' first, 'Date' second.
# NOTE(review): the full-sample models index on ('Instrument',
# 'Period End Date') instead -- confirm the two keys are meant to differ.
panel_data = industrials_df.set_index(['Ticker', 'Date'])

# Dependent variable is the 'Log_TobinsQ' column.
y = panel_data['Log_TobinsQ']

# Regressor matrix with an added constant column.
X = sm.add_constant(panel_data[sector_regressors])

# Entity fixed effects, clustered covariance with clustering on the entity.
mod = PanelOLS(y, X, entity_effects=True)
res = mod.fit(cov_type='clustered', cluster_entity=True)

# Show the estimation summary.
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:            Log_TobinsQ   R-squared:                        0.1749
Estimator:                   PanelOLS   R-squared (Between):             -0.2318
No. Observations:                3776   R-squared (Within):               0.1749
Date:                Sun, Apr 27 2025   R-squared (Overall):             -0.1262
Time:                        22:14:51   Log-likelihood                   -38.379
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      46.007
Entities:                          69   P-value                           0.0000
Avg Obs:                       54.725   Distribution:                 F(17,3690)
Min Obs:                       1.0000                                           
Max Obs:                      100.000   F-statistic (robust):             17.328
                            

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
