<a href="https://colab.research.google.com/github/Jmmostafa/QAFMCB-Seminar/blob/main/Data_Analysis_Results_QACBFM_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Preparation

## Installing the necessary libraries

In [None]:
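# # Setting the working directory: uncomment this cell when running in Colab, because the cells below read from and write to /content/drive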
# from google.colab import drive
# # Mount Google Drive
# drive.mount('/content/drive')

In [None]:
# install them before use
!pip install scienceplots --quiet
!pip install PyPDF2 --quiet
!pip install stargazer --quiet

## Importing libraries

In [None]:
# loading the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scienceplots
import seaborn as sns
# plt.style.use("ggplot")
# plt.style.use("classic")
# plt.style.use('seaborn-white')
# plt.style.use(['science', 'notebook', 'grid'])


## Loading Data
- Loading the data from the Drive (saved as csv files).
- For the analysis, we use the sentiment scores from the latest cleaning procedure, since the t-test shows no significant difference in the word count between the two procedures. For the final series, `NG_Sentiment_clean_pp`, proper nouns are deleted first; the text is then lowercased and the stopwords listed in the `nltk` package are dropped.

In [None]:
from IPython.display import display

# loading the data from the drive
lm_dict = pd.read_csv("/content/drive/MyDrive/QACFM Data/lm_dictionary_1993-2021.csv")
df_text = pd.read_csv("/content/drive/MyDrive/QACFM Data/df_ps_ng_sentiment_final.csv", index_col ='Date')
df_market = pd.read_csv("/content/drive/MyDrive/QACFM Data/df_market_vars.csv", index_col = 'Date')

# copying them into new dataframes for further analysis (the frames read above stay untouched)
df_text_sent = df_text.copy()
df_market_data = df_market.copy()
df_market_data.index = pd.to_datetime(df_market_data.index)

# See the head of both datasets.
# A notebook cell only renders its LAST bare expression, so each frame is
# displayed explicitly; otherwise the first head() is silently discarded.
display(df_text_sent.head())
display(df_market_data.head())

## Preliminary Actions

In [None]:
# Only take the necessary columns of each datasets and make other adjustments

# for the text: keep columns 0-6 and 14-18 (by position), then drop the raw text
df_sent_vars = df_text_sent.iloc[:, list(range(7)) + list(range(14, 19))]
df_sent_vars = df_sent_vars.drop(columns = ['original_text'])
# Parse the index BEFORE applying the cut-off so that the comparison is
# chronological; on the raw string index it would be a lexicographic compare.
df_sent_vars.index = pd.to_datetime(df_sent_vars.index)
# .copy() makes the filtered frame independent, so adding columns to it later
# does not raise SettingWithCopyWarning.
df_sent_vars = df_sent_vars[df_sent_vars.index > '1999-12-01'].copy()


# for the market variables
# filter(regex=...) keeps every column whose name contains 't', 'T' or 'F'
df_mkt_vars = df_market_data.filter(regex='[tTF]')
drop_list = ['UMCSENT','CPILFESL','EPU_PCT','GDP_DFLTR','GDP_DFLTR_PCT','PCT_Export_GDP','EURO_STOXX50','USTB10','EUTB10','FEDTR']
df_mkt_vars = df_mkt_vars.drop(columns = drop_list)

# df_mkt_vars[['FEDFUNDS','ECBMLFR','T10Y3M']].diff()
# df_mkt_vars.info()
# df_sent_vars.info()

### Summary Statistics

In [None]:
import pandas as pd
import warnings
from statsmodels.tsa.stattools import adfuller, kpss

# Alternatively, to suppress all warnings (not recommended for general use):
# warnings.filterwarnings("ignore")

# One row per market series: descriptive statistics plus ADF and KPSS results.
summary_rows = []

for column, series in df_mkt_vars.items():
    adf_result = adfuller(series)                 # index 0: statistic, index 1: p-value
    kpss_result = kpss(series, regression='c')    # index 0: statistic, index 1: p-value
    summary_stats = series.describe()

    row = {'Series': column, 'Count': round(summary_stats['count'],0)}
    # descriptive statistics, rounded to three decimals
    for out_name, stat_key in (('Min', 'min'), ('Mean', 'mean'), ('Std Dev', 'std'), ('Max', 'max')):
        row[out_name] = round(summary_stats[stat_key],3)
    # test statistics and p-values, rounded to three decimals
    row['ADF Statistic'] = round(adf_result[0],3)
    row['ADF p-value'] = round(adf_result[1],3)
    row['KPSS Statistic'] = round(kpss_result[0],3)
    row['KPSS p-value'] = round(kpss_result[1],3)
    # ... Add other summary stats as needed

    summary_rows.append(row)

summary_results = pd.DataFrame(summary_rows)


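# Reading the two tests (their null hypotheses are opposite):
#   - ADF:  H0 = the series has a unit root (non-stationary). A p-value below 10% rejects H0, i.e. evidence that the series is stationary.
#   - KPSS: H0 = the series is stationary (regression='c'). A p-value below 10% rejects H0, i.e. evidence AGAINST stationarity.
# A series is therefore supported as stationary when the ADF test rejects and the KPSS test does not.
# Stationarity is plausible for the series expressed as percentage changes rather than real values (built in first difference).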

# Function to determine significance
def significance_stars(p_value):
    """Return the significance marker for a p-value.

    '***' for p < 0.01, '**' for p < 0.05, '*' for p <= 0.1, '' otherwise.
    """
    # strict cut-offs first, tightest level wins
    for cutoff, stars in ((0.01, '***'), (0.05, '**')):
        if p_value < cutoff:
            return stars
    # the 10% level is inclusive
    return '*' if p_value <= 0.1 else ''

def significance_stars_kpss(p_value):
    """Return the significance marker for a KPSS p-value.

    statsmodels' kpss() interpolates its p-value from a table and returns the
    boundary value when the statistic falls outside it, so the reported value
    is confined to [0.01, 0.1]:
      * 0.01 means "p is at most 0.01"  -> significant at 1%  ('***')
      * 0.1  means "p is at least 0.1"  -> not significant at 10% ('')
    The boundaries are therefore handled the opposite way from a plain
    `p < 0.01` / `p <= 0.1` rule.

    Parameters
    ----------
    p_value : float
        p-value as returned by kpss().

    Returns
    -------
    str
        '***', '**', '*' or ''.
    """
    if p_value <= 0.01:
        return '***'
    elif p_value < 0.05:
        return '**'
    elif p_value < 0.10:
        return '*'
    else:
        return ''

# Apply the function to each p-value column and create a combined column
columns_to_check_adf = ['ADF p-value']  # Add other columns as needed
columns_to_check_kpss = ['KPSS p-value']  # Add other columns as needed

# Map each listed column element-wise with Series.map (DataFrame.applymap is
# deprecated since pandas 2.1), then join the markers of a row into one string.
summary_results['ADF_Sig'] = summary_results[columns_to_check_adf].apply(lambda col: col.map(significance_stars)).agg(''.join, axis=1)
summary_results['KPSS_Sig'] = summary_results[columns_to_check_kpss].apply(lambda col: col.map(significance_stars_kpss)).agg(''.join, axis=1)

summary_results.to_csv('/content/drive/MyDrive/QACFM Data/Summary_Stat.csv', index=False)

# Display the updated DataFrame
summary_results.head(3)

### Checking VIFs (Initially)

In [None]:
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

# Calculating VIF for each variable.
# variance_inflation_factor() regresses one column on all the other columns
# exactly as given, so the intercept has to be part of the design matrix.
# add_constant() prepends it as column 0; the constant itself is not reported.
vif_exog = add_constant(df_mkt_vars, has_constant='add')
vif = pd.DataFrame()
vif["Variable"] = df_mkt_vars.columns
vif["VIF"] = [variance_inflation_factor(vif_exog.values, i) for i in range(1, vif_exog.shape[1])]
print(vif)

### Difference in Series

In [None]:
import pandas as pd
from scipy.stats import ttest_ind
import numpy as np

# Independent two-sample t-test between the original-text and cleaned-text
# negative sentiment scores.
# NOTE(review): both columns come from the same rows of df_sent_vars, so the
# samples look paired - confirm whether scipy.stats.ttest_rel is the intended test.
sentiment_ttest = ttest_ind(df_sent_vars['NG_Sentiment_org'], df_sent_vars['NG_Sentiment_clean_pp'])
t_statistic, p_value = sentiment_ttest

# Report the raw numbers
print(f'T-statistic: {t_statistic}')
print(f'P-value: {p_value}')

# Verdict at the 5% level
alpha = 0.05
verdict = (
    "Reject the null hypothesis: There is a significant difference between the two series."
    if p_value < alpha
    else "Fail to reject the null hypothesis: There is no significant difference between the two series."
)
print(verdict)


# Plotting Figures

### Comparing Negative Sentiment in Original text and Cleaned one

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Assuming df_sent_vars is your DataFrame with the necessary columns

plt.style.use(['science', 'notebook', 'grid'])

# Build the figure and its axes in ONE call so that figsize/dpi apply to the
# figure that is actually drawn and saved. A separate plt.figure(...) followed
# by plt.subplots() only leaves an empty extra figure behind.
fig, ax = plt.subplots(figsize=(12, 8), dpi=300)
fig.patch.set_facecolor('white')
ax.set_facecolor('white')

# Original-text series on the left axis, cleaned-text series on a secondary (right) axis
df_sent_vars['NG_Sentiment_org'].plot(linestyle='-', marker='o', color='blue', label='NG_Sentiment_org', ax=ax)
df_sent_vars['NG_Sentiment_clean_pp'].plot(linestyle='-', marker='*', color='red', label='NG_Sentiment_Clean_pp', secondary_y=True, ax=ax)

# Set labels for both y-axes
ax.set_ylabel('Negative Sentiment [Original Text]', color='blue', fontsize=12)
ax.right_ax.set_ylabel('Negative Sentiment [Cleaned Text]', fontsize=12, color='red')
ax.set_xlabel("", fontsize=12)

# shaded regions (start / end dates)
x_dotcom_b = "2001-03-01"
x_dotcom_e = "2001-11-30"
x_gfc_b = "2007-12-01"
x_gfc_e = "2009-06-30"
x_covid_b = "2020-02-01"
x_covid_e = "2020-04-30"

# Shade regions
for span_label, span_start, span_end in (
    ('Dotcom', x_dotcom_b, x_dotcom_e),
    ('GFC', x_gfc_b, x_gfc_e),
    ('COVID-19', x_covid_b, x_covid_e),
):
    ax.axvspan(span_start, span_end, facecolor='gray', alpha=0.3, label=span_label)

# Customize the grid
ax.grid(True, linestyle='', alpha=0.8)

# Major x-axis ticks every 4 years, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator(base=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# Tick-label font size on both y-axes
ax.tick_params(axis='both', which='both', labelsize=15)
ax.right_ax.tick_params(axis='both', which='both', labelsize=15)

# Use tight_layout for better spacing
plt.tight_layout()

# saving the graph in the Drive folder
plt.savefig('/content/drive/MyDrive/QACFM Data/1. Negative_Tone_Fed_Org_Clean_pp.png')

plt.show()


### Positive Tone Sentiment of Original text and Cleaned Text

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Assuming df_sent_vars is your DataFrame with the necessary columns

plt.style.use(['science', 'notebook', 'grid'])

# Build the figure and its axes in ONE call so that figsize/dpi/facecolor apply
# to the figure that is actually drawn and saved. A separate plt.figure(...)
# followed by plt.subplots() only leaves an empty extra figure behind.
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, facecolor='white', edgecolor='none')

# Original-text series on the left axis, cleaned-text series on a secondary (right) axis
df_sent_vars['PS_Sentiment_org'].plot(linestyle='-', marker='o', color='blue', label='PS_Sentiment_org', ax=ax)
df_sent_vars['PS_Sentiment_clean_pp'].plot(linestyle='-', marker='*', color='red', label='PS_Sentiment_Clean_pp', secondary_y=True, ax=ax)

# Set labels for both y-axes
ax.set_ylabel('Positive Sentiment [Original Text]', color='blue', fontsize=12)
ax.right_ax.set_ylabel('Positive Sentiment [Cleaned Text]', fontsize=12, color='red')
ax.set_xlabel("", fontsize=12)

# shaded regions (start / end dates)
x_dotcom_b = "2001-03-01"
x_dotcom_e = "2001-11-30"
x_gfc_b = "2007-12-01"
x_gfc_e = "2009-06-30"
x_covid_b = "2020-02-01"
x_covid_e = "2020-04-30"

# Shade regions
for span_label, span_start, span_end in (
    ('Dotcom', x_dotcom_b, x_dotcom_e),
    ('GFC', x_gfc_b, x_gfc_e),
    ('COVID-19', x_covid_b, x_covid_e),
):
    ax.axvspan(span_start, span_end, facecolor='gray', alpha=0.3, label=span_label)

# Customize the grid
ax.grid(False, linestyle='', alpha=0.8)

# Major x-axis ticks every 4 years, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator(base=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# Tick-label font size on both y-axes
ax.tick_params(axis='both', which='both', labelsize=15)
ax.right_ax.tick_params(axis='both', which='both', labelsize=15)

# Use tight_layout for better spacing
plt.tight_layout()

# saving the graph in the Drive folder
plt.savefig('/content/drive/MyDrive/QACFM Data/2. Positive_Tone_Fed_Org_Clean_pp.png')

plt.show()


### Negative and Positive in the Cleaned Text

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Assuming df_sent_vars is your DataFrame with the necessary columns

plt.style.use(['science', 'notebook', 'grid'])

# Build the figure and its axes in ONE call so that figsize/dpi/facecolor apply
# to the figure that is actually drawn and saved. A separate plt.figure(...)
# followed by plt.subplots() only leaves an empty extra figure behind.
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, facecolor='white', edgecolor='none')

# Cleaned positive score on the left axis, cleaned negative score on a secondary (right) axis.
# The first label now names the column that is actually plotted (it said 'PS_Sentiment_org').
df_sent_vars['PS_Sentiment_clean_pp'].plot(linestyle='--', marker='o', color='blue', label='PS_Sentiment_Clean_pp', ax=ax)
df_sent_vars['NG_Sentiment_clean_pp'].plot(linestyle='-', marker='*', color='red', label='NG_Sentiment_Clean_pp', secondary_y=True, ax=ax)

# Set labels for both y-axes
ax.set_ylabel('Positive Sentiment Score', color='blue', fontsize=12)
ax.right_ax.set_ylabel('Negative Sentiment Score', fontsize=12, color='red')
ax.set_xlabel("", fontsize=12)

# shaded regions (start / end dates)
x_dotcom_b = "2001-03-01"
x_dotcom_e = "2001-11-30"
x_gfc_b = "2007-12-01"
x_gfc_e = "2009-06-30"
x_covid_b = "2020-02-01"
x_covid_e = "2020-04-30"

# Shade regions
for span_label, span_start, span_end in (
    ('Dotcom', x_dotcom_b, x_dotcom_e),
    ('GFC', x_gfc_b, x_gfc_e),
    ('COVID-19', x_covid_b, x_covid_e),
):
    ax.axvspan(span_start, span_end, facecolor='gray', alpha=0.4, label=span_label)

# Customize the grid
ax.grid(False, linestyle='', alpha=0.8)

# Major x-axis ticks every 4 years, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator(base=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# Tick-label font size on both y-axes
ax.tick_params(axis='both', which='both', labelsize=15)
ax.right_ax.tick_params(axis='both', which='both', labelsize=15)

# Use tight_layout for better spacing
plt.tight_layout()

# saving the graph in the Drive folder
plt.savefig('/content/drive/MyDrive/QACFM Data/3. Positive_Negative_Tone_Fed_Org_Clean_pp.png')

plt.show()


### Negative Sentiment and Stock market reactions

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Assuming df_sent_vars is your DataFrame with the necessary columns

plt.style.use(['science', 'notebook', 'grid'])

# Build the figure and its axes in ONE call so that figsize/dpi/facecolor apply
# to the figure that is actually drawn and saved. A separate plt.figure(...)
# followed by plt.subplots() only leaves an empty extra figure behind.
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, facecolor='white', edgecolor='none')

# Log of the three index columns on the left axis (legend entries are set
# explicitly further down, so no single `label` is passed for the three lines),
# cleaned negative sentiment on a secondary (right) axis.
np.log(df_market_data[['S&P 500','EURO_STOXX50','DAX30']]).plot(linestyle=':', marker='', color=['darkblue','#008000','black'], ax=ax)
df_sent_vars['NG_Sentiment_clean_pp'].plot(linestyle='-', marker='o', color='red', label='NG_Sentiment_Clean_pp', secondary_y=True, ax=ax)

# Set labels for both y-axes
ax.set_ylabel('Log Distribution of Indices', color='black', fontsize=12)
ax.right_ax.set_ylabel('Negative Sentiment Score', fontsize=12, color='red')
ax.set_xlabel("", fontsize=12)

# shaded regions (start / end dates)
x_dotcom_b = "2001-03-01"
x_dotcom_e = "2001-11-30"
x_gfc_b = "2007-12-01"
x_gfc_e = "2009-06-30"
x_covid_b = "2020-02-01"
x_covid_e = "2020-04-30"

# Shade regions
for span_label, span_start, span_end in (
    ('Dotcom', x_dotcom_b, x_dotcom_e),
    ('GFC', x_gfc_b, x_gfc_e),
    ('COVID-19', x_covid_b, x_covid_e),
):
    ax.axvspan(span_start, span_end, facecolor='gray', alpha=0.4, label=span_label)

# Customize the grid
ax.grid(False, linestyle='', alpha=0.8)

# Major x-axis ticks every 4 years, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator(base=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# Legend for the left-axis index lines only
ax.legend(loc = 'lower right', labels=['S&P 500','EUROSTOXX 50','DAX30'], fontsize = 11, framealpha=0)

# Tick-label font size on both y-axes
ax.tick_params(axis='both', which='both', labelsize=15)
ax.right_ax.tick_params(axis='both', which='both', labelsize=15)

# Use tight_layout for better spacing
plt.tight_layout()

# saving the graph in the Drive folder
plt.savefig('/content/drive/MyDrive/QACFM Data/4. FED_Tone_SnP500_EUROSTOX50_DAX30.png')

plt.show()


### Fed and ECB Rate with Negative Tone Sentiment

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Assuming df_sent_vars is your DataFrame with the necessary columns

plt.style.use(['science', 'notebook', 'grid'])

# Build the figure and its axes in ONE call so that figsize/dpi/facecolor apply
# to the figure that is actually drawn and saved. A separate plt.figure(...)
# followed by plt.subplots() only leaves an empty extra figure behind.
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, facecolor='white', edgecolor='none')

# FEDFUNDS and ECBMLFR on the left axis, cleaned negative sentiment on a secondary (right) axis
df_mkt_vars[['FEDFUNDS','ECBMLFR']].plot(linestyle='-', marker='.', color=['darkblue','#008000'], ax = ax)
df_sent_vars['NG_Sentiment_clean_pp'].plot(linestyle='-', marker='o', color='red', label='NG_Sentiment_Clean_pp', secondary_y=True, ax=ax)

# Set labels for both y-axes
ax.set_ylabel('Fund Rate (%)', color='black', fontsize=12)
ax.right_ax.set_ylabel('Negative Sentiment Score', fontsize=12, color='red')
ax.set_xlabel("", fontsize=12)

# shaded regions (start / end dates)
x_dotcom_b = "2001-03-01"
x_dotcom_e = "2001-11-30"
x_gfc_b = "2007-12-01"
x_gfc_e = "2009-06-30"
x_covid_b = "2020-02-01"
x_covid_e = "2020-04-30"

# Shade regions
for span_label, span_start, span_end in (
    ('Dotcom', x_dotcom_b, x_dotcom_e),
    ('GFC', x_gfc_b, x_gfc_e),
    ('COVID-19', x_covid_b, x_covid_e),
):
    ax.axvspan(span_start, span_end, facecolor='gray', alpha=0.4, label=span_label)

# Customize the grid
ax.grid(False, linestyle='', alpha=0.8)

# Major x-axis ticks every 4 years, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator(base=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# Legend for the left-axis rate lines only
ax.legend(loc = 'lower left', labels=['FEDFUNDS','ECBMLFR'], fontsize = 11, framealpha=0)

# Tick-label font size on both y-axes
ax.tick_params(axis='both', which='both', labelsize=15)
ax.right_ax.tick_params(axis='both', which='both', labelsize=15)

# Use tight_layout for better spacing
plt.tight_layout()

# saving the graph in the Drive folder
plt.savefig('/content/drive/MyDrive/QACFM Data/5. Fed_Ecb_Tone_sentiment.png')

plt.show()


### Fed and ECB rate

In [None]:
# import pandas as pd
# import matplotlib.pyplot as plt
# import matplotlib.dates as mdates

# # Assuming df_mkt_vars is your DataFrame with the necessary columns
# plt.figure(figsize=(12, 8), dpi=300, facecolor = 'white', edgecolor='white')
# plt.style.use(['science', 'notebook', 'grid'])

# # Plotting
# df_mkt_vars[['FEDFUNDS','ECBMLFR']].plot(linestyle='-', marker='.', color=['darkblue','#008000'])

# # Set labels
# plt.ylabel('Fund rate (%)', color='black', fontsize=12)
# plt.xlabel("", fontsize=12)

# # Shaded regions
# x_dotcom_b = "2001-03-01"
# x_dotcom_e = "2001-11-30"
# x_gfc_b = "2007-12-01"
# x_gfc_e = "2009-06-30"
# x_covid_b = "2020-02-01"
# x_covid_e = "2020-04-30"

# # Shade regions
# plt.axvspan(x_dotcom_b, x_dotcom_e, facecolor='gray', alpha=0.4, label='Dotcom')
# plt.axvspan(x_gfc_b, x_gfc_e, facecolor='gray', alpha=0.4, label='GFC')
# plt.axvspan(x_covid_b, x_covid_e, facecolor='gray', alpha=0.4, label='COVID-19')

# # Customize the grid
# plt.grid(True, linestyle='', alpha=0)

# # Set x-axis ticks at 3-year intervals
# plt.gca().xaxis.set_major_locator(mdates.YearLocator(base=4))
# plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# # Legend
# plt.legend(loc = 'upper right', labels=['FEDFUNDR','ECBLR'],fontsize = 11, framealpha=0)

# # Reduce font size of tick labels
# plt.tick_params(axis='both', which='major', labelsize=15)

# # Use tight_layout for better spacing
# plt.tight_layout()

# # Saving the graph in the specified directory
# # plt.savefig('/content/drive/MyDrive/QACFM Data/5. FED_ECB_RATE_TONE.png')  # Modify the path as necessary

# plt.show()


# Regression Analysis

### Correlation among the variables

In [None]:
# Correlation heatmap of the market variables, drawn with seaborn
fig = plt.figure(figsize=(12, 10), dpi=300, facecolor='white', edgecolor='none')
sns.set(font_scale=0.9)

# Pairwise correlations shown with a diverging colormap pinned to [-1, 1]
corr_matrix = df_mkt_vars.corr()
heatmap_options = {
    'cmap': 'Spectral',
    'cbar_kws': {'fraction': 0.25},
    'vmin': -1,
    'vmax': 1,
}
sns.heatmap(corr_matrix, **heatmap_options)

# Tighten the spacing, then write the figure to the Drive folder
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/QACFM Data/6. Correlation heatmap.png')

# Render the figure
plt.show()

### S&P500 return distributions

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Assuming df_mkt_vars is your DataFrame

# Set the figure size
plt.figure(figsize=(12, 8))

# Work on the non-missing returns only, so the normal fit is not fed NaNs
sp500_returns = df_mkt_vars["S&P500_PCT"].dropna()

# Plot histogram on a white background (density=True: bar areas sum to 1)
sp500_returns.plot(kind="hist", bins=100, density=True, color='blue', edgecolor='black', facecolor='white', label='Returns [GSPC]', fontsize=12)

# Fit a normal distribution to the data and overlay its pdf across the current x-range
mu, std = norm.fit(sp500_returns)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
# Colour is given once (the former 'k' format string conflicted with color="red"),
# and the label is a raw string so that \mu and \sigma are not read as escapes.
plt.plot(x, p, linewidth=2, color="red", label=r'$\mu$ = {:.4f}, $\sigma$ = {:.4f}'.format(mu, std))

# Add labels and title
plt.legend(loc="upper left", fontsize=12)
plt.xlabel("Daily Returns (S&P 500)", fontsize=12)
# The histogram is normalised (density=True), so the y-axis shows a density, not a count
plt.ylabel("Density", fontsize=12)
plt.title("Return Distributions of GSPC", fontsize=13)

# Customize the grid
plt.grid(True, linestyle='', alpha=0.5)

# Use tight_layout for better spacing
plt.tight_layout()

# Save the figure
plt.savefig('/content/drive/MyDrive/QACFM Data/7. SnP500_returns Distributions.png')

# Show the plot
plt.show()


### Calculating Changes in NG_Delta_Tau and PS_Delta_Phi

In [None]:
# create a dataframe with only event day with explanatory variables

# First differences of ECBMLFR and T10Y3M
df_mkt_vars[['ECBMLFR_DFF','T10Y3M_DFF']] = df_mkt_vars.loc[:,['ECBMLFR','T10Y3M']].diff()

# Log change of each cleaned sentiment score relative to the previous row
df_sent_vars['NG_Delta_Tau'] = np.log(df_sent_vars['NG_Sentiment_clean_pp'].div(df_sent_vars['NG_Sentiment_clean_pp'].shift(1)))
df_sent_vars['PS_Delta_Phi'] = np.log(df_sent_vars['PS_Sentiment_clean_pp'].div(df_sent_vars['PS_Sentiment_clean_pp'].shift(1)))

# Join on the index and keep complete rows only.
# A zero score makes the log ratio +/-inf, which dropna() alone would keep and
# which the later ADF tests / regressions cannot handle, so it is treated as missing.
# NOTE(review): iloc[:, 9:13] selects by position - confirm it still picks the
# intended sentiment columns if the column layout of df_sent_vars changes.
df_reg_vars = (
    pd.concat([df_sent_vars.iloc[:,9:13],df_mkt_vars],axis = 1)
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)
df_reg_vars.info()

In [None]:
# df_reg_vars['NG_tone_sent_clean_pp'] = np.log(df_reg_vars['NG_Sentiment_clean_pp'].div(df_reg_vars['NG_Sentiment_clean_pp'].shift(1)))
# df_reg_vars['PS_tone_sent_clean_pp'] = np.log(df_reg_vars['PS_Sentiment_clean_pp'].div(df_reg_vars['PS_Sentiment_clean_pp'].shift(1)))

# df_reg_vars.dropna(inplace = True)
# df_reg_vars.head()

### Summary Stats for the updated data

In [None]:
import pandas as pd
import warnings
from statsmodels.tsa.stattools import adfuller, kpss


# Alternatively, to suppress all warnings (not recommended for general use):
# warnings.filterwarnings("ignore")


# One row per regression variable: descriptive statistics plus ADF and KPSS results.
summary_rows_1 = []

for column, series in df_reg_vars.items():
    adf_result = adfuller(series)                 # index 0: statistic, index 1: p-value
    kpss_result = kpss(series, regression='c')    # index 0: statistic, index 1: p-value
    summary_stats = series.describe()

    row = {'Series': column, 'Count': round(summary_stats['count'],0)}
    # descriptive statistics, rounded to three decimals
    for out_name, stat_key in (('Min', 'min'), ('Mean', 'mean'), ('Std Dev', 'std'), ('Max', 'max')):
        row[out_name] = round(summary_stats[stat_key],3)
    # test statistics and p-values, rounded to three decimals
    row['ADF Statistic'] = round(adf_result[0],3)
    row['ADF p-value'] = round(adf_result[1],3)
    row['KPSS Statistic'] = round(kpss_result[0],3)
    row['KPSS p-value'] = round(kpss_result[1],3)
    # ... Add other summary stats as needed

    summary_rows_1.append(row)

summary_results_1 = pd.DataFrame(summary_rows_1)


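# Reading the two tests (their null hypotheses are opposite):
#   - ADF:  H0 = the series has a unit root (non-stationary). A p-value below 10% rejects H0, i.e. evidence that the series is stationary.
#   - KPSS: H0 = the series is stationary (regression='c'). A p-value below 10% rejects H0, i.e. evidence AGAINST stationarity.
# A series is therefore supported as stationary when the ADF test rejects and the KPSS test does not.
# Stationarity is plausible for the series expressed as percentage changes rather than real values (built in first difference).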

# Function to determine significance
def significance_stars(p_value):
    """Return the significance marker for a p-value.

    '***' for p < 0.01, '**' for p < 0.05, '*' for p <= 0.1, '' otherwise.
    """
    # strict cut-offs first, tightest level wins
    for cutoff, stars in ((0.01, '***'), (0.05, '**')):
        if p_value < cutoff:
            return stars
    # the 10% level is inclusive
    return '*' if p_value <= 0.1 else ''

def significance_stars_kpss(p_value):
    """Return the significance marker for a KPSS p-value.

    statsmodels' kpss() interpolates its p-value from a table and returns the
    boundary value when the statistic falls outside it, so the reported value
    is confined to [0.01, 0.1]:
      * 0.01 means "p is at most 0.01"  -> significant at 1%  ('***')
      * 0.1  means "p is at least 0.1"  -> not significant at 10% ('')
    The boundaries are therefore handled the opposite way from a plain
    `p < 0.01` / `p <= 0.1` rule.

    Parameters
    ----------
    p_value : float
        p-value as returned by kpss().

    Returns
    -------
    str
        '***', '**', '*' or ''.
    """
    if p_value <= 0.01:
        return '***'
    elif p_value < 0.05:
        return '**'
    elif p_value < 0.10:
        return '*'
    else:
        return ''

# Apply the function to each p-value column and create a combined column
columns_to_check_adf = ['ADF p-value']  # Add other columns as needed
columns_to_check_kpss = ['KPSS p-value']  # Add other columns as needed

# Map each listed column element-wise with Series.map (DataFrame.applymap is
# deprecated since pandas 2.1), then join the markers of a row into one string.
summary_results_1['ADF_Sig'] = summary_results_1[columns_to_check_adf].apply(lambda col: col.map(significance_stars)).agg(''.join, axis=1)
summary_results_1['KPSS_Sig'] = summary_results_1[columns_to_check_kpss].apply(lambda col: col.map(significance_stars_kpss)).agg(''.join, axis=1)

summary_results_1.to_csv('/content/drive/MyDrive/QACFM Data/Summary_Stat_1.csv', index=False)

# Display the updated DataFrame
summary_results_1.head(3)

### VIF values for updated data

In [None]:
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor

from statsmodels.tools.tools import add_constant

# Assuming you have a DataFrame named 'df_reg_vars'

# Specify the columns to exclude (0-based indices)
exclude_columns = [0,1,4,5,8]

# Get the remaining columns for VIF calculation
selected_columns = df_reg_vars.columns[~df_reg_vars.columns.isin(df_reg_vars.columns[exclude_columns])]

# variance_inflation_factor() regresses one column on all the other columns
# exactly as given, so the intercept has to be part of the design matrix.
# add_constant() prepends it as column 0; the constant itself is not reported.
vif_exog = add_constant(df_reg_vars[selected_columns], has_constant='add')

# Calculate VIF for each selected column
vif_data = pd.DataFrame()
vif_data["Variable"] = selected_columns
vif_data["VIF"] = [variance_inflation_factor(vif_exog.values, i) for i in range(1, vif_exog.shape[1])]

print(vif_data)


### Model 1: EVENT DAY models using different controls: Without Interaction terms

### Plotting Evolutions of $\delta\tau$ & $\delta\phi$

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Assuming df_reg_vars is your DataFrame with the necessary columns

plt.style.use(['science', 'notebook', 'grid'])

# Build the figure and its axes in ONE call so that figsize/dpi/facecolor apply
# to the figure that is actually drawn and saved. A separate plt.figure(...)
# followed by plt.subplots() only leaves an empty extra figure behind.
fig, ax = plt.subplots(figsize=(12, 8), dpi=300, facecolor='white')

# PS_Delta_Phi on the left axis, NG_Delta_Tau on a secondary (right) axis
df_reg_vars[['PS_Delta_Phi']].plot(linestyle='--', marker='.', color='blue', ax=ax)
df_reg_vars[['NG_Delta_Tau']].plot(linestyle='-', marker='.', color='red', secondary_y=True, ax=ax)

# Enable LaTeX rendering
# NOTE(review): this changes the global rcParams, so it also affects every
# figure drawn after this cell, and it presumably needs a LaTeX installation
# on the machine - confirm that both are intended.
plt.rcParams['text.usetex'] = True

# Set labels for both y-axes
ax.set_ylabel(r'Changes in Positive Sentiment ($\delta\phi$)', color='blue', fontsize=12)
ax.right_ax.set_ylabel(r'Changes in Negative Sentiment ($\delta\tau$)', fontsize=12, color='red')
ax.set_xlabel("", fontsize=12)

# shaded regions (start / end dates)
x_dotcom_b = "2001-03-01"
x_dotcom_e = "2001-11-30"
x_gfc_b = "2007-12-01"
x_gfc_e = "2009-06-30"
x_covid_b = "2020-02-01"
x_covid_e = "2020-04-30"

# Shade regions
for span_label, span_start, span_end in (
    ('Dotcom', x_dotcom_b, x_dotcom_e),
    ('GFC', x_gfc_b, x_gfc_e),
    ('COVID-19', x_covid_b, x_covid_e),
):
    ax.axvspan(span_start, span_end, facecolor='gray', alpha=0.4, label=span_label)

# Hide the legend
ax.legend().set_visible(False)

# Customize the grid
ax.grid(True, linestyle='', alpha=0.8)

# Major x-axis ticks every 4 years, labelled with the year
ax.xaxis.set_major_locator(mdates.YearLocator(base=4))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Adjust the format as needed

# Tick-label font size on both y-axes
ax.tick_params(axis='both', which='both', labelsize=15)
ax.right_ax.tick_params(axis='both', which='both', labelsize=15)

# Use tight_layout for better spacing
plt.tight_layout()

# White axes background and a visible top spine.
# NOTE(review): the original repeated the 'top' line twice verbatim; possibly
# a second spine was meant - confirm.
ax.patch.set_facecolor('white')
ax.spines['top'].set_visible(True)

# saving the graph in the Drive folder
plt.savefig('/content/drive/MyDrive/QACFM Data/6. NG_Tau_PS_phi.png')

plt.show()


### Model 01: $\delta\tau$ and $R_t$

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt


from IPython.display import HTML, display
from stargazer.stargazer import Stargazer


def _fit_ols(y, regressors):
    """Fit an OLS regression of `y` on the named df_reg_vars columns plus an intercept.

    Parameters
    ----------
    y : pandas.Series
        Dependent variable.
    regressors : list of str
        Column names of df_reg_vars used as explanatory variables.

    Returns
    -------
    tuple
        (design matrix including the 'const' column, fitted statsmodels results).
    """
    design = sm.add_constant(df_reg_vars.loc[:, regressors])
    return design, sm.OLS(y, design).fit()


# y-variables
y1_US = df_reg_vars['S&P500_PCT']
y2_EU = df_reg_vars['EURO_STOXX50_PCT']  # defined for completeness; every model in this cell regresses y1_US

# Each model regresses y1_US on NG_Delta_Tau plus one group of controls.
# The X*/model* names are kept so that other cells can keep referring to them.

# model 1
X1_policy, model1 = _fit_ols(y1_US, ['NG_Delta_Tau','FEDFUNDS','ECBMLFR_DFF', 'MSIMZMP_PCT'])
# print(model1.summary())

# model 2
X2_macs, model2 = _fit_ols(y1_US, ['NG_Delta_Tau','GDP_GROWTH','UNRATE','CPIAUCSL_PCT','Export_YOY','PPIACO_PCT','CRUDE_OIL_PCT','GOLD_PCT'])
# print(model2.summary())

# model 3
X3_exts, model3 = _fit_ols(y1_US, ['NG_Delta_Tau','USD_EURO_PCT','EUTB10_PCT','EURO_STOXX50_PCT','DAX30_PCT'])
# print(model3.summary())

# model 4
X4_vol, model4 = _fit_ols(y1_US, ['NG_Delta_Tau','USTB10_PCT','T10Y3M_DFF','VIXCLS_PCT'])
# print(model4.summary())

# model 5
X5_conf, model5 = _fit_ols(y1_US, ['NG_Delta_Tau','CCI_PCT','EPI_PCT'])
# print(model5.summary())

# model 6
X6_epu, model6 = _fit_ols(y1_US, ['NG_Delta_Tau','USEPUINDXD_PCT','UMCSENT_PCT'])
# print(model6.summary())

# model 7: all control groups together
X7_all, model7 = _fit_ols(y1_US, ['NG_Delta_Tau', 'MSIMZMP_PCT', 'FEDFUNDS','ECBMLFR_DFF',
                                  'GDP_GROWTH','UNRATE','CPIAUCSL_PCT','Export_YOY','PPIACO_PCT','CRUDE_OIL_PCT','GOLD_PCT',
                                  'USD_EURO_PCT','EUTB10_PCT', 'EURO_STOXX50_PCT','DAX30_PCT',
                                  'USTB10_PCT','VIXCLS_PCT',  'T10Y3M_DFF',
                                  'CCI_PCT','EPI_PCT',
                                  'USEPUINDXD_PCT','UMCSENT_PCT'])
# print(model7.summary())

# model 8: tone only, no controls
X8_tone, model8 = _fit_ols(y1_US, ['NG_Delta_Tau'])
# print(model8.summary())

# ----------------------------------------------------

# printing the results in a cleaner way: one table with model1 ... model8 side by side
stargazer = Stargazer([model1,model2,model3,model4,model5,model6, model7,model8])
regression_table_html = stargazer.render_html()

# Display the HTML table in Jupyter Notebook.
# display() is required because a bare HTML(...) expression is only rendered
# when it is the LAST statement of a cell, which it is not here.
display(HTML(regression_table_html))

# Save the HTML to a file
with open('regression_results_NG_01.html', 'w') as file:
    file.write(regression_table_html)



### Model 02: $\delta\phi$ and $R_t$

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from IPython.display import HTML, display
from stargazer.stargazer import Stargazer

# Dependent variables. Every regression in this cell uses y1_US; y2_EU is
# defined here but not used below.
y1_US = df_reg_vars['S&P500_PCT']
y2_EU = df_reg_vars['EURO_STOXX50_PCT']

# Tone measure included in every specification of this cell.
tone_col = 'PS_Delta_Phi'

# Control groups. The order inside each list (and the order in which the
# groups are concatenated for model 7) fixes the row order of the table.
policy_controls = ['FEDFUNDS', 'ECBMLFR_DFF', 'MSIMZMP_PCT']
macro_controls = ['GDP_GROWTH', 'UNRATE', 'CPIAUCSL_PCT', 'Export_YOY',
                  'PPIACO_PCT', 'CRUDE_OIL_PCT', 'GOLD_PCT']
external_controls = ['USD_EURO_PCT', 'EUTB10_PCT', 'EURO_STOXX50_PCT', 'DAX30_PCT']
volatility_controls = ['USTB10_PCT', 'T10Y3M_DFF', 'VIXCLS_PCT']
confidence_controls = ['CCI_PCT', 'EPI_PCT']
uncertainty_controls = ['USEPUINDXD_PCT', 'UMCSENT_PCT']

# Each design matrix is the tone column plus one control group, with an
# intercept column added by sm.add_constant; each model is a plain OLS fit.

# model 1: tone + policy controls
X1_policy = sm.add_constant(df_reg_vars.loc[:, [tone_col] + policy_controls])
model1 = sm.OLS(y1_US, X1_policy).fit()

# model 2: tone + macro controls
X2_macs = sm.add_constant(df_reg_vars.loc[:, [tone_col] + macro_controls])
model2 = sm.OLS(y1_US, X2_macs).fit()

# model 3: tone + external controls
X3_exts = sm.add_constant(df_reg_vars.loc[:, [tone_col] + external_controls])
model3 = sm.OLS(y1_US, X3_exts).fit()

# model 4: tone + volatility controls
X4_vol = sm.add_constant(df_reg_vars.loc[:, [tone_col] + volatility_controls])
model4 = sm.OLS(y1_US, X4_vol).fit()

# model 5: tone + confidence controls
X5_conf = sm.add_constant(df_reg_vars.loc[:, [tone_col] + confidence_controls])
model5 = sm.OLS(y1_US, X5_conf).fit()

# model 6: tone + uncertainty / sentiment controls
X6_epu = sm.add_constant(df_reg_vars.loc[:, [tone_col] + uncertainty_controls])
model6 = sm.OLS(y1_US, X6_epu).fit()

# model 7: tone + all control groups
all_controls = (policy_controls + macro_controls + external_controls
                + volatility_controls + confidence_controls + uncertainty_controls)
X7_all = sm.add_constant(df_reg_vars.loc[:, [tone_col] + all_controls])
model7 = sm.OLS(y1_US, X7_all).fit()

# model 8: tone only
X8_tone = sm.add_constant(df_reg_vars.loc[:, [tone_col]])
model8 = sm.OLS(y1_US, X8_tone).fit()

# Side-by-side table of the eight specifications (model1 ... model8).
stargazer = Stargazer([model1, model2, model3, model4,
                       model5, model6, model7, model8])

# Render once and reuse the markup for both the notebook output and the file.
html_table = stargazer.render_html()

# Jupyter only auto-displays the value of a cell's last expression. The file
# write below comes after this point, so display() is required for the table
# to actually appear in the notebook.
display(HTML(html_table))

# Save the same HTML to a file (explicit encoding keeps the output portable).
with open('regression_results_PS_02.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html_table)



#### Model 03: $\delta\tau$ and $R_t$, including interaction terms

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from IPython.display import HTML, display
from stargazer.stargazer import Stargazer

# Dependent variables. Every regression in this cell uses y1_US; y2_EU is
# defined here but not used below.
y1_US = df_reg_vars['S&P500_PCT']
y2_EU = df_reg_vars['EURO_STOXX50_PCT']

# Tone measure that is interacted with the controls in this cell.
tone_col = 'NG_Delta_Tau'

# Pattern used for every model below:
#   1. select the tone column and one group of controls,
#   2. append tone x control product columns (NGT_* names),
#   3. add an intercept column and fit OLS on y1_US.
# .assign() appends the new columns in the order written, which fixes the row
# order of the results table.

# model 1: policy controls
X1_policy = df_reg_vars.loc[:, [tone_col, 'FEDFUNDS', 'ECBMLFR_DFF', 'MSIMZMP_PCT']]
X1_policy = sm.add_constant(X1_policy.assign(
    NGT_FED=X1_policy[tone_col] * X1_policy['FEDFUNDS'],
    NGT_ECB=X1_policy[tone_col] * X1_policy['ECBMLFR_DFF'],
    NGT_MSI=X1_policy[tone_col] * X1_policy['MSIMZMP_PCT'],
))
model1 = sm.OLS(y1_US, X1_policy).fit()
print(model1.summary())

# model 2: macro controls (only CPI, PPI, GDP growth and unemployment are interacted)
X2_macs = df_reg_vars.loc[:, [tone_col, 'GDP_GROWTH', 'UNRATE', 'CPIAUCSL_PCT',
                              'Export_YOY', 'PPIACO_PCT', 'CRUDE_OIL_PCT', 'GOLD_PCT']]
X2_macs = sm.add_constant(X2_macs.assign(
    NGT_CPI=X2_macs[tone_col] * X2_macs['CPIAUCSL_PCT'],
    NGT_PPI=X2_macs[tone_col] * X2_macs['PPIACO_PCT'],
    NGT_GDP=X2_macs[tone_col] * X2_macs['GDP_GROWTH'],
    NGT_UNR=X2_macs[tone_col] * X2_macs['UNRATE'],
))
model2 = sm.OLS(y1_US, X2_macs).fit()
print(model2.summary())

# model 3: external controls
X3_exts = df_reg_vars.loc[:, [tone_col, 'USD_EURO_PCT', 'EUTB10_PCT',
                              'EURO_STOXX50_PCT', 'DAX30_PCT']]
X3_exts = sm.add_constant(X3_exts.assign(
    NGT_USDEURO=X3_exts[tone_col] * X3_exts['USD_EURO_PCT'],
    NGT_EUT10=X3_exts[tone_col] * X3_exts['EUTB10_PCT'],
    NGT_EUSTM=X3_exts[tone_col] * X3_exts['EURO_STOXX50_PCT'],
    NGT_DAX=X3_exts[tone_col] * X3_exts['DAX30_PCT'],
))
model3 = sm.OLS(y1_US, X3_exts).fit()
print(model3.summary())

# Models 4-6 are fitted without printing their summaries; they appear only in
# the combined table at the end of the cell.

# model 4: volatility controls
X4_vol = df_reg_vars.loc[:, [tone_col, 'USTB10_PCT', 'T10Y3M_DFF', 'VIXCLS_PCT']]
X4_vol = sm.add_constant(X4_vol.assign(
    NGT_YSP=X4_vol[tone_col] * X4_vol['T10Y3M_DFF'],
    NGT_VIX=X4_vol[tone_col] * X4_vol['VIXCLS_PCT'],
    NGT_BVOL=X4_vol[tone_col] * X4_vol['USTB10_PCT'],
))
model4 = sm.OLS(y1_US, X4_vol).fit()

# model 5: confidence controls
X5_conf = df_reg_vars.loc[:, [tone_col, 'CCI_PCT', 'EPI_PCT']]
X5_conf = sm.add_constant(X5_conf.assign(
    NGT_CCONF=X5_conf[tone_col] * X5_conf['CCI_PCT'],
    NGT_ECONF=X5_conf[tone_col] * X5_conf['EPI_PCT'],
))
model5 = sm.OLS(y1_US, X5_conf).fit()

# model 6: uncertainty / sentiment controls
X6_epu = df_reg_vars.loc[:, [tone_col, 'USEPUINDXD_PCT', 'UMCSENT_PCT']]
X6_epu = sm.add_constant(X6_epu.assign(
    NGT_EPU=X6_epu[tone_col] * X6_epu['USEPUINDXD_PCT'],
    NGT_ESENT=X6_epu[tone_col] * X6_epu['UMCSENT_PCT'],
))
model6 = sm.OLS(y1_US, X6_epu).fit()

# Side-by-side table of the six specifications (model1 ... model6).
stargazer = Stargazer([model1, model2, model3, model4, model5, model6])

# Render once and reuse the markup for both the notebook output and the file.
html_table = stargazer.render_html()

# Jupyter only auto-displays the value of a cell's last expression. The file
# write below comes after this point, so display() is required for the table
# to actually appear in the notebook.
display(HTML(html_table))

# Save the same HTML to a file (explicit encoding keeps the output portable).
with open('regression_results_NG_Interact_01.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html_table)


#### Model 04: $\delta\phi$ and $R_t$, including interaction terms

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from IPython.display import HTML, display
from stargazer.stargazer import Stargazer

# Dependent variables. Every regression in this cell uses y1_US; y2_EU is
# defined here but not used below.
y1_US = df_reg_vars['S&P500_PCT']
y2_EU = df_reg_vars['EURO_STOXX50_PCT']

# Tone measure that is interacted with the controls in this cell.
tone_col = 'PS_Delta_Phi'

# Pattern used for every model below:
#   1. select the tone column and one group of controls,
#   2. append tone x control product columns,
#   3. add an intercept column and fit OLS on y1_US.
# The product columns are prefixed PSP_ (PS_Delta_Phi x control) so that the
# results table does not label them as NG-tone (NGT_) interactions.
# .assign() appends the new columns in the order written, which fixes the row
# order of the results table.

# model 1: policy controls
X1_policy = df_reg_vars.loc[:, [tone_col, 'FEDFUNDS', 'ECBMLFR_DFF', 'MSIMZMP_PCT']]
X1_policy = sm.add_constant(X1_policy.assign(
    PSP_FED=X1_policy[tone_col] * X1_policy['FEDFUNDS'],
    PSP_ECB=X1_policy[tone_col] * X1_policy['ECBMLFR_DFF'],
    PSP_MSI=X1_policy[tone_col] * X1_policy['MSIMZMP_PCT'],
))
model1 = sm.OLS(y1_US, X1_policy).fit()
print(model1.summary())

# model 2: macro controls (only CPI, PPI, GDP growth and unemployment are interacted)
X2_macs = df_reg_vars.loc[:, [tone_col, 'GDP_GROWTH', 'UNRATE', 'CPIAUCSL_PCT',
                              'Export_YOY', 'PPIACO_PCT', 'CRUDE_OIL_PCT', 'GOLD_PCT']]
X2_macs = sm.add_constant(X2_macs.assign(
    PSP_CPI=X2_macs[tone_col] * X2_macs['CPIAUCSL_PCT'],
    PSP_PPI=X2_macs[tone_col] * X2_macs['PPIACO_PCT'],
    PSP_GDP=X2_macs[tone_col] * X2_macs['GDP_GROWTH'],
    PSP_UNR=X2_macs[tone_col] * X2_macs['UNRATE'],
))
model2 = sm.OLS(y1_US, X2_macs).fit()
print(model2.summary())

# model 3: external controls
X3_exts = df_reg_vars.loc[:, [tone_col, 'USD_EURO_PCT', 'EUTB10_PCT',
                              'EURO_STOXX50_PCT', 'DAX30_PCT']]
X3_exts = sm.add_constant(X3_exts.assign(
    PSP_USDEURO=X3_exts[tone_col] * X3_exts['USD_EURO_PCT'],
    PSP_EUT10=X3_exts[tone_col] * X3_exts['EUTB10_PCT'],
    PSP_EUSTM=X3_exts[tone_col] * X3_exts['EURO_STOXX50_PCT'],
    PSP_DAX=X3_exts[tone_col] * X3_exts['DAX30_PCT'],
))
model3 = sm.OLS(y1_US, X3_exts).fit()
print(model3.summary())

# Models 4-6 are fitted without printing their summaries; they appear only in
# the combined table at the end of the cell.

# model 4: volatility controls
X4_vol = df_reg_vars.loc[:, [tone_col, 'USTB10_PCT', 'T10Y3M_DFF', 'VIXCLS_PCT']]
X4_vol = sm.add_constant(X4_vol.assign(
    PSP_YSP=X4_vol[tone_col] * X4_vol['T10Y3M_DFF'],
    PSP_VIX=X4_vol[tone_col] * X4_vol['VIXCLS_PCT'],
    PSP_BVOL=X4_vol[tone_col] * X4_vol['USTB10_PCT'],
))
model4 = sm.OLS(y1_US, X4_vol).fit()

# model 5: confidence controls
X5_conf = df_reg_vars.loc[:, [tone_col, 'CCI_PCT', 'EPI_PCT']]
X5_conf = sm.add_constant(X5_conf.assign(
    PSP_CCONF=X5_conf[tone_col] * X5_conf['CCI_PCT'],
    PSP_ECONF=X5_conf[tone_col] * X5_conf['EPI_PCT'],
))
model5 = sm.OLS(y1_US, X5_conf).fit()

# model 6: uncertainty / sentiment controls
X6_epu = df_reg_vars.loc[:, [tone_col, 'USEPUINDXD_PCT', 'UMCSENT_PCT']]
X6_epu = sm.add_constant(X6_epu.assign(
    PSP_EPU=X6_epu[tone_col] * X6_epu['USEPUINDXD_PCT'],
    PSP_ESENT=X6_epu[tone_col] * X6_epu['UMCSENT_PCT'],
))
model6 = sm.OLS(y1_US, X6_epu).fit()

# Side-by-side table of the six specifications (model1 ... model6).
stargazer = Stargazer([model1, model2, model3, model4, model5, model6])

# Render once and reuse the markup for both the notebook output and the file.
html_table = stargazer.render_html()

# Jupyter only auto-displays the value of a cell's last expression. The file
# write below comes after this point, so display() is required for the table
# to actually appear in the notebook.
display(HTML(html_table))

# Save the same HTML to a file (explicit encoding keeps the output portable).
with open('regression_results_PS_Interact_02.html', 'w', encoding='utf-8') as html_file:
    html_file.write(html_table)
