<a href="https://colab.research.google.com/github/AnastasiiaVoll/-Geopolitics-of-Renewable-Energy-time-varying-interactions-between-geopolitical-risk-and-renewable/blob/main/Question_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
pip install pandas statsmodels



In [51]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import t as t_dist
from scipy import stats
import statsmodels.api as sm

# Load data
df = pd.read_csv('EGCPrices.csv')

In [52]:
# Create the logarithm of the coal, gas, and electricity price
df['pc_t'] = np.log(df['P_Coal'])
df['pg_t'] = np.log(df['P_Gas'])
df['pe_t'] = np.log(df['P_Ele'])

# Create lagged variables
df['pc_t-1'] = df['pc_t'].shift(1)
df['pg_t-1'] = df['pg_t'].shift(1)
df['pe_t-1'] = df['pe_t'].shift(1)

In [53]:
# Filter data between February 1st, 2015 and December 31st, 2019
start_date = "2015-02-01"
end_date = "2019-12-31"
mask = (pd.to_datetime(df[['year', 'month', 'day']]) >= start_date) & (pd.to_datetime(df[['year', 'month', 'day']]) <= end_date)
df_filtered = df[mask].copy()

# Drop missing values (due to lag creation)
df_filtered = df_filtered.dropna()

In [54]:
## (a1)
# Define independent and dependent variables
X = df_filtered[['pg_t-1', 'pe_t', 'pe_t-1', 'pc_t', 'pc_t-1']]
X = sm.add_constant(X)
y = df_filtered['pg_t']

# Fit the regression model
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   pg_t   R-squared:                       0.977
Model:                            OLS   Adj. R-squared:                  0.977
Method:                 Least Squares   F-statistic:                 1.512e+04
Date:                Thu, 28 Sep 2023   Prob (F-statistic):               0.00
Time:                        17:38:00   Log-Likelihood:                 3218.0
No. Observations:                1795   AIC:                            -6424.
Df Residuals:                    1789   BIC:                            -6391.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0059      0.017     -0.336      0.7

In [55]:
## (a2)
# Compute t statistic for the hypothesis that beta4 equals -beta5
beta4 = model.params['pc_t']
beta5 = model.params['pc_t-1']
var_beta4 = model.cov_params().loc['pc_t', 'pc_t']
var_beta5 = model.cov_params().loc['pc_t-1', 'pc_t-1']
cov_beta4_beta5 = model.cov_params().loc['pc_t', 'pc_t-1']

t_stat = (beta4 + beta5) / np.sqrt(var_beta4 + var_beta5 + 2 * cov_beta4_beta5)
print(f"T statistic: {t_stat}")

## (a2)
# Exact test
df = len(df_filtered) - 6  # subtracting the number of parameters, including the intercept
p_value_exact = 2 * (1 - t_dist.cdf(np.abs(t_stat), df))
print(f"P-value (exact test): {p_value_exact}")


T statistic: 3.20400911686291
P-value (exact test): 0.0013791465533374847


In [56]:
##(a3)
n_bootstrap = 1000
bootstrap_t_stats = []

residuals = model.resid

for _ in range(n_bootstrap):
    # Sample residuals with replacement
    residuals_resample = np.random.choice(residuals, size=len(residuals))
    y_resample = model.predict(X) + residuals_resample

    model_resample = sm.OLS(y_resample, X).fit()

    beta4_resample = model_resample.params['pc_t']
    beta5_resample = model_resample.params['pc_t-1']
    var_beta4_resample = model_resample.cov_params().loc['pc_t', 'pc_t']
    var_beta5_resample = model_resample.cov_params().loc['pc_t-1', 'pc_t-1']
    cov_beta4_beta5_resample = model_resample.cov_params().loc['pc_t', 'pc_t-1']

    t_stat_resample = (beta4_resample + beta5_resample) / np.sqrt(var_beta4_resample + var_beta5_resample + 2 * cov_beta4_beta5_resample)
    bootstrap_t_stats.append(t_stat_resample)

# Computing the results
p_value_bootstrap = np.mean(np.abs(bootstrap_t_stats) > np.abs(t_stat))
critical_value_90 = np.percentile(bootstrap_t_stats, [5, 95])  # for two-tailed test at alpha = 0.10
mean_t_stat_bootstrap = np.mean(bootstrap_t_stats)
std_t_stat_bootstrap = np.std(bootstrap_t_stats)

# Printing out the results
print(f"Mean T-statistic (bootstrap): {mean_t_stat_bootstrap}")
print(f"Standard Deviation of T-statistic (bootstrap): {std_t_stat_bootstrap}")
print(f"P-value (bootstrap): {p_value_bootstrap}")
print(f"Critical Values for 90% Confidence (bootstrap): {critical_value_90}")
print(f"Original T-statistic: {t_stat}")


Mean T-statistic (bootstrap): 3.3222160261649414
Standard Deviation of T-statistic (bootstrap): 1.1903381804894109
P-value (bootstrap): 0.542
Critical Values for 90% Confidence (bootstrap): [1.38410547 5.23939097]
Original T-statistic: 3.20400911686291


In [57]:
## (b1)
# Create year dummies
df_filtered['year'] = df_filtered['year'].astype(str)
year_dummies = pd.get_dummies(df_filtered['year'], prefix='year', drop_first=True)

# Extend the dataset with year dummies
X_extended = pd.concat([X, year_dummies], axis=1)

# Estimate the extended model
model_extended = sm.OLS(Y, X_extended).fit()
print(model_extended.summary())


                            OLS Regression Results                            
Dep. Variable:                   pg_t   R-squared:                       0.977
Model:                            OLS   Adj. R-squared:                  0.977
Method:                 Least Squares   F-statistic:                     8546.
Date:                Thu, 28 Sep 2023   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                 3234.9
No. Observations:                1795   AIC:                            -6450.
Df Residuals:                    1785   BIC:                            -6395.
Df Model:                           9                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0287      0.028     -1.029      0.3

In [58]:
## (b2)
n_bootstrap = 1000
f_statistics = []

residuals_extended = model_extended.resid
y_fitted_extended = model_extended.fittedvalues

for _ in range(n_bootstrap):
    # Sample residuals with replacement
    residuals_resample = np.random.choice(residuals_extended, size=len(residuals_extended))
    y_resample = y_fitted_extended + residuals_resample

    # Refit models on resampled data
    model_original_resample = sm.OLS(y_resample, X).fit()
    model_extended_resample = sm.OLS(y_resample, X_extended).fit()

    # Calculate F-statistic
    rss_restricted = sum(model_original_resample.resid**2)
    rss_unrestricted = sum(model_extended_resample.resid**2)
    df_restricted = len(Y) - X.shape[1]
    df_unrestricted = len(Y) - X_extended.shape[1]
    f_stat = ((rss_restricted - rss_unrestricted) / (df_restricted - df_unrestricted)) / (rss_unrestricted / df_unrestricted)
    f_statistics.append(f_stat)

# Calculate p-value from F-statistics
p_value_f_test = np.mean(np.array(f_statistics) >= model_extended.fvalue)
print(f"P-value from parametric bootstrap F-test: {p_value_f_test}")



P-value from parametric bootstrap F-test: 0.0
