# **Estimate the day-ahead merit-order effect of renewable energy for Sweden**

#### **Setting Libraries**

In [1]:
pip install statsmodels --upgrade --pre

Note: you may need to restart the kernel to use updated packages.


In [2]:
import re
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt
import statsmodels.tsa.api as smt

#### **Loading the data**

In [3]:
# Load the pre_panel dataframe
pre_panel = pd.read_csv('pre_panel.csv')

# Sorting by 'entity' and 'time' in descending order
pre_panel = pre_panel.sort_values(by=['subject_id', 'date'], ascending=[True, True])

# Convert the date column to datetime format
pre_panel['date'] = pd.to_datetime(pre_panel['date'], utc=True, format='%Y-%m-%d %H:%M:%S')

#### **Cleaning Database and Creating Dummy and First Difference Variables**

In [4]:
# Rename 'subject_id' to 'entity' and 'date' to 'time'
pre_panel = pre_panel.rename(columns={'subject_id': 'zona', 'date': 'fecha'})

# Create 'dum_ss' and 'dum_aw'
pre_panel['dum_ss'] = pre_panel['mes'].isin([3, 4, 5, 6, 7, 8]).astype(int)
pre_panel['dum_aw'] = pre_panel['mes'].isin([9, 10, 11, 12, 1, 2]).astype(int)

# Create 'dum_wd' and 'dum_we'
pre_panel['dum_wd'] = pre_panel['wd'].isin([1, 2, 3, 4]).astype(int)
pre_panel['dum_we'] = pre_panel['wd'].isin([5, 6, 7]).astype(int)

for col in ['hydro', 'solar', 'wind', 'other', 'load_var']:
    # Calculate the first difference of each variable and add it as a new column in the DataFrame
    pre_panel[col + '_diff'] = pre_panel[col] - pre_panel[col].shift(periods=168)

#### **Creating the Panel**

In [5]:
# Create a definite Pane
panel = pre_panel
# Check for missing values in pre_panel
missing_values = panel.isnull().sum()
# Display the columns with missing values, if any
print("Columns with missing values:")
print(missing_values[missing_values > 0])

Columns with missing values:
load_a             8
hydro             16
solar            192
wind             192
other             16
load_var           8
hydro_diff        32
solar_diff       201
wind_diff        201
other_diff        32
load_var_diff     16
dtype: int64


In [None]:
# Dropping Missing Values
panel = panel.dropna()
# Rename 'subject_id' to 'entity' and 'date' to 'time'
panel = panel.rename(columns={'zona': 'id', 'fecha': 'time'})
# Generating External Index
time_index = pd.Index(pre_panel['fecha'])
entity_index = pd.Index(pre_panel['zona'])

In [6]:
# Seting Multindex
panel.set_index(['time','id'], inplace=True)

## **Descriptive Statistics**

#### **Summary Statistics**

In [19]:
# Assuming 'pre_panel' is your DataFrame
vars = ['price', 'hydro', 'solar', 'wind', 'other', 'load_var']

# Calculate mean, standard deviation, min, 25%, 50%, 75%, and max
mean_values = panel[vars].apply(np.mean)
std_dev_values =panel[vars].apply(np.std)
min_values = panel[vars].min()
q25_values = panel[vars].quantile(0.25)
median_values = panel[vars].median()
q75_values = panel[vars].quantile(0.75)
max_values = panel[vars].max()

# Create a DataFrame to display the summary statistics
summary_stats = pd.DataFrame({
    'Mean': mean_values,
    'Std Dev': std_dev_values,
    'Min': min_values,
    '25%': q25_values,
    'Median': median_values,
    '75%': q75_values,
    'Max': max_values
})

print(summary_stats)

                 Mean      Std Dev      Min     25%   Median      75%      Max
price       74.815475    93.170488   -60.04   17.26    41.01    95.59   799.97
hydro     1919.544068  1690.137853     6.00  297.00  1433.00  3152.00  6577.00
solar       31.841635    85.078865     0.00    0.00     0.00    10.00   745.00
wind       960.981419   873.203355     3.00  313.00   716.00  1331.00  7017.00
other      222.396242   275.562723     0.00   41.00   126.00   255.00  1463.00
load_var    46.064608   234.257160 -6441.00  -76.00    37.00   162.00  1734.00


#### **Correlation Matrix**

In [20]:
correlation_matrix = panel[['price', 'hydro', 'solar', 'wind', 'other', 'load_var']].corr()
print(correlation_matrix)

             price     hydro     solar      wind     other  load_var
price     1.000000 -0.116484  0.053038 -0.309823  0.299406 -0.072351
hydro    -0.116484  1.000000 -0.240234  0.173884 -0.243385  0.062293
solar     0.053038 -0.240234  1.000000 -0.147080  0.151170  0.048625
wind     -0.309823  0.173884 -0.147080  1.000000  0.038889  0.150827
other     0.299406 -0.243385  0.151170  0.038889  1.000000 -0.024736
load_var -0.072351  0.062293  0.048625  0.150827 -0.024736  1.000000


#### **Scatter Plott**

In [24]:
panel['intercept'] = 1

# Estimate the two-way fixed effect model
model = sm.OLS(panel['price'], panel[['intercept', 'load_var', 'hydro', 'solar', 'wind', 'other']])
results = model.fit(index=time_index, groups=entity_index, use_demeaned=True)

# Print the results
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.195
Model:                            OLS   Adj. R-squared:                  0.195
Method:                 Least Squares   F-statistic:                     3245.
Date:                Mon, 18 Dec 2023   Prob (F-statistic):               0.00
Time:                        22:22:09   Log-Likelihood:            -3.9099e+05
No. Observations:               66896   AIC:                         7.820e+05
Df Residuals:                   66890   BIC:                         7.821e+05
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
intercept     85.0042      0.698    121.856      0.0

In [35]:
# Fixed effects model with entity and time effects
sm.OLS(panel['price'], panel[['load_var', 'hydro', 'solar', 'wind', 'other']])
FE_results = model.fit(index=time_index, groups=entity_index)
print('Fixed Effects Model:')
print(FE_results.summary())

Fixed Effects Model:
                            OLS Regression Results                            
Dep. Variable:                  price   R-squared:                       0.195
Model:                            OLS   Adj. R-squared:                  0.195
Method:                 Least Squares   F-statistic:                     3245.
Date:                Tue, 19 Dec 2023   Prob (F-statistic):               0.00
Time:                        00:34:59   Log-Likelihood:            -3.9099e+05
No. Observations:               66896   AIC:                         7.820e+05
Df Residuals:                   66890   BIC:                         7.821e+05
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
intercept     85.0042      0.69