In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import numpy as np
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import statsmodels.stats.api as sms
from datetime import datetime
from statsmodels.compat import lzip
from DATA import read_power_data
from DATA import calculate_capture_factors
from DATA import calculate_volatility
from DATA import filter_dates_by_interval

### DATA filtering 

In [2]:
# Load the raw power data through the project's DATA helper.
Power_DATA = read_power_data()

# Inputs shared by the two derived tables below.
energy_sources = ['Wind offshore', 'Wind onshore', 'Solar']
frequency = 'daily'  # or 'monthly', 'yearly'

# Capture factors for the listed sources and the volatility table are both
# computed from the same raw frame at the same `frequency`.
percentage_change_df = calculate_capture_factors(
    Power_DATA,
    energy_sources,
    frequency,
)
weighted_volatility_reset = calculate_volatility(
    Power_DATA,
    frequency=frequency,
)

# Inner join on 'Date': only dates present in both tables survive.
ANALYSE_DATA = pd.merge(
    left=percentage_change_df,
    right=weighted_volatility_reset,
    on='Date',
    how='inner',
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  capture_factors['Date'] = pd.to_datetime(capture_factors['Date'])


In [None]:
# Filter settings: the column to filter on, the interval bounds, and a
# reference date.
# NOTE(review): the exact meaning of the bounds and of `date` is defined in
# DATA.filter_dates_by_interval, which is not visible here -- confirm there.
column_name_solar = 'Capture_Factor_Solar'
interval_start, interval_end = -1, 1
date = pd.Timestamp(year=2023, month=1, day=1)

ANALYSE_DATA_F = filter_dates_by_interval(
    ANALYSE_DATA,
    column_name_solar,
    interval_start,
    interval_end,
    date,
)

# Persist the filtered frame, then show it as the cell output.
ANALYSE_DATA_F.to_csv('OX_DATA.csv')
ANALYSE_DATA_F

### Plots

In [None]:
# Both charts are created explicitly with the same size. Previously only the
# first chart got figsize=(10, 6); the second was drawn on an implicitly
# created default-size figure, so the two rendered inconsistently.
figure_size = (10, 6)

# --- Figure 1: solar capture factor over time ---
fig, ax = plt.subplots(figsize=figure_size)
ax.plot(ANALYSE_DATA_F['Date'], ANALYSE_DATA_F['Capture_Factor_Solar'], marker='o', color='yellow', linestyle='-', label='Solar')
# Optional wind series (disabled):
#ax.plot(ANALYSE_DATA_F['Date'], ANALYSE_DATA_F['Capture_Factor_Wind_Offshore'], marker='o', color='blue', linestyle='-', label='MW Wind Offshore')
#ax.plot(ANALYSE_DATA_F['Date'], ANALYSE_DATA_F['Capture_Factor_Wind_Onshore'] , marker='o', color='lightblue', linestyle='-', label='MW Wind Onshore')

ax.set_title('Capture-factor over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Capture-factor')
ax.grid(True)
ax.tick_params(axis='x', labelrotation=45)
ax.legend()
# Lay out after the legend and rotated tick labels exist so they are accounted for.
fig.tight_layout()
plt.show()

# --- Figure 2: weighted volatility over time ---
fig, ax = plt.subplots(figsize=figure_size)
ax.plot(ANALYSE_DATA_F['Date'], ANALYSE_DATA_F['Weighted_volatility_adj'], marker='o', color='red', linestyle='-', label='Weighted_volatility')

ax.set_title('Weighted_volatility over time')
ax.set_xlabel('Date')
ax.set_ylabel('Weighted_volatility')
ax.grid(True)
ax.tick_params(axis='x', labelrotation=45)
ax.legend()
fig.tight_layout()
plt.show()

### Empirical analysis

In [None]:
# Response variable: the 'Weighted_volatility_adj' column of the filtered frame.
y = ANALYSE_DATA_F['Weighted_volatility_adj']

# Regressor: the solar capture factor, with an intercept column added.
X = sm.add_constant(ANALYSE_DATA_F['Capture_Factor_Solar'])

# Fit a robust linear model (statsmodels RLM) of y on X.
model = sm.RLM(endog=y, exog=X).fit()

# Report the fitted coefficients and fit statistics.
print(model.summary())

In [None]:
# Robust regression on the unfiltered ANALYSE_DATA frame, followed by residual
# diagnostics.
# NOTE(review): the previous cell fits the same model on the filtered
# ANALYSE_DATA_F; confirm that using the unfiltered frame here is intended.

# Define the independent variable including the constant
X = sm.add_constant(ANALYSE_DATA['Capture_Factor_Solar'])

# Define the dependent variable
y = ANALYSE_DATA['Weighted_volatility_adj']

# Fit the robust linear regression model
model = sm.RLM(y, X).fit()

# Print the summary of the model
print(model.summary())

# Residuals
residuals = model.resid

# Perform Breusch-Pagan test for heteroscedasticity.
# The call was previously commented out, so the test announced here never ran
# and `name` was unused; the labels are now paired with the returned values.
name = ['Lagrange multiplier statistic', 'p-value', 'f-value', 'f p-value']
breusch_pagan_test = sm.stats.diagnostic.het_breuschpagan(residuals, X)
print("Breusch-Pagan Test:", dict(zip(name, breusch_pagan_test)))

# Perform Durbin-Watson test for autocorrelation
durbin_watson_statistic = sm.stats.stattools.durbin_watson(residuals)
print("Durbin-Watson Statistic:", durbin_watson_statistic)

# Perform Jarque-Bera test for normality.
# The result was previously computed and then discarded; report it.
jarque_bera_test = sm.stats.stattools.jarque_bera(residuals)
print("Jarque-Bera Test (JB, JB p-value, skewness, kurtosis):", jarque_bera_test)