In [1]:
import pandas as pd
import datetime 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os 
from datetime import datetime
import time
import matplotlib.image as mpimg

In [2]:
# Remember the working directory of the kernel; the data and result paths used
# in the cells below are all built relative to it.
current_directory = os.getcwd()

# Print floats in fixed-point notation (no scientific notation) with 5 decimal
# places, both for NumPy arrays and for pandas tables.
np.set_printoptions(precision=5, suppress=True)
pd.set_option('display.float_format', '{:.5f}'.format)

# Silence every warning for the rest of the session.
import warnings
warnings.filterwarnings(action="ignore")

In [3]:
# Load the two statistics tables, means_test.csv and stds_test.csv, from the
# data_expanded/ folder under the working directory.
means_path = os.path.join(current_directory, 'data_expanded/means_test.csv')
stds_path = os.path.join(current_directory, 'data_expanded/stds_test.csv')
means_test = pd.read_csv(means_path)
stds_test = pd.read_csv(stds_path)

In [4]:
# Keep only the 'power_consumption' column of each statistics table. Later cells
# call .item() on these objects, so each is expected to hold exactly one value.
mean_test = means_test.loc[:, 'power_consumption']
std_test = stds_test.loc[:, 'power_consumption']

## Prediction accuracy

### Metrics to Evaluate Prediction Accuracy

The prediction accuracy is evaluated using 4 metrics to provide a comprehensive assessment of the model’s performance, balancing overall accuracy, robustness, and sensitivity to extreme deviations.

1. **Root Mean Squared Error (RMSE)**  
   $$
   \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^n (\hat{y}_i - y_i)^2}
   $$
   - Measures the average magnitude of prediction errors, with a higher sensitivity to large deviations due to the squaring of errors.
   - It effectively captures the overall performance of the model but is less robust in the presence of outliers.

2. **Mean Absolute Error (MAE)**  
   $$
   \text{MAE} = \frac{1}{n} \sum_{i=1}^n |\hat{y}_i - y_i|
   $$  
   - Measures the average error magnitude by treating all errors equally.
   - It is robust to extreme values.
  
3. **Maximum Error (ME)**  
   $$
   \text{ME} = \max(|\hat{y}_i - y_i|)
   $$  
   - Highlights the largest single deviation, capturing the model's worst-case behavior.

  
4. **Mean Absolute Percentage Error (MAPE)**  
   $$
   \text{MAPE} = \frac{100}{n} \sum_{i=1}^n \left| \frac{\hat{y}_i - y_i}{y_i} \right|
   $$  
   - Expresses errors as a percentage, providing relative interpretability.
   - It is sensitive to small actual values: as $y_i$ approaches zero, the ratio grows without bound.

### Persistence Forecast Model

In [7]:
# Load results/persistence_forecast.png from the working directory and display it
# on a 20x8 inch figure with the axes hidden.
# NOTE(review): the saved output of this cell is a FileNotFoundError for this
# path — confirm that results/ exists relative to the directory the kernel runs in.
file_path = os.path.join(current_directory, 'results/persistence_forecast.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: '/Users/ginevralarroux/Documents/GitHub/ml-project-2-powerpredictors/results/persistence_forecast.png'

### SARIMA

In [None]:
# Load results/sarima.png from the working directory and display it on a
# 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/sarima.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

### LSTM

In [None]:
# Load results/LSTM.png from the working directory and display it on a
# 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/LSTM.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

### LSTM - include forecast

In [None]:
# Load results/LSTM_include_forecast.png from the working directory and display
# it on a 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/LSTM_include_forecast.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

### LSTM - expanded dataset

In [None]:
# Load results/LSTM_expanded.png from the working directory and display it on a
# 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/LSTM_expanded.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

### TCN

In [None]:
# Load results/TCN.png from the working directory and display it on a
# 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/TCN.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

In [None]:
# Read the error table from results/errors.csv; the first CSV column becomes
# the row index.
errors = pd.read_csv(
    os.path.join(current_directory, 'results/errors.csv'),
    index_col=0,
)

#### Prediction error - Standardized

In [None]:
# Show the error table exactly as loaded from results/errors.csv (no rescaling).
errors

#### Prediction error - Original scale

In [None]:
# Convert the standardized errors to the original scale of power_consumption.
# An error is a difference of two values, so under z = (y - mean) / std the mean
# cancels and only the factor std remains: y_hat - y = std * (z_hat - z).
# Adding the mean back (as one would for a prediction) would inflate every entry
# by the mean, so only the multiplication by std is applied here.
# NOTE(review): a MAPE entry is a percentage rather than a difference, so
# multiplying it by std does not give the original-scale MAPE — confirm how that
# entry should be reported.
errors * std_test.item()

#### Prediction error - Normalized 

In [None]:
# Divide each column of the error table by that column's maximum.
errors.div(errors.max(axis=0), axis='columns')

## Prediction uncertainty

### Uncertainty of predictions

Prediction uncertainty is quantified with two metrics: PICP and PINAW.

1. **Prediction Interval Coverage Probability (PICP)**:
   - Measures the percentage of true values captured within the prediction intervals.
   - It should align with the specified confidence level of the prediction intervals (e.g., 95%).
   - Too low: Intervals miss actual values (underestimating uncertainty).
   - Too high: Intervals are overly wide (too conservative).

2. **Prediction Interval Normalized Average Width (PINAW)**:
   - Quantifies the average width of prediction intervals relative to the range of true values.
   - It should be minimized while ensuring the PICP aligns with the desired confidence level.
   - Too narrow: Risk of under-coverage.
   - Too wide: Excessive conservatism, reducing usefulness.

The goal is to balance **PICP** (coverage) and **PINAW** (sharpness) to create reliable and efficient prediction intervals.

### LSTM - Monte Carlo Dropout

In [None]:
# Load results/LSTM_MCDropout.png from the working directory and display it on a
# 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/LSTM_MCDropout.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

### TCN - Monte Carlo Dropout

In [None]:
# Load results/TCN_MCDropout.png from the working directory and display it on a
# 20x8 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/TCN_MCDropout.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(20, 8))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

In [None]:
# Read the uncertainty table from results/uncertainty_evaluation.csv (first CSV
# column becomes the row index) and display it.
uncertainty_evaluation = pd.read_csv(
    os.path.join(current_directory, 'results/uncertainty_evaluation.csv'),
    index_col=0,
)
uncertainty_evaluation

#### Normalized

In [None]:
# Divide each column of the uncertainty table by that column's maximum.
uncertainty_evaluation.div(uncertainty_evaluation.max(axis=0), axis='columns')

## Residuals analysis

In [None]:
# Load results/residuals_plot_LSTM.png from the working directory and display it
# on a 15x5 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/residuals_plot_LSTM.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(15, 5))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

In [None]:
# Load results/residuals_plots_per_hour.png from the working directory and
# display it on a tall 10x60 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/residuals_plots_per_hour.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(10, 60))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

In [None]:
# Load results/accuracy_per_hour.png from the working directory and display it
# on a tall 10x60 inch figure with the axes hidden.
file_path = os.path.join(current_directory, 'results/accuracy_per_hour.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(10, 60))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

## Train vs Validation loss 

#### LSTM

In [None]:
# Load results/train_vs_validation_loss_LSTM.png from the working directory and
# display it with the axes hidden.
# NOTE(review): figsize (10, 60) is the same very tall canvas used for the
# per-hour plots; confirm it suits the aspect ratio of this image.
file_path = os.path.join(current_directory, 'results/train_vs_validation_loss_LSTM.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(10, 60))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()


#### TCN

In [None]:
# Load results/train_vs_validation_loss_TCN.png from the working directory and
# display it with the axes hidden.
# NOTE(review): figsize (10, 60) is the same very tall canvas used for the
# per-hour plots; confirm it suits the aspect ratio of this image.
file_path = os.path.join(current_directory, 'results/train_vs_validation_loss_TCN.png')
img = mpimg.imread(file_path)
fig, ax = plt.subplots(figsize=(10, 60))
ax.imshow(img)
ax.set_axis_off()  # hide ticks and frame around the image
plt.show()

## Cost of imbalance 

In [None]:
# Read the residual table and rescale it by std_test (the 'power_consumption'
# entry of the standard-deviation table). Residuals are differences, so only the
# scale factor is applied, not the mean.
# NOTE(review): this is the only input read from data_augmented/; the statistics
# tables come from data_expanded/ — confirm the directory is correct.
file_path = os.path.join(current_directory, 'data_augmented/residuals.csv')
residuals = pd.read_csv(file_path, index_col=0) * std_test.item()

imbalance_price = 0.2  # [euro/kWh]
hours_per_day = 24

# Average the absolute residual over every entry of the table to get one scalar.
# np.mean(DataFrame) forwards axis=None to DataFrame.mean, which older pandas
# releases treat as a per-column mean and newer ones as a mean over all entries;
# reducing the underlying array yields a scalar on every version.
# nanmean keeps the pandas default of ignoring missing values.
# assumes each residual is an hourly energy error in kWh — TODO confirm
mean_abs_residual = np.nanmean(residuals.abs().to_numpy())
imbalance_cost_daily = mean_abs_residual * imbalance_price * hours_per_day
imbalance_cost_daily