In [1]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import json
from datetime import datetime

# Import our modules
from opsd_utils import *
from advanced_analytics import (
    EnergyTransitionAnalyzer, 
    EnergyForecastingAnalyzer, 
    EnergyClusteringAnalyzer,
    generate_comprehensive_report
)

warnings.filterwarnings('ignore')
setup_plotting_style()

print("Advanced Energy Analytics Module Loaded")
print("Ready for cutting-edge energy insights!")


Advanced Energy Analytics Module Loaded
Ready for cutting-edge energy insights!


In [2]:
# Load the cleaned dataset
print("Loading OPSD dataset...")
df = load_opsd_data("../data")
print(f"Dataset shape: {df.shape}")
print(f"Time range: {df.index.min()} to {df.index.max()}")
print(f"Countries detected: {len([col for col in df.columns if col.count('_') >= 2])} unique time series")

# Clean and prepare data
print("\nData Cleaning & Feature Engineering...")
df_clean = clean_timestamps(df)
df_filled = fill_missing_values(df_clean)
df_capped = cap_outliers(df_filled)
df_features = create_time_features(df_capped)
df_rolling = create_rolling_features(df_features)
df_final = calculate_renewable_share(df_rolling)

print(f"\nFinal dataset shape: {df_final.shape}")
print(f"Missing values: {df_final.isnull().sum().sum()}")
print(f"Available columns: {len(df_final.columns)}")


Loading OPSD dataset...
Loading main time series file: ../data/opsd-time_series-2020-10-06/time_series_60min_singleindex.csv
Loaded data shape: (50401, 299)
Date range: 2014-12-31 23:00:00+00:00 to 2020-09-30 23:00:00+00:00
Dataset shape: (50401, 299)
Time range: 2014-12-31 23:00:00+00:00 to 2020-09-30 23:00:00+00:00
Countries detected: 299 unique time series

Data Cleaning & Feature Engineering...
Cleaned timestamps. Shape: (50401, 299)
Filled missing values. Long gaps flagged: 50401
Capped 16 outliers in AT_price_day_ahead
Capped 4 outliers in AT_solar_generation_actual
Capped 36 outliers in CH_load_actual_entsoe_transparency
Capped 754 outliers in CH_solar_generation_actual
Capped 57 outliers in CH_wind_onshore_generation_actual
Capped 5 outliers in DE_50hertz_load_actual_entsoe_transparency
Capped 1 outliers in DE_50hertz_load_forecast_entsoe_transparency
Capped 50 outliers in DE_LU_price_day_ahead
Capped 4 outliers in DK_load_actual_entsoe_transparency
Capped 7 outliers in DK_sola

In [None]:
# Generate comprehensive analytical report
print("Generating Comprehensive Advanced Analytics Report...")
print("This includes all cutting-edge energy transition insights...")

# This function runs ALL advanced analytics:
# 1. Duck curve evolution analysis
# 2. Renewable intermittency assessment
# 3. Price-renewable correlation analysis
# 4. Cross-border market integration
# 5. Seasonal pattern analysis
# 6. Anomaly detection & event analysis
# 7. Forecast accuracy assessment
# 8. Load profile clustering
# 9. Executive summary generation

report_path = generate_comprehensive_report(df_final)

print(f"\nCOMPREHENSIVE REPORT GENERATED!")
print(f"Detailed Report: {report_path}")
print(f"Executive Summary: {report_path.replace('.json', '_executive_summary.txt')}")

# Load and display key insights from the report
try:
    with open(report_path, 'r') as f:
        report_data = json.load(f)
    
    print(f"\nANALYSIS OVERVIEW:")
    print(f"   • Time Period: {report_data['dataset_summary']['time_range']}")
    print(f"   • Total Records: {report_data['dataset_summary']['total_records']:,}")
    print(f"   • Countries: {', '.join(report_data['dataset_summary']['countries_analyzed'])}")
    print(f"   • Generated: {report_data['generation_time']}")
    
    # Display executive summary if available
    summary_path = report_path.replace('.json', '_executive_summary.txt')
    if Path(summary_path).exists():
        print(f"\n" + "="*60)
        print("EXECUTIVE SUMMARY")
        print("="*60)
        
        with open(summary_path, 'r') as f:
            print(f.read())
        
        print("="*60)
        
except Exception as e:
    print(f"Error reading report: {e}")

print(f"\nKEY INSIGHTS GENERATED:")
print(f"   Duck Curve: Grid flexibility requirements quantified")
print(f"   Intermittency: Renewable variability assessed")
print(f"   Price Impact: Merit order effects measured")
print(f"   Market Integration: Cross-border efficiency analyzed")
print(f"   Seasonal Patterns: Climate sensitivity evaluated")
print(f"   Anomalies: Extreme events characterized")
print(f"   Forecasts: Model accuracy assessed")
print(f"   Clustering: Load profiles categorized")

print(f"\nSTRATEGIC VALUE:")
print(f"   These insights can guide billions in energy infrastructure investments")
print(f"   Accelerate energy transition with improved grid stability")
print(f"   Enable data-driven policy and investment decisions")
print(f"   Optimize renewable energy integration strategies")


Generating Comprehensive Advanced Analytics Report...
This includes all cutting-edge energy transition insights...
Analyzing dataset: 50,401 records, 877 columns
Countries detected: ['RO', 'SE', 'SI', 'LT', 'CY', 'RS', 'GR', 'GB', 'DE', 'FR', 'ES', 'EE', 'PL', 'LV', 'BE', 'HU', 'UA', 'DK', 'HR', 'NO', 'ME', 'LU', 'SK', 'CH', 'FI', 'BG', 'IE', 'IT', 'NL', 'CZ', 'PT', 'AT']
1/4 Running duck curve evolution analysis...
2/4 Running renewable intermittency analysis...
3/4 Running load profile clustering...
4/4 Running ML demand forecast for DE...
Focused report generated: output/reports/comprehensive_energy_report.json
Executive summary: output/reports/executive_summary.txt

COMPREHENSIVE REPORT GENERATED!
Detailed Report: output/reports/comprehensive_energy_report.json
Executive Summary: output/reports/comprehensive_energy_report_executive_summary.txt

ANALYSIS OVERVIEW:
   • Time Period: 2015-01-01 00:00:00+01:00 to 2020-10-01 01:00:00+02:00
   • Total Records: 50,401
   • Countries: RO, 