In [1]:
from src.data.build_panel import build_main_panel

print("Building 2-year panel...")
print("This will take 5-10 minutes...")

panel = build_main_panel(
    start_date='2022-01-01',
    end_date='2023-12-31',
    countries=['ES', 'PT']
)

print(f"\n‚úÖ Panel complete!")
print(f"   Total rows: {len(panel):,}")
print(f"   Date range: {panel['timestamp'].min()} to {panel['timestamp'].max()}")
print(f"   Countries: {sorted(panel['country'].unique())}")

# Check data quality
missing_prices = panel['price_eur_mwh'].isna().sum()
print(f"   Missing prices: {missing_prices} / {len(panel)} ({missing_prices/len(panel)*100:.2f}%)")

# Check pre/during split
pre = panel[~panel['is_iberian_exception']]
during = panel[panel['is_iberian_exception']]

print(f"\nüìä Period breakdown:")
print(f"   Pre-exception: {len(pre):,} rows")
print(f"   During exception: {len(during):,} rows")

INFO:src.data.build_panel:BUILDING MAIN ANALYSIS PANEL
INFO:src.data.build_panel:1. Creating hour index...
INFO:src.utils.timezone_utils:Created hour index: 17,520 hours from 2022-01-01 to 2023-12-31
INFO:src.data.build_panel:2. Loading prices...
INFO:src.data.build_panel:Building price panel...
INFO:src.data.build_panel:Price panel: 0 rows, 0 countries
INFO:src.data.build_panel:3. Loading weather...
INFO:src.data.build_panel:Building weather panel...
INFO:src.data.build_panel:Weather panel: 0 rows
INFO:src.data.build_panel:4. Loading cross-border flows...
INFO:src.data.build_panel:Building flows panel...
INFO:src.data.build_panel:Flows panel: 0 rows, 0 flow pairs
INFO:src.data.build_panel:5. Merging all data sources...


Building 2-year panel...
This will take 5-10 minutes...
DEBUG - Price panel loaded:
  Total rows: 0
  Countries: []
  Rows per country: Series([], dtype: int64)

Sample data:
Empty DataFrame
Columns: [timestamp, country, price_eur_mwh]
Index: []


Countries: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:00<00:00, 222.74it/s]
INFO:src.data.build_panel:6. Adding cross-border flows...
INFO:src.data.build_panel:7. Adding time features...
INFO:src.data.build_panel:8. Data quality checks...
INFO:src.data.build_panel:  Total rows: 35,040
INFO:src.data.build_panel:  Date range: 2022-01-01 00:00:00+00:00 to 2023-12-31 23:00:00+00:00
INFO:src.data.build_panel:  Countries: ['ES', 'PT']
INFO:src.data.build_panel:  Missing prices: 35,040 (100.0%)
INFO:src.data.build_panel:Saved to: C:\Users\a7654\OneDrive\Ambiente de Trabalho\STUDY\projects\projects\mibel-intelligence\data\processed\main_panel_2022-01-01_2023-12-31.parquet
INFO:src.data.build_panel:PANEL CONSTRUCTION COMPLETE



‚úÖ Panel complete!
   Total rows: 35,040
   Date range: 2022-01-01 00:00:00+00:00 to 2023-12-31 23:00:00+00:00
   Countries: ['ES', 'PT']
   Missing prices: 35040 / 35040 (100.00%)

üìä Period breakdown:
   Pre-exception: 7,920 rows
   During exception: 27,120 rows


In [2]:
# Cell 1: Load 2-year panel
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plotting setup
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')
%matplotlib inline

plt.rcParams['figure.figsize'] = (14, 6)
plt.rcParams['font.size'] = 11

PROCESSED_DIR = Path("../data/processed")

# Load the 2-year panel
panel_file = list(PROCESSED_DIR.glob("main_panel_2022-01-01_2023-12-31.parquet"))[0]
df = pd.read_parquet(panel_file)

print(f"‚úÖ Loaded 2-year panel: {len(df):,} rows")
print(f"üìÖ Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print(f"üåç Countries: {sorted(df['country'].unique())}")

# Create period splits
exception_start = pd.Timestamp('2022-06-15', tz='UTC')

pre_exception = df[~df['is_iberian_exception']].copy()
during_exception = df[df['is_iberian_exception']].copy()

print(f"\nüìä Period breakdown:")
print(f"   Pre-exception (Jan-Jun 14, 2022): {len(pre_exception):,} rows")
print(f"   During exception (Jun 15, 2022 - Dec 31, 2023): {len(during_exception):,} rows")

# Create country subsets
spain = df[df['country'] == 'ES'].sort_values('timestamp').copy()
portugal = df[df['country'] == 'PT'].sort_values('timestamp').copy()

spain_pre = spain[~spain['is_iberian_exception']].copy()
spain_during = spain[spain['is_iberian_exception']].copy()

print(f"\n‚úÖ Data loaded and segmented")

IndexError: list index out of range