Cell 1: Imports and Configuration
In this cell, we load all the necessary tools. If it fails here, we know that a library is missing and needs to be installed.

In [1]:
# --- CELL 1: IMPORTS AND CONFIGURATION ---
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import errno
import plotly.io as pio

# --- THIS IS THE KEY SETTING ---
# Tells Plotly to render figures in iframe format, which is compatible with classic Jupyter
pio.renderers.default = 'iframe' 

print("Libraries imported successfully.")

Libraries imported successfully.


Cell 2: Helper Functions
Here we define a helper function that creates folders. Keeping it in its own cell keeps the main analysis logic uncluttered.

In [2]:
# --- CELL 2: UTILITY FUNCTIONS ---

def create_dirs_recursively(directory_path):
    """
    Ensure that the directory needed for ``directory_path`` exists.

    If the last path component contains a dot (e.g. ``plot.html``) the path is
    treated as a file path and only its parent directory is created; otherwise
    the path itself is treated as a directory. Missing ancestors are created
    top-down, one ``os.mkdir`` per level, and each creation is reported with a
    printed message. Nothing happens if the target already exists.

    Args:
        directory_path (str): A file or directory path. Surrounding whitespace
            is stripped and the path is normalized before use.

    Raises:
        OSError: If a directory cannot be created for any reason other than
            it already existing (the error is printed before being re-raised).
    """
    directory_path = os.path.normpath(directory_path.strip())

    if not directory_path:
        return

    # The file-vs-directory heuristic is applied exactly once, to the final
    # component only. Intermediate components such as 'v1.0' are always
    # directories, so they must never be re-interpreted as file names.
    parent_dir, basename = os.path.split(directory_path)
    is_likely_file = '.' in basename and basename not in ('.', '..')

    # For a bare file name ('plot.html') the parent is '' (the current
    # directory), so there is nothing to create. Creating a directory named
    # after the file would block the subsequent file write.
    target_dir_to_ensure = parent_dir if is_likely_file else directory_path

    if not target_dir_to_ensure or os.path.exists(target_dir_to_ensure):
        return

    # Walk upwards collecting every ancestor that does not exist yet.
    missing_dirs = []
    current_dir = target_dir_to_ensure
    while current_dir and not os.path.exists(current_dir):
        missing_dirs.append(current_dir)
        next_dir = os.path.dirname(current_dir)
        if next_dir == current_dir:
            # Reached a filesystem root; dirname() no longer shortens the path.
            break
        current_dir = next_dir

    # Create the missing levels from the outermost one down to the target.
    for missing_dir in reversed(missing_dirs):
        try:
            os.mkdir(missing_dir)
            print(f"Directory created: '{missing_dir}'")
        except OSError as e:
            # EEXIST means something else created it in the meantime; fine.
            if e.errno != errno.EEXIST:
                print(f"Error creating directory: {e}")
                raise

print("Functions defined.")

Functions defined.


Cell 3: Data Loading and Preprocessing
This is the critical part of Data Science. We load the CSV, convert dates, and filter. Separating this allows us to verify that the data is correct before attempting to plot it.

In [3]:
# --- CELL 3: LOADING AND PREPROCESSING (OPTIMIZED) ---
# Produces `df_train` (full data, indexed by time) and `df_subset` (first 3 days).

# 1. Define file path
file_path = 'train_256063.csv' 

try:
    # 2. Load the CSV
    df_train = pd.read_csv(file_path)
    print("File loaded successfully.")

    # Normalize column names to lowercase ("Timestamp", "Demand", "PV" ->
    # "timestamp", "demand", "pv") so later lookups do not depend on the
    # header casing used in the CSV.
    df_train.columns = df_train.columns.str.lower()
    print("Columns found (normalized):", df_train.columns.tolist())

    # 3. Detect the time column. When several candidates are present,
    #    'date' takes precedence over 'time', which takes precedence over 'timestamp'.
    time_col = next(
        (candidate for candidate in ('date', 'time', 'timestamp')
         if candidate in df_train.columns),
        None,
    )
    if time_col is None:
        raise KeyError("no time column found; expected one of 'date', 'time', 'timestamp'")

    # 4. Convert to datetime and use it as a sorted index.
    #    Sorting matters: label-based slicing on an unsorted DatetimeIndex raises
    #    KeyError when a slice bound (here `end_date`) is not an existing label.
    #    mergesort is stable, so rows sharing a timestamp keep their file order.
    df_train[time_col] = pd.to_datetime(df_train[time_col])
    df_train = df_train.set_index(time_col).sort_index(kind='mergesort')

    # 5. Select the first 3 days. `.loc` slicing includes both bounds, so one
    #    second is subtracted to leave out the first sample of day 4.
    start_date = df_train.index.min()
    end_date = start_date + pd.Timedelta(days=3) - pd.Timedelta(seconds=1)
    df_subset = df_train.loc[start_date:end_date].copy()

    # Verification
    print(f"Rows selected: {len(df_subset)}")
    display(df_subset.head(3))

except FileNotFoundError:
    print(f"ERROR: File '{file_path}' not found. Check the file name.")
    # Re-raise so "Run All" stops here instead of failing later with a
    # NameError, or silently plotting a stale `df_subset` left in the kernel.
    raise
except KeyError as e:
    print(f"ERROR: Column issue. Details: {e}")
    raise

File loaded successfully.
Columns found (normalized): ['timestamp', 'pv_mod1', 'pv_mod2', 'pv_mod3', 'demand', 'pv', 'price', 'temperature', 'pressure (hpa)', 'cloud_cover (%)', 'cloud_cover_low (%)', 'cloud_cover_mid (%)', 'cloud_cover_high (%)', 'wind_speed_10m (km/h)', 'shortwave_radiation (w/m²)', 'direct_radiation (w/m²)', 'diffuse_radiation (w/m²)', 'direct_normal_irradiance (w/m²)']
Rows selected: 72


Unnamed: 0_level_0,pv_mod1,pv_mod2,pv_mod3,demand,pv,price,temperature,pressure (hpa),cloud_cover (%),cloud_cover_low (%),cloud_cover_mid (%),cloud_cover_high (%),wind_speed_10m (km/h),shortwave_radiation (w/m²),direct_radiation (w/m²),diffuse_radiation (w/m²),direct_normal_irradiance (w/m²)
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2013-07-01 00:00:00+00:00,0.0,0.0,0.0,0.27,0.0,0.01605,13.5,1011.3,4,0,0,3,10.5,,0,0,0.0
2013-07-01 01:00:00+00:00,0.0,0.0,0.0,0.23,0.0,0.00095,13.2,1010.8,27,1,2,23,11.9,0.0,0,0,0.0
2013-07-01 02:00:00+00:00,0.0,0.0,0.0,0.26,0.0,0.0006,13.1,1010.3,33,0,0,32,11.6,0.0,0,0,0.0


Cell 4: Visualization and Saving
Finally, we generate the plot. Since the data has already been prepared in df_subset, this cell only handles drawing and saving.

In [4]:
# --- CELL 4: VISUALIZATION ---
# Draws demand, price and PV generation from `df_subset` as three stacked
# subplots sharing one time axis, saves the figure as HTML and displays it.

# One entry per subplot row, listed top to bottom.
panel_specs = [
    dict(title='Demand (kW)', column='demand', name='Demand', color='blue', unit='kW'),
    dict(title='Price (EUR/kWh)', column='price', name='Price', color='red', unit='EUR'),
    dict(title='PV Generation (kW)', column='pv', name='PV Generation', color='green', unit='kW'),
]
last_row = len(panel_specs)

# 1. Create the figure: one subplot row per panel, single column
fig = make_subplots(
    rows=last_row,
    cols=1,
    shared_xaxes=True,
    subplot_titles=tuple(spec['title'] for spec in panel_specs),
)

# 2. Add one line trace per panel (column names are lowercase in df_subset)
for row_number, spec in enumerate(panel_specs, start=1):
    panel_trace = go.Scatter(
        x=df_subset.index,
        y=df_subset[spec['column']],
        mode='lines',
        name=spec['name'],
        line=dict(color=spec['color']),
    )
    fig.add_trace(panel_trace, row=row_number, col=1)

# 3. Style the plot
fig.update_layout(
    title_text='Energy Overview: First 3 Days',
    height=800,
    hovermode='x unified',  # one hover box showing every series at that hour
)

# 4. Axis labels: a unit on each y-axis, the time label on the bottom x-axis only
for row_number, spec in enumerate(panel_specs, start=1):
    fig.update_yaxes(title_text=spec['unit'], row=row_number, col=1)
fig.update_xaxes(title_text='Date and Time', row=last_row, col=1)

# 5. Save the result, creating the output folder first if needed
output_file = 'results/task1/task1_plotly_visualization.html'
create_dirs_recursively(output_file)
fig.write_html(output_file)

print(f"Interactive plot saved to: {output_file}")

# 6. Show the plot in the notebook
fig.show()

Interactive plot saved to: results/task1/task1_plotly_visualization.html
