In [38]:
import os
import json
import pandas as pd
from datetime import datetime
from typing import List, Dict, Tuple

In [39]:
# Load configuration from config.json
with open('../models/config.json', 'r') as f:
    config = json.load(f)

postprocess = config['postprocessing']

# Output and input paths from configuration
OUTPUT_PATH = postprocess['output_path']
INPUT_PATH_S = postprocess['input_path_s']
INPUT_PATH_D = postprocess['input_path_d']


In [40]:
def get_deterministic_filename(start_date: str, end_date: str, period: str) -> str:
    """
    Generate the deterministic filename based on the provided dates and period.
    """
    filename = f"d_weighted_{start_date}_to_{end_date}_{period}_ts.csv"
    return filename

def get_stochastic_filename(start_date: str, end_date: str, period: str, scenario: int) -> str:
    """
    Generate the stochastic filename based on the provided dates, period, and scenario number.
    """
    filename = f"s_weighted_{start_date}_to_{end_date}_{period}_{scenario}_ts.csv"
    return filename

def load_csv_data(filepath: str) -> pd.DataFrame:
    """
    Load CSV data from the given file path.
    """
    try:
        data = pd.read_csv(filepath)
        return data
    except FileNotFoundError:
        print(f"File not found: {filepath}")
        return pd.DataFrame()  # Return empty DataFrame if file not found

def load_deterministic_data(date_str: str, period: str, input_path: str) -> pd.DataFrame:
    """
    Load deterministic data for a specific date and period.
    """
    filename = get_deterministic_filename(date_str, date_str, period)
    filepath = os.path.join(input_path, filename)
    data = load_csv_data(filepath)
    return data

def load_stochastic_data(date_str: str, period: str, scenario: int, input_path: str) -> pd.DataFrame:
    """
    Load stochastic data for a specific date, period, and scenario.
    """
    filename = get_stochastic_filename(date_str, date_str, period, scenario)
    filepath = os.path.join(input_path, filename)
    data = load_csv_data(filepath)
    return data

def load_data_for_dates(dates: List[datetime], period: str, input_path_d: str, input_path_s: str, num_scenarios: int = 10) -> Tuple[Dict[str, pd.DataFrame], Dict[str, Dict[int, pd.DataFrame]]]:
    """
    Load deterministic and stochastic data for specified dates.
    
    Returns:
        - Dictionary of deterministic data indexed by date string.
        - Nested dictionary of stochastic data indexed by date string and scenario number.
    """
    deterministic_data = {}
    stochastic_data = {}

    for date in dates:
        # Convert date to string in 'YYYYMMDD' format
        date_str = date.strftime('%Y%m%d')
        
        # Load deterministic data
        det_data = load_deterministic_data(date_str, period, input_path_d)
        deterministic_data[date_str] = det_data
        
        # Load stochastic data for each scenario
        stochastic_data[date_str] = {}
        for scenario in range(1, num_scenarios + 1):
            scen_name = f"Scenario{scenario}"
            sto_data = load_stochastic_data(date_str, period, scen_name, input_path_s)
            stochastic_data[date_str][scenario] = sto_data

    return deterministic_data, stochastic_data


In [41]:
# Specify the dates you want to process
dates_to_process = [
    datetime(2023, 3, 3),
    datetime(2023, 6, 26),
    # Add more dates as needed
]

# Specify the period ('day', 'week', etc.)
period = 'day'

# Load the data
deterministic_data, stochastic_data = load_data_for_dates(
    dates=dates_to_process,
    period=period,
    input_path_d=INPUT_PATH_D,
    input_path_s=INPUT_PATH_S,
    num_scenarios=10
)

In [42]:
# Choose the date to analyze
analysis_date = '20230303'  # Format: 'YYYYMMDD'


# Retrieve deterministic data for the chosen date
det_data = deterministic_data.get(analysis_date)

# Initialize a list to collect DataFrames
difference_list = []

if det_data is not None and not det_data.empty:
    # Extract the deterministic 'heat_demand' column
    det_heat_demand = det_data['heat_demand'].reset_index(drop=True)
    det_heat_demand.name = 'deterministic_heat_demand'
    
    # Initialize a DataFrame with time steps and deterministic 'heat_demand'
    comparison_df = pd.DataFrame({
        't': det_data['t'],
        'deterministic_heat_demand': det_heat_demand
    })
    
    # Loop through each scenario
    for scenario in range(1, 11):
        sto_data = stochastic_data[analysis_date].get(scenario)
        
        if sto_data is not None and not sto_data.empty:
            # Extract the 'heat_demand_scenario' column
            scenario_col_name = f'Scenario{scenario}.heat_demand_scenario'
            sto_heat_demand = sto_data[scenario_col_name].reset_index(drop=True)
            sto_heat_demand.name = f'scenario{scenario}_heat_demand_scenario'
            
            # Compute the difference
            difference = det_heat_demand - sto_heat_demand
            difference.name = f'scenario{scenario}_difference'
            
            # Add the scenario 'heat_demand_scenario' and difference to the comparison DataFrame
            comparison_df[f'scenario{scenario}_heat_demand_scenario'] = sto_heat_demand
            comparison_df[f'scenario{scenario}_difference'] = difference
        else:
            print(f"No stochastic data available for date {analysis_date}, Scenario {scenario}.")
    
    # Display the combined DataFrame
    display(comparison_df.head())
else:
    print(f"No deterministic data available for date {analysis_date}.")

Unnamed: 0,t,deterministic_heat_demand,scenario1_heat_demand_scenario,scenario1_difference,scenario2_heat_demand_scenario,scenario2_difference,scenario3_heat_demand_scenario,scenario3_difference,scenario4_heat_demand_scenario,scenario4_difference,...,scenario6_heat_demand_scenario,scenario6_difference,scenario7_heat_demand_scenario,scenario7_difference,scenario8_heat_demand_scenario,scenario8_difference,scenario9_heat_demand_scenario,scenario9_difference,scenario10_heat_demand_scenario,scenario10_difference
0,1,141.657945,160.499276,-18.84133,139.05463,2.603315,139.05463,2.603315,139.05463,2.603315,...,96.165339,45.492606,139.05463,2.603315,139.05463,2.603315,117.609985,24.047961,139.05463,2.603315
1,2,146.058094,129.879358,16.178736,151.983206,-5.925112,174.087054,-28.02896,129.879358,16.178736,...,151.983206,-5.925112,151.983206,-5.925112,151.983206,-5.925112,129.879358,16.178736,174.087054,-28.02896
2,3,234.265871,199.579471,34.686401,268.257547,-33.991675,268.257547,-33.991675,233.918509,0.347363,...,233.918509,0.347363,268.257547,-33.991675,268.257547,-33.991675,199.579471,34.686401,233.918509,0.347363
3,4,377.818933,382.826476,-5.007544,382.826476,-5.007544,352.058383,25.760549,382.826476,-5.007544,...,382.826476,-5.007544,352.058383,25.760549,321.290291,56.528642,382.826476,-5.007544,382.826476,-5.007544
4,5,277.248569,271.02607,6.222499,296.647358,-19.398789,271.02607,6.222499,245.404781,31.843788,...,245.404781,31.843788,296.647358,-19.398789,271.02607,6.222499,271.02607,6.222499,296.647358,-19.398789


In [44]:
# Add a new set of columns for absolute differences
for scenario in range(1, 11):
    diff_col = f'scenario{scenario}_difference'
    abs_diff_col = f'scenario{scenario}_abs_difference'
    if diff_col in comparison_df.columns:
        # Compute absolute difference
        comparison_df[abs_diff_col] = comparison_df[diff_col].abs()
    else:
        print(f"No difference data available for Scenario {scenario}.")

# Calculate cost per hour for each scenario
for scenario in range(1, 11):
    abs_diff_col = f'scenario{scenario}_abs_difference'
    cost_col = f'scenario{scenario}_cost_per_hour'
    if abs_diff_col in comparison_df.columns:
        # Calculate cost per hour
        comparison_df[cost_col] = comparison_df[abs_diff_col] * 0.01 
    else:
        print(f"No absolute difference data available for Scenario {scenario}.")

# Initialize a dictionary to store total costs
total_costs = {}

# Sum costs over all hours for each scenario
for scenario in range(1, 11):
    cost_col = f'scenario{scenario}_cost_per_hour'
    if cost_col in comparison_df.columns:
        # Sum the costs
        total_cost = comparison_df[cost_col].sum()
        total_costs[f'Scenario {scenario}'] = total_cost
    else:
        print(f"No cost data available for Scenario {scenario}.")

total_costs

{'Scenario 1': 5.280882606575306,
 'Scenario 2': 5.674344224261326,
 'Scenario 3': 6.354211944545325,
 'Scenario 4': 4.311525142123321,
 'Scenario 5': 4.267577898609753,
 'Scenario 6': 4.026098509520683,
 'Scenario 7': 4.780717356957502,
 'Scenario 8': 5.38432227311877,
 'Scenario 9': 5.711623639642635,
 'Scenario 10': 4.095325208011705}

In [51]:
# Example of how to calculate total costs for each scenario

# Create a DataFrame to display total costs
total_costs_df = pd.DataFrame(list(total_costs.items()), columns=['Scenario', 'Cost Dispatch'])



total_costs_df['Base Cost'] = -282.0652478293156

total_costs_df['Total Cost'] = total_costs_df['Cost Dispatch'] + total_costs_df['Base Cost']
# Display the total costs DataFrame
display(total_costs_df)



Unnamed: 0,Scenario,Cost Dispatch,Base Cost,Total Cost
0,Scenario 1,5.280883,-282.065248,-276.784365
1,Scenario 2,5.674344,-282.065248,-276.390904
2,Scenario 3,6.354212,-282.065248,-275.711036
3,Scenario 4,4.311525,-282.065248,-277.753723
4,Scenario 5,4.267578,-282.065248,-277.79767
5,Scenario 6,4.026099,-282.065248,-278.039149
6,Scenario 7,4.780717,-282.065248,-277.28453
7,Scenario 8,5.384322,-282.065248,-276.680926
8,Scenario 9,5.711624,-282.065248,-276.353624
9,Scenario 10,4.095325,-282.065248,-277.969923


In [46]:
for date in dates_to_process:
    # Convert date to string in 'YYYYMMDD' format
    analysis_date = date.strftime('%Y%m%d')
    
    print(f"Processing date: {analysis_date}")
    
    # Retrieve deterministic data for the chosen date
    det_data = deterministic_data.get(analysis_date)
    
    # Check if data exists
    if det_data is not None and not det_data.empty:
        # Initialize a DataFrame with time steps and deterministic 'heat_demand'
        det_heat_demand = det_data['heat_demand'].reset_index(drop=True)
        det_heat_demand.name = 'deterministic_heat_demand'
        comparison_df = pd.DataFrame({
            't': det_data['t'],
            'deterministic_heat_demand': det_heat_demand
        })
        
        # Loop through each scenario for the date
        for scenario in range(1, 11):
            sto_data = stochastic_data[analysis_date].get(scenario)
            if sto_data is not None and not sto_data.empty:
                # Extract the 'heat_demand_scenario' column
                scenario_col_name = f'Scenario{scenario}.heat_demand_scenario'
                sto_heat_demand = sto_data[scenario_col_name].reset_index(drop=True)
                sto_heat_demand.name = f'scenario{scenario}_heat_demand_scenario'
                
                # Compute the difference
                difference = det_heat_demand - sto_heat_demand
                difference.name = f'scenario{scenario}_difference'
                
                # Add the scenario 'heat_demand_scenario' and difference to the comparison DataFrame
                comparison_df[f'scenario{scenario}_heat_demand_scenario'] = sto_heat_demand
                comparison_df[f'scenario{scenario}_difference'] = difference
            else:
                print(f"No stochastic data available for date {analysis_date}, Scenario {scenario}.")
        
        # Perform cost calculations as before
        # Step 1: Compute absolute differences
        for scenario in range(1, 11):
            diff_col = f'scenario{scenario}_difference'
            abs_diff_col = f'scenario{scenario}_abs_difference'
            if diff_col in comparison_df.columns:
                comparison_df[abs_diff_col] = comparison_df[diff_col].abs()
            else:
                print(f"No difference data available for Scenario {scenario}.")
        
        # Step 2: Calculate cost per hour for each scenario
        for scenario in range(1, 11):
            abs_diff_col = f'scenario{scenario}_abs_difference'
            cost_col = f'scenario{scenario}_cost_per_hour'
            if abs_diff_col in comparison_df.columns:
                comparison_df[cost_col] = comparison_df[abs_diff_col] * 0.01
            else:
                print(f"No absolute difference data available for Scenario {scenario}.")
        
        # Step 3: Sum costs over all hours for each scenario
        total_costs = {}
        for scenario in range(1, 11):
            cost_col = f'scenario{scenario}_cost_per_hour'
            if cost_col in comparison_df.columns:
                total_cost = comparison_df[cost_col].sum()
                total_costs[f'Scenario {scenario}'] = total_cost
            else:
                print(f"No cost data available for Scenario {scenario}.")
        
        # Step 4: Display total costs
        total_costs_df = pd.DataFrame(list(total_costs.items()), columns=['Scenario', 'Total Cost'])
        print(f"Total Costs for date {analysis_date}:")
        display(total_costs_df)
    else:
        print(f"No deterministic data available for date {analysis_date}.")


Processing date: 20230303
Total Costs for date 20230303:


Unnamed: 0,Scenario,Total Cost
0,Scenario 1,3.442595
1,Scenario 2,3.784239
2,Scenario 3,3.392275
3,Scenario 4,4.06154
4,Scenario 5,4.20955
5,Scenario 6,3.544443
6,Scenario 7,3.798941
7,Scenario 8,4.023848
8,Scenario 9,3.618931
9,Scenario 10,2.943745


Processing date: 20230626
Total Costs for date 20230626:


Unnamed: 0,Scenario,Total Cost
0,Scenario 1,5.280883
1,Scenario 2,5.674344
2,Scenario 3,6.354212
3,Scenario 4,4.311525
4,Scenario 5,4.267578
5,Scenario 6,4.026099
6,Scenario 7,4.780717
7,Scenario 8,5.384322
8,Scenario 9,5.711624
9,Scenario 10,4.095325
