In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import colorcet as cc
import os
from datetime import datetime
import json
from PIL import Image

# 0. Define Input-Data

### 0.1 General Helpers

In [None]:
def print_df(df: pd.DataFrame):
    """Print a DataFrame as a centred, pipe-separated text table.

    Column labels are shown with all blanks removed. The labels are only
    cleaned for the printout; the DataFrame passed in is left unchanged.

    Args:
        df (pd.DataFrame): any DataFrame for visualization
    """
    # Clean the labels on a local list instead of assigning to df.columns, so
    # printing never alters the caller's frame. str() keeps non-string labels printable.
    column_labels = [str(label).replace(' ', '') for label in df.columns]

    # Column widths: 10 for the index column, at least 20 for every data column
    widths = [10] + [max(len(label), 20) for label in column_labels]

    # Centred header row; the index column is titled "t"
    header = " | ".join(
        title.center(width) for title, width in zip(["t"] + column_labels, widths)
    )

    # One centred line per row, with the row index as first cell
    rows = "\n".join(
        " | ".join(str(val).center(width) for val, width in zip([index] + list(row), widths))
        for index, row in zip(df.index, df.values)
    )

    # Merge and output
    print(header)
    print("-" * len(header))  # Dividing line
    print(rows)
    
    
def change_energy_units(
    values: list, 
    actual_unit: str, 
    target_unit: str):
    """Convert power or energy values between kilo, mega and giga units.

    Every value is scaled from the actual unit to the target unit and rounded
    to two decimals. Power and energy spellings of the same magnitude
    (e.g. 'kW' and 'kWh') are treated alike. If both units have the same
    magnitude the values are only rounded.

    Args:
        values (list): power or energy values
        actual_unit (str): actual unit of input values
        target_unit (str): target unit for values

    Raises:
        KeyError: if one of the units is not a permitted unit. The units are
            checked once up front, so this is also raised for an empty list.

    Returns:
        list: target values with intended target unit.
    """
    # Permitted units mapped to their power of ten relative to kilo
    unit_exponents = {
        'kW': 0, 'kWh': 0,
        'MW': 3, 'MWh': 3,
        'GW': 6, 'GWh': 6,
    }

    # Validate once instead of once per value
    if actual_unit not in unit_exponents or target_unit not in unit_exponents:
        raise KeyError("Unit not found. Check spelling or add unit in function.")

    shift = unit_exponents[actual_unit] - unit_exponents[target_unit]

    if shift > 0:
        # Larger unit -> smaller unit: multiply by 1000 or 1000000
        factor = 10 ** shift
        return [round((value * factor), 2) for value in values]

    if shift < 0:
        # Smaller unit -> larger unit: divide by 1000 or 1000000
        divisor = 10 ** (-shift)
        return [round((value / divisor), 2) for value in values]

    # Same magnitude: nothing to scale, only round
    return [round((value), 2) for value in values]


def get_months_from_timestamp(
    timestamps: list
    ):
    """Reduce year-month strings to their month part.

    For each entry the first four characters are taken as the year, and every
    occurrence of '<year>-' is removed from the string.

    Args:
        timestamps (list): input timestamps in list ['YYYY-MM', ...]

    Returns:
        list: one string per input entry with the year prefix removed
    """
    return [
        stamp.replace(f'{stamp[:4]}-', '')
        for stamp in timestamps
    ]


def get_weeks_from_timestamp(
    timestamps: list
    ):
    """Derive the ISO calendar week of each period from its start date.

    Only the part before the '/' is evaluated; the end date is ignored.
    Useful for plotting diagrams based on weeks and not months.

    Args:
        timestamps (list): list of timestamps in format: ['YYYY-MM-DD/YYYY-MM-DD', ...] if weeks are set as granularity.

    Returns:
        list: ISO week numbers as integers [1, 2, 3, ...] based on the start date (suitable for x axis)
    """
    return [
        # start date (text before '/') -> datetime -> ISO week number
        datetime.strptime(span.split('/')[0], "%Y-%m-%d").isocalendar()[1]
        for span in timestamps
    ]


def get_days_from_timestamp(
    timestamps: list
    ):
    """Convert date strings into plottable 'DD-MM' day labels.

    Useful for plotting diagrams based on days (x-axis). An empty input list
    yields an empty list.

    Args:
        timestamps (list): list of timestamps in format: ['YYYY-MM-DD', ...] if days are set as granularity.

    Returns:
        list: list with strings of days ['DD-MM', ...] in input order (suitable for x axis)
    """
    # Single pass: parse each date once and re-format it as DD-MM
    return [
        datetime.strptime(timestamp, "%Y-%m-%d").strftime("%d-%m")
        for timestamp in timestamps
    ]


def get_hours_from_timestamp(
    timestamps: list
    ):
    """Convert hourly timestamps into plottable 'DD-MM HH:MM' labels.

    Useful for plotting diagrams based on hours (x-axis). Every label carries
    both the day-month part and the time.

    Args:
        timestamps (list): list of timestamps in format: ['YYYY-MM-DD HH:MM', ...] if hours are set as granularity.

    Returns:
        list: list with strings formatted as 'DD-MM HH:MM' (suitable for x axis).
    """
    return [
        # parse the string into a datetime, then always render it as "DD-MM HH:MM"
        datetime.strptime(stamp, "%Y-%m-%d %H:%M").strftime("%d-%m %H:%M")
        for stamp in timestamps
    ]

### 0.2 Load data from CSV Output:

In [None]:
def load_csv_results_in_df(
    input_path: str
    ):
    """Read the output csv into a DataFrame.

    The first column of the file is used as the index of the DataFrame.

    Args:
        input_path (str): path from which csv is loaded.

    Returns:
        pd.DataFrame: csv data in dataframe as table type.
    """
    return pd.read_csv(input_path, index_col=0)

# 1. Functions for data processing from csv

In [None]:
def add_timestamp_and_filter(
    input_df: pd.DataFrame, 
    start_date: str, 
    end_date: str, 
    time_column = 'date',
    series_start = '2028-01-01'
    ):
    """Add an hourly timestamp column to a copy of the DataFrame and filter it by date range.

    One timestamp per row is generated, starting at series_start and advancing
    in steps of one hour. The input DataFrame itself is not modified, so the
    function can be re-run on the same frame.

    Args:
        input_df (pd.DataFrame): Input Dataframe from results.
        start_date (str): first date for filtering (YYYY-MM-DD).
        end_date (str): end date for filtering (YYYY-MM-DD). The whole end day is included.
        time_column (str, optional): name of the inserted column with time steps. Defaults to 'date'.
        series_start (str, optional): timestamp assigned to the first row. Defaults to '2028-01-01'.

    Raises:
        ValueError: if a column named time_column already exists in input_df.

    Returns:
        pd.DataFrame: new DataFrame with the time column inserted at position 0, blanks removed
        from all column names, restricted to rows between start_date and end_date.
    """
    # Work on a copy: inserting into the caller's frame would make a second call fail
    stamped_df = input_df.copy()

    # One hourly timestamp per row; the length follows the data instead of a fixed 8784
    dates = list(
        pd.date_range(series_start, periods=len(stamped_df), freq=pd.Timedelta(hours=1))
    )
    stamped_df.insert(0, time_column, dates, allow_duplicates=False)
    stamped_df[time_column] = pd.to_datetime(stamped_df[time_column])

    # Delete blank spaces of column names:
    stamped_df.columns = stamped_df.columns.str.replace(' ', '')

    first_moment = pd.Timestamp(start_date)
    # Ensure that end_date covers the entire last day:
    last_moment = pd.Timestamp(end_date) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)

    # Filter the DataFrame based on the corrected time range:
    return stamped_df[
        (stamped_df[time_column] >= first_moment) & 
        (stamped_df[time_column] <= last_moment)
    ]
    
def extract_data_to_dict(
    input_values_df: pd.DataFrame,
    keys_from_output: list,
    pathes: list, 
    type_of_power: str
):
    """Placeholder: the extraction logic has not been written yet.

    Judging by the name and the notes left in the body, this is intended to
    collect values from the input csv files (one per entry in pathes) and
    from the output csv into a dict - TODO confirm once implemented.

    Args:
        input_values_df (pd.DataFrame): currently unused.
        keys_from_output (list): currently unused.
        pathes (list): currently unused.
        type_of_power (str): currently unused.

    Raises:
        NotImplementedError: always, until the function body is implemented.
    """
    # TODO: extract values from input csv (loop over enumerate(pathes))
    # TODO: extract values from output csv
    raise NotImplementedError("extract_data_to_dict is not implemented yet.")

# 2. Functions for plot preparation (y values and x values)

In [None]:
def get_periods_for_plot(
    time_series_df: pd.DataFrame,  
    granularity: str,
    period_column = 'period'
    ):
    """Read the period column of a DataFrame and convert it into plottable labels.

    Args:
        time_series_df (pd.DataFrame): Input Dataframe with filtered values and groups.
        granularity (str): granularity of time: 'hour', 'day', 'week' or 'month'.
            Any other value returns the periods unchanged.
        period_column (str, optional): name of column with set period. Defaults to 'period'.

    Raises:
        KeyError: period_column is not a column of the DataFrame.

    Returns:
        list: period data for visualization in plot.
    """
    # Guard clause: the period column has to exist
    if period_column not in time_series_df:
        raise KeyError("period not found in time_series_df.")

    raw_periods = time_series_df[period_column].tolist()

    # Hand over to the formatter that belongs to the granularity
    if granularity == 'hour':
        return get_hours_from_timestamp(timestamps=raw_periods)
    if granularity == 'day':
        return get_days_from_timestamp(timestamps=raw_periods)
    if granularity == 'week':
        return get_weeks_from_timestamp(timestamps=raw_periods)
    if granularity == 'month':
        return get_months_from_timestamp(timestamps=raw_periods)

    # Unknown granularity: return the periods as they are
    return raw_periods

# 3. Functions for plotting

In [None]:
# Power of a line ("Strang") divided by the total power of that line

# 1 Read the data; input = name of the column
# 2 Link the csv by name and path and read out the maximum
# 3 Calculate value / max value
# dict: time, values, min and max values
# 4 Plot in a diagram in %
# y label: hours
# x label: days
# flexibly specify which ones should be shown in comparison (bars side by side)
# Idea: heatmap with y = hour of the day, x = day of the year (366), colour = percentage value

# Alternative:
# Hour
# 1 Read the data; input = name of the column
# 2 Link the csv by name and path and read out the maximum
# 3 Calculate value / max value
# 4 Plot several bars side by side for comparison

# 7. Function for saving plots of any kind.