In [None]:
import numpy as np

def calculate_rmse(df1, col1, df2, col2):
    # Get the values from the specified columns
    values1 = df1[col1].values
    values2 = df2[col2].values
    
    # Calculate the squared differences
    squared_diff = (values1 - values2) ** 2
    
    # Calculate the mean squared error
    mse = np.mean(squared_diff)
    
    # Calculate the root mean squared error
    rmse = np.sqrt(mse)
    
    return rmse


In [7]:
import numpy as np 
import pandas as pd
import lxml.etree as ET 
def extract_energy_demand_for_analysis(gml_path):
    """
    Extracts 'EnergyDemand' data along with 'energy:values' and their 'uom' from a GML file,
    converting the values into a format suitable for analysis (e.g., numpy array).

    Parameters:
    gml_path (str): Path to the GML file.

    Returns:
    pandas.DataFrame: DataFrame containing the 'EnergyDemand' data along with values and units of measure.
    """

    # Parse the GML file
    tree = ET.parse(gml_path)
    root = tree.getroot()

    # Define the namespace map to simplify finding elements
    namespaces = {k if k is not None else 'default': v for k, v in root.nsmap.items()}

    # Find all 'EnergyDemand' elements
    energy_demand_elements = tree.xpath('//energy:EnergyDemand', namespaces=namespaces)

    # Extract relevant data from these elements
    data = []
    for elem in energy_demand_elements:
        energy_demand_data = {'gml_id': elem.get('{http://www.opengis.net/gml}id')}
        for child in elem.iterdescendants():
            if child.tag.endswith('acquisitionMethod'):
                energy_demand_data['acquisition_method'] = child.text
            elif child.tag.endswith('interpolationType'):
                energy_demand_data['interpolation_type'] = child.text
            elif child.tag.endswith('source'):
                energy_demand_data['source'] = child.text
            elif child.tag.endswith('thematicDescription'):
                energy_demand_data['thematic_description'] = child.text
            elif child.tag.endswith('beginPosition'):
                energy_demand_data['begin_position'] = child.text
            elif child.tag.endswith('endPosition'):
                energy_demand_data['end_position'] = child.text
            elif child.tag.endswith('uom'):
                energy_demand_data['unit_of_measure'] = child.text
            elif child.tag.endswith('values'):
                # Splitting the values string and converting to a numpy array of floats
                values = np.array(child.text.split(), dtype=float)
                energy_demand_data['energy_values'] = values

        data.append(energy_demand_data)

    # Convert the extracted data to a pandas DataFrame
    df = pd.DataFrame(data)
    return df

In [8]:
fzk_demand = extract_energy_demand_for_analysis(r'C:\Users\felix\Programmieren\tecdm\data\examples\FZKHouseLoD3-ADE-results.gml')

In [10]:
fzk_demand.head()

Unnamed: 0,gml_id,acquisition_method,interpolation_type,source,thematic_description,begin_position,end_position,energy_values
0,GML_9e2120cd-89d5-4a65-b0dc-f6976f79df51,simulation,averageInSucceedingInterval,EnergyPlus,Heating energy,2022-01-01T00:00:00,2022-12-31T23:00:00,"[5.9217373997837095, 6.048223325349117, 5.8854..."
1,GML_fcf26a15-2992-40d8-b1bf-3d0f650c8f32,simulation,averageInSucceedingInterval,EnergyPlus,Cooling energy,2022-01-01T00:00:00,2022-12-31T23:00:00,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [24]:
heating_demand = fzk_demand[fzk_demand['thematic_description'] == 'Heating energy']["energy_values"].to_numpy()

In [29]:
len(heating_demand[0])

8760

In [34]:
data_fzk = np.loadtxt(r'C:\Users\felix\Programmieren\tecdm\src\districtgenerator\results\demands\heat_SFH_1_0.csv')
data_fzk 

array([3059.64, 3240.23, 3365.42, ..., 5134.33, 5202.11, 5206.83])

In [35]:
import numpy as np

def calculate_rmse(array1, array2):
    """
    Calculate the Root Mean Square Error (RMSE) between two NumPy arrays.

    Parameters:
    array1 (numpy.ndarray): First array.
    array2 (numpy.ndarray): Second array.

    Returns:
    float: The RMSE value.
    """
    # Ensure that the input arrays are NumPy arrays
    array1 = np.array(array1)
    array2 = np.array(array2)

    # Check if the arrays have the same shape
    if array1.shape != array2.shape:
        raise ValueError("The input arrays must have the same shape.")

    # Calculate the Mean Squared Error (MSE)
    mse = np.mean((array1 - array2) ** 2)

    # Calculate the Root Mean Square Error (RMSE)
    rmse = np.sqrt(mse)

    return rmse
