In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../")

from pathlib import Path
import plotly.express as px
import numpy as np
import pandas as pd


import src.data_loading as data_loading
import src.make_features as make_features

In [16]:
# Locations of the raw input data, relative to the notebook's working directory.
RAW_DATA_FOLDER = Path("../data/raw")
GAS_AND_HEAT_FOLDER = RAW_DATA_FOLDER / "Data gas and heat - new"

# Meter identifiers of the buildings covered by this analysis.
# "10.6-PF1" is deliberately left commented out (reason not recorded here).
building_names = [
    # "10.x" meters
    "10.10-PF1", "10.25-PF1", "10.26-TF1", "10.28-PF1", "10.33-PF1",
    "10.4-TF1",
    # "10.6-PF1",
    "10.6-PF2", "10.9.2-PF1",
    # "2.x" meters
    "2.2-PF1", "2.3.-PF1", "2.7-PF1", "2.8.-PF1", "2.9-PF1",
    # "22.x" meters
    "22.3-PF1",
]

In [17]:
# Metadata for the selected buildings, read via the project's data_loading helper.
# NOTE(review): presumably one row per meter with an "Addr meter" column — confirm
# against data_loading.get_building_metadata_df.
all_buildings_metadata_df = data_loading.get_building_metadata_df(
    data_folder=RAW_DATA_FOLDER,
    building_list=building_names,
)

# Pre-processed consumption table; the normalisation step below treats each
# column as one building.
all_buildings_diff_df = pd.read_parquet(
    "../data/processed/all_buildings_cleaned_diff_kwh.parquet"
)


In [None]:
# Normalise every building's consumption by its energy reference surface so the
# buildings become comparable (values are divided by the surface in m2).

# Column names whose spelling in the metadata's "Addr meter" column does not
# follow the plain "-" -> "/" substitution used for every other building.
METADATA_NAME_OVERRIDES = {
    "2.3.-PF1": "2.3/PF1",
    "2.8.-PF1": "2.8/PF1",
    "22.3-PF1": "22.3/P1",
}
SURFACE_COLUMN = "Energy reference surface (m2)"

# Loop-invariant: only these two metadata columns are needed for the lookup.
surface_lookup_df = all_buildings_metadata_df[["Addr meter", SURFACE_COLUMN]]

all_buildings_diff_kwh_m2_df = all_buildings_diff_df.copy()
buildings_to_drop = []

for building in all_buildings_diff_df.columns:
    metadata_name = METADATA_NAME_OVERRIDES.get(building, building.replace("-", "/"))

    # Vectorised equality instead of a per-row lambda.
    matching_surfaces = surface_lookup_df.loc[
        surface_lookup_df["Addr meter"] == metadata_name, SURFACE_COLUMN
    ].to_numpy()

    # Exactly one metadata row must match; zero or several matches are both
    # treated as "missing" and the building is dropped.
    if len(matching_surfaces) != 1:
        print(f"Dropping building due to missing in xlsx {building}")
        buildings_to_drop.append(building)
        continue

    building_m2 = matching_surfaces[0]

    # pd.isna also copes with non-float values, where np.isnan would raise.
    if pd.isna(building_m2):
        print(f"Dropping building due to missing m2 data xlsx {building}")
        buildings_to_drop.append(building)
        continue

    print(f"Using m2 data for {building}: {building_m2}")
    all_buildings_diff_kwh_m2_df[building] = all_buildings_diff_df[building] / building_m2

# Drop all unusable buildings in one non-in-place step.
all_buildings_diff_kwh_m2_df = all_buildings_diff_kwh_m2_df.drop(columns=buildings_to_drop)

In [None]:

# Total demand per building for each calendar year ("YE" = year-end bins).
# The title says "Yearly" to match this aggregation; it previously said "Monthly".
px.bar(
    all_buildings_diff_kwh_m2_df.resample("YE").sum(),
    barmode="group",
    title="Yearly primary energy demand [kWh/m2]",
)

## Monthly behaviour

In [None]:
# Daily totals per building; computed once and reused for both the
# time-feature index and the plotted frame.
all_buildings_daily_df = all_buildings_diff_kwh_m2_df.resample("d").sum()

# Time features are derived from an empty frame that only carries the daily index.
# NOTE(review): assumes compute_time_features returns "year" and "day_of_year"
# columns aligned with that index — confirm in src.make_features.
time_features_daily_df = make_features.compute_time_features(
    pd.DataFrame(index=all_buildings_daily_df.index)
)

# Attach the year while both frames still share the datetime index, then
# re-index by day of year so the years can be compared on the same axis.
all_buildings_daily_df["year"] = time_features_daily_df["year"]
all_buildings_daily_df.index = time_features_daily_df["day_of_year"]

# No x/y are passed, so the plot relies on plotly express' wide-form handling
# of the frame; one facet row is drawn per year.
px.scatter(
    all_buildings_daily_df,
    facet_row="year",
    width=900,
    height=600,
)

In [None]:
# Display the daily time-feature frame returned by make_features.compute_time_features.
time_features_daily_df

## Include weather data

In [20]:
# Temperature features; the first CSV column holds the timestamps.
weather_df = pd.read_csv("../data/interim/temperature_features.csv", index_col=0)

# Parse the index as datetimes and mark it as CET wall-clock time.
# Ambiguous timestamps become NaT and non-existent ones are shifted forward.
weather_df = (
    weather_df
    .set_axis(pd.to_datetime(weather_df.index), axis=0)
    .tz_localize("CET", ambiguous="NaT", nonexistent="shift_forward")
)

In [29]:
# Daily mean of the weather features (index stays timezone-aware here).
weather_daily_df = weather_df.resample("d").mean()

# Daily demand totals joined with daily weather on the date index. Both sides
# have their timezone stripped first so the indexes compare as naive timestamps;
# the merge keeps only days present in both frames (default inner join).
# Time features are then added by the project helper.
all_buildings_daily_df = make_features.compute_time_features(
    pd.merge(
        all_buildings_diff_kwh_m2_df.resample("d").sum().tz_localize(None),
        weather_daily_df.tz_localize(None),
        left_index=True,
        right_index=True,
    )
)

In [None]:
# Display the merged daily demand + weather frame with time features added.
all_buildings_daily_df

In [None]:
# Position in building_names of the building to plot.
ID = 4

# Daily demand of the chosen building against the "t_mean" column, coloured by
# the "is_weekend" flag; hovering a point shows its index label.
# NOTE(review): building_names[ID] must still be a column of the frame, i.e. the
# building must not have been dropped during surface normalisation.
px.scatter(
    data_frame=all_buildings_daily_df,
    x="t_mean",
    y=building_names[ID],
    color="is_weekend",
    hover_name=all_buildings_daily_df.index,
)

In [None]:
# Display the per-building consumption frame after division by the reference surface.
all_buildings_diff_kwh_m2_df