In [2]:
import os
# Change native directory to root
os.chdir(os.path.dirname(os.getcwd()))

In [3]:
import glob
import pandas as pd
import numpy as np
import random
import pickle
from tsfeatures import tsfeatures
import matplotlib.pyplot as plt
import plotly.express as px
from src.utils.functions import validation

from tqdm import tqdm

random.seed(123)
model_dir = 'models/'

In [4]:
# Read the features from the csv file
features = pd.read_csv('data/social_features.csv' ,index_col=0).loc[:,'yearly'].to_frame()
features

Unnamed: 0,yearly
411,15870946.00
1208,338142.00
588,187320.45
1116,34916.35
279,246941.30
...,...
1660,311778.15
642,648113.40
597,3236158.00
235,195611.65


In [5]:
# read metadata csv
metadata = pd.read_csv('data/EANLIJST_METADATA.csv', index_col=0, sep   = ';')
# ADD the functietype column to the features
features['function'] = metadata['Patrimonium Functietype']
# read more metrics from csv
metrics = pd.read_csv('data/ts_metrics.csv', usecols = ['ID', 'mean', 'std'], index_col='ID')
# add the metrics to the features
features = features.join(metrics)
features.isnull().sum()
features.dropna(inplace=True)
features['ID'] = features.index
# drop rows with kast as function
features = features[features['function'] != 'Kast']
features

Unnamed: 0,yearly,function,mean,std,ID
1208,338142.00,Stadhuis/Gemeentehuis,9.313439,3.516002,1208
588,187320.45,Academie,4.929265,4.023414,588
1116,34916.35,Cultureel centrum,0.675075,1.535042,1116
144,167898.77,Lagere school,4.773989,4.691106,144
510,32513.85,Andere gebouwen,1.146714,3.083926,510
...,...,...,...,...,...
56,136389.35,Ontmoetingscentrum,4.722174,3.758550,56
13,373862.80,Administratief centrum,10.530915,5.580256,13
1660,311778.15,Cultureel centrum,7.875420,4.729058,1660
235,195611.65,Werkplaats,5.620527,4.201984,235


In [6]:
cl_A = ['Sporthal', 'Sportcomplex', 'Stadion']
cl_B = ['Administratief centrum', 'Stadhuis/Gemeentehuis', 'OCMW Administratief centrum']
cl_C = ['Lagere school', 'School', 'Kinderdagverblijf/BKO/IBO', 'Algemene middelbare school', 'Technische middelbare school', 'Buitengewoon lager onderwijs (MPI)', 'Buitengewoon middelbaar onderwijs (BUSO)', 'Kleuterschool']
cl_D = ['Andere gebouwen', 'Kast', 'Kerk', 'Straatverlichting', 'Laadeiland', 'Park', 'Pomp', 'Voetbalveld', 'Andere', 'Andere terreinen', 'Containerpark', 'Fontein', 'Looppiste', 'Parking', 'Ziekenhuis']
cl_E = ['Cultureel centrum', 'Ontmoetingscentrum', 'Bibliotheek', 'Academie', 'Museum', 'Jeugdhuis']
cl_G = ['RVT/WZC/revalidatiecentrum', 'Dienstencentrum/CAW/dagverblijf']
cl_H = ['Werkplaats']
cl_I = ['Zwembad']
cl_K = ['Brandweerkazerne', 'Politiegebouw']
cl_F = ['OCMW Woningen']
# list of all lists
clusters = [cl_A, cl_B, cl_C, cl_D, cl_E, cl_G, cl_H, cl_I, cl_K, cl_F]

In [10]:
st_p = pd.DataFrame()
names = ['A', 'B', 'C', 'D', 'E', 'G', 'H', 'I', 'K', 'F']
# Create stanard profiles for each cluster
for k, clust in enumerate(clusters):
    for i, ID in enumerate(features[features['function'].isin(clust)].ID):
        #print(ID)
        if i == 0:
            agg = pd.read_csv('./data/buildings/' + str(ID) + '.csv', index_col=0, usecols=['ds','Power'], parse_dates=['ds'])
            agg = agg / agg['2019'].sum()
        else:
            agg[ID] = pd.read_csv('./data/buildings/' + str(ID) + '.csv', index_col=0, usecols=['ds','Power'], parse_dates=['ds'])
            agg[ID] = agg[ID] / agg['2019'][ID].sum()
    # Create an average profile over columns
    agg = agg.mean(axis=1)
    st_p[names[k]] = agg

In [11]:
st_p.to_csv('data/Kris_profiles.csv')

In [13]:
st_p = pd.read_csv('data/Kris_profiles.csv', index_col=0, parse_dates=['ds'])
st_p

Unnamed: 0_level_0,A,B,C,D,E,G,H,I,K,F
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-01 00:15:00,0.000016,0.000022,0.000016,0.000040,0.000025,0.000021,0.000022,0.000022,0.000026,0.000022
2019-01-01 00:30:00,0.000016,0.000022,0.000017,0.000041,0.000025,0.000021,0.000021,0.000022,0.000027,0.000021
2019-01-01 00:45:00,0.000015,0.000022,0.000017,0.000038,0.000024,0.000021,0.000022,0.000022,0.000027,0.000021
2019-01-01 01:00:00,0.000016,0.000022,0.000017,0.000037,0.000024,0.000021,0.000022,0.000022,0.000027,0.000020
2019-01-01 01:15:00,0.000016,0.000022,0.000016,0.000038,0.000024,0.000021,0.000022,0.000022,0.000026,0.000019
...,...,...,...,...,...,...,...,...,...,...
2021-12-31 23:00:00,0.000013,0.000020,0.000017,inf,0.000016,0.000021,0.000021,0.000020,0.000021,0.000026
2021-12-31 23:15:00,0.000013,0.000019,0.000015,0.000029,0.000016,0.000020,0.000022,0.000019,0.000021,0.000022
2021-12-31 23:30:00,0.000012,0.000019,0.000015,inf,0.000016,0.000020,0.000022,0.000019,0.000021,0.000020
2021-12-31 23:45:00,0.000012,0.000019,0.000014,inf,0.000016,0.000020,0.000022,0.000019,0.000021,0.000019


In [14]:
subset = pd.read_csv('data/social_features_test.csv', index_col=0).loc[:,'yearly'].to_frame()
# read metadata csv
metadata = pd.read_csv('data/EANLIJST_METADATA.csv', index_col=0, sep   = ';')
# ADD the functietype column to the features
subset['function'] = metadata['Patrimonium Functietype']
# read more metrics from csv
metrics = pd.read_csv('data/ts_metrics.csv', usecols = ['ID', 'mean', 'std'], index_col='ID')
# add the metrics to the features
subset = subset.join(metrics)
subset.isnull().sum()
subset.dropna(inplace=True)
subset['ID'] = subset.index
# drop rows with kast as function
subset = subset[subset['function'] != 'Kast']
subset

Unnamed: 0,yearly,function,mean,std,ID
666,308362.90,Andere gebouwen,5.563986,17.810517,666
399,100270.95,School,2.888369,3.724561,399
1544,496847.76,Stadhuis/Gemeentehuis,13.306562,8.142373,1544
1655,43067.00,Lagere school,1.054809,1.796610,1655
844,411595.23,School,10.690519,9.587791,844
...,...,...,...,...,...
320,2473361.00,RVT/WZC/revalidatiecentrum,70.525623,35.880629,320
944,905537.58,Bibliotheek,22.370093,18.574790,944
444,25718.20,Werkplaats,0.896637,1.061082,444
778,995995.20,Stadion,22.055094,21.576259,778


In [107]:
mae = {}
rmse = {}
smape = {}
for ID in subset.index:
    func = subset.loc[ID, 'function']
    ts = pd.read_csv('./data/buildings/{}.csv'.format(ID), usecols=['Power', 'ds'], index_col='ds')
    ts_syn =  st_p[names[get_index(func, clusters)]].copy() * subset.loc[ID, 'yearly']
    ts_syn = ts_syn.to_frame()
    ts.index = pd.to_datetime(ts.index)
    ts_syn.index = pd.to_datetime(ts_syn.index)
    # drop inf values from ts_syn
    ts_syn = ts_syn[ts_syn < np.inf]
    # drop the nan values
    ts_syn = ts_syn.dropna()
    # keep the same index in ts as ts_syn
    ts = ts.loc[ts_syn.index]
    mae[ID] = validation(ts.values, ts_syn.values, 'MAE')
    rmse[ID] = validation(ts.values, ts_syn.values, 'RMSE')
    smape[ID] = validation(ts.values, ts_syn.values, 'SMAPE')
# make a dataframe with 3 dictionaries as columns
temp_df = pd.DataFrame({'MAE': mae, 'RMSE': rmse, 'SMAPE': smape})
temp_df.to_csv('./results/Kris.csv')

In [109]:
temp_df.mean()

MAE       6.655499
RMSE     20.860885
SMAPE    26.659543
dtype: float64

In [41]:
# get index of the list in the list of lists where the function is in
def get_index(func, clusters):
    for i, clust in enumerate(clusters):
        if func in clust:
            return i
    return -1