In [None]:
import pandas as pd
from glob import glob
import pathlib
import json

In [None]:
# Root of the building-data-genome-project-2 data folder, given relative to the
# notebook's working directory. The trailing slash matters: other cells build
# file paths from this value by plain string concatenation.
path_to_bdgp_data = "../../../building-data-genome-project-2/data/"

In [None]:
# Look up the weather table under the data root. The result is a list of
# matching paths (empty when the file is not there), displayed for inspection.
weather_csv_pattern = path_to_bdgp_data + "weather/weather.csv"
files_weather = glob(weather_csv_pattern)
files_weather

In [None]:
# Load the weather table with its first column as the index, then replace
# every index label with its ISO-8601 string form.
df_weather = pd.read_csv(files_weather[0], index_col=0)
list_iso = [pd.Timestamp(label).isoformat() for label in df_weather.index]
df_weather.index = list_iso
df_weather

In [None]:
# Collect the raw meter CSV files. glob() returns matches in arbitrary,
# OS-dependent order, so the list is sorted to keep the meter order (and
# therefore anything derived from it) reproducible between runs and machines.
files_meter = sorted(glob(path_to_bdgp_data + "meters/raw/*.csv"))
files_meter

In [None]:
# Derive each meter's name from its file name (the text before the first dot).
# pathlib parses the path with the running platform's separator rules, so this
# works on POSIX as well as Windows; splitting on a hard-coded backslash does not.
list_meters = [pathlib.Path(file).name.split(".")[0] for file in files_meter]

list_meters

In [None]:
# Map each region to the ordered, de-duplicated list of "<bldgtype>_<bldgname>"
# identifiers that appear as column labels in the meter files.
dict_region_bldgname = {}

for file in files_meter:
    # Only the column labels are needed here, so read the header and no data rows.
    df_meter = pd.read_csv(file, index_col=0, nrows=0)

    for region_bldgtype_bldgname in df_meter.columns:
        # Split on the first underscore: the region comes before it, the
        # building identifier (type and name) after it.
        region, separator, bldgtype_bldgname = region_bldgtype_bldgname.partition("_")
        if not separator:
            # A label without an underscore cannot be a "<region>_<building>" column.
            continue

        dict_region_bldgname.setdefault(region, []).append(bldgtype_bldgname)

for region in dict_region_bldgname:
    # The same building shows up once per meter file; dict.fromkeys drops the
    # repeats while keeping first-seen order.
    dict_region_bldgname[region] = list(dict.fromkeys(dict_region_bldgname[region]))

dict_region_bldgname

In [None]:
def _iso_labels(index):
    """Return every label of ``index`` formatted as an ISO-8601 string."""
    return [pd.Timestamp(label).isoformat() for label in index]


def _config_time(label):
    """Format ``label`` the way the config file stores datetimes: ``t:<iso>-00:00``."""
    return "t:" + pd.Timestamp(label).isoformat() + "-00:00"


def _column_entries(columns):
    """Build one config entry per column; id, dis and column all carry the column name."""
    return [
        {"id": name, "dis": name, "column": name, "unit": "na"}
        for name in columns
    ]


# Every building is written into the same output folder, so create it once.
# NOTE(review): output files are named by building only; if two regions ever
# contain the same "<bldgtype>_<bldgname>", the later one overwrites the earlier.
path_to_save_processed_files = "../../data/BDGP2/"
pathlib.Path(path_to_save_processed_files).mkdir(parents=True, exist_ok=True)

# Parse each raw meter file (and convert its index to ISO strings) a single
# time instead of once per building. This keeps all meter tables in memory at
# once, trading memory for not re-parsing every file for every building.
meter_frames = {}
for meter in list_meters:
    df = pd.read_csv(path_to_bdgp_data + "meters/raw/{}.csv".format(meter), index_col=0)
    df.index = _iso_labels(df.index)
    meter_frames[meter] = df

for region in dict_region_bldgname.keys():

    print("### proceeing: {}".format(region))

    #################################################################
    # process predictors (identical for every building in a region)
    #################################################################

    df_weather_filtered = df_weather.loc[df_weather["site_id"] == region, :].sort_index()
    df_weather_filtered = df_weather_filtered.loc[:, df_weather_filtered.columns != "site_id"]

    df_predictors = pd.DataFrame().join(df_weather_filtered, how="outer")
    df_predictors.index.names = ['Timestamp']

    for bldgname in dict_region_bldgname[region]:

        predictors_filename = "{}_Predictors.csv".format(bldgname)
        targets_filename = "{}_Targets.csv".format(bldgname)

        df_predictors.to_csv(path_to_save_processed_files + predictors_filename)

        #################################################################
        # process targets
        #################################################################

        meter_column = "{}_{}".format(region, bldgname)
        df_targets = pd.DataFrame()

        for meter in list_meters:
            df_meter_all = meter_frames[meter]
            if meter_column not in df_meter_all.columns:
                # This building has no readings for this meter type.
                continue

            # One column per meter type, labelled with the meter name.
            df_bldg = df_meter_all[[meter_column]].rename(columns={meter_column: meter})
            df_targets = df_targets.join(df_bldg, how="outer")

        df_targets.index.names = ['Timestamp']
        df_targets.to_csv(path_to_save_processed_files + targets_filename)

        #################################################################
        # process configs
        #################################################################

        # Overall span: earliest first label and latest last label across
        # the predictors and targets tables.
        datetime_start = min(pd.Timestamp(df_predictors.index[0]),
                             pd.Timestamp(df_targets.index[0]))
        datetime_end = max(pd.Timestamp(df_predictors.index[-1]),
                           pd.Timestamp(df_targets.index[-1]))

        configs = {}

        # ---------------------------------------------------------------
        configs["dates"] = {
            "start": _config_time(datetime_start),
            "end": _config_time(datetime_end),
        }

        # ---------------------------------------------------------------
        # Describe the columns straight from the frames that were just saved.
        # Globbing the shared output folder would also match the files of
        # every other building already written there.
        configs["predictors"] = _column_entries(df_predictors.columns)
        configs["targets"] = _column_entries(df_targets.columns)

        # ---------------------------------------------------------------
        configs["files"] = [
            {
                "filename": predictors_filename,
                "contentType": "predictors",
                "start": _config_time(df_predictors.index[0]),
                "end": _config_time(df_predictors.index[-1]),
            },
            {
                "filename": targets_filename,
                "contentType": "targets",
                "start": _config_time(df_targets.index[0]),
                "end": _config_time(df_targets.index[-1]),
            },
        ]

        # ensure_ascii=False can emit non-ASCII characters, so fix the file
        # encoding rather than relying on the platform default.
        config_path = path_to_save_processed_files + "{} Config.json".format(bldgname)
        with open(config_path, "w", encoding="utf-8") as fp:
            json.dump(configs, fp, ensure_ascii=False)
