In [9]:
import pandas as pd

import DataRetriever as dr

# Obtain the dataset through the project's DataRetriever helper.
retriever = dr.DataRetriever()
df = retriever.get_data("All-Subsystems-minute-Year2.pkl")

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole session.
pd.set_option("mode.chained_assignment", None)

In [10]:
# Parse the "Timestamp" column into pandas datetimes so that the time arithmetic
# in the following cells (row-to-row differences, resampling) can operate on it.
df["Timestamp"] = pd.to_datetime(df["Timestamp"])

In [11]:
# Timestamp of the previous record for every row (`shift(1)` moves the column one row
# down). The first row has no predecessor, so it is paired with its own timestamp,
# which makes its delta zero.
timestamp_plus_one = df["Timestamp"].shift(1).fillna(df["Timestamp"].iloc[:1])

# Time delta to the previous record in seconds, stored as a float column.
# `.dt.total_seconds()` returns numbers on every pandas version, whereas
# `.astype('timedelta64[s]')` yields a timedelta dtype on pandas >= 2.0, which cannot be
# compared against a plain number such as 300. Fractional seconds are kept.
df["Timestamp_Delta"] = (df["Timestamp"] - timestamp_plus_one).dt.total_seconds()

In [12]:
# Index labels of the rows that come more than 300 seconds after their predecessor.
is_gap_start = df["Timestamp_Delta"] > 300
df_index_split = df.index[is_gap_start]

In [13]:
# Drop the sub-second part of every timestamp (keep year ... second only).
# `.dt.floor("s")` does this for the whole column at once instead of rebuilding a
# pd.Timestamp row by row, and it leaves the column with a datetime dtype.
# NOTE(review): assumes the timestamps are timezone-naive; the previous per-row rebuild
# would have discarded any timezone information - confirm.
df["Timestamp"] = pd.to_datetime(df["Timestamp"]).dt.floor("s")

# Function to create interpolated records based on a DataFrame

In [14]:
def interpolate_df(dataframe):
    """Create interpolated records on a regular 1-minute grid from ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``Timestamp`` column, which becomes the time index, and the
        helper columns ``TimeStamp_Count``, ``DayOfWeek`` and ``Timestamp_Delta``,
        which are removed. The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``Timestamp`` at 1-minute spacing. Rows whose timestamp equals
        one of the input timestamps are removed, so only timestamps that were not
        present in the input remain. Remaining gaps are back-filled.

    Raises
    ------
    KeyError
        If ``Timestamp`` or any of the helper columns is missing.
    """
    helper_columns = ["TimeStamp_Count", "DayOfWeek", "Timestamp_Delta"]

    # set_index/drop return new frames, so the caller's object (often a slice of a
    # larger frame) keeps its index and columns.
    measurements = dataframe.set_index("Timestamp").drop(columns=helper_columns)

    # Rows spaced 1 minute apart ("1min" is the non-deprecated spelling of "1T").
    interpolation_records = measurements.resample("1min").interpolate()

    # Merge the measured rows with the grid rows in time order, then fill the values
    # that are still missing, weighting by the actual time distance between rows.
    df_concat = pd.concat([measurements, interpolation_records], axis=0).sort_index()
    df_concat = df_concat.interpolate(method="time")

    # NOTE(review): dropping by label also removes grid rows that share a timestamp
    # with a measured row, so those minutes are absent from the result - confirm that
    # this is intended.
    return df_concat.drop(index=measurements.index.tolist()).bfill()

# Handle boolean attributes

In [33]:
# Load the metadata table and give its "Unnamed: 0" column the name "Attribute";
# the following cells read that column as the list of attribute names.
# NOTE(review): "Unnamed: 0" is presumably an unnamed index column picked up when the
# table was originally read - confirm.
metadata = retriever.get_data("metadata-year2.pkl")
metadata.rename(columns={"Unnamed: 0": "Attribute"}, inplace=True)

In [34]:
# Names of all attributes whose unit in the metadata is "Binary Status".
is_binary_status = metadata["Units"] == "Binary Status"
boolean_attributes = metadata.loc[is_binary_status, "Attribute"].tolist()

In [35]:
# Keep only the boolean attributes that exist as columns in df (some attributes listed
# in the metadata do not exist in df). Walking df.columns gives a stable, column-ordered
# result; a plain set intersection returns an arbitrary order that can change between runs.
boolean_attribute_names = set(boolean_attributes)
list_boolean_attributes = list(dict.fromkeys(
    column for column in df.columns if column in boolean_attribute_names
))

In [36]:
def resolve_boolean(dataframe, columns=list_boolean_attributes):
    """Round the selected columns of ``dataframe`` to whole numbers, in place.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns are overwritten with their rounded values.
    columns : list of str, optional
        Columns to round; defaults to the boolean attributes found in ``df`` at the
        time this function was defined.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object that was passed in.
    """
    rounded_values = dataframe[columns].round()
    dataframe[columns] = rounded_values
    return dataframe

# Sub-DataFrames interpolated

In [37]:
# df_index_split holds the rows that follow a gap of more than 300 seconds. Cut df at the
# first three of them and run interpolation + boolean rounding on each segment separately.
first_split, second_split, third_split = (df_index_split[i] for i in range(3))
segment_slices = [
    slice(None, first_split),
    slice(first_split, second_split),
    slice(second_split, third_split),
    slice(third_split, None),
]
(
    df_1_interpolated,
    df_2_interpolated,
    df_3_interpolated,
    df_4_interpolated,
) = [
    resolve_boolean(dataframe=interpolate_df(dataframe=df[segment]))
    for segment in segment_slices
]

In [38]:
# Display the first interpolated segment.
df_1_interpolated

Unnamed: 0_level_0,Load_LatentHeatWaterVolume,Load_RefrigeratorTemp,Load_StatusBA1Lights,Load_StatusKitchenLightsA,Load_StatusKitchenLightsB,Load_StatusKitchenLightsC,Load_StatusDRLights,Load_StatusLRLights3,Load_StatusEntryHallLights,Load_StatusBR4Lights,...,SHW_GlycolFlowHXCoriolisSHW,SHW_WaterFlowHXCoriolisSHW,SHW_GlycolFlowRateHXCoriolisSHW,SHW_WaterFlowRateHXCoriolisSHW,HVAC_HeatPumpIndoorUnitPower,HVAC_HeatPumpOutdoorUnitPower,HVAC_DehumidifierPower,HVAC_DehumidifierInletAirTemp,HVAC_DehumidifierExitAirTemp,HVAC_DehumidifierAirflow
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-02-01 00:01:00,0.000000,5.003394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14719.183333,35508.000000,6.073449e-07,-0.000005,184.446333,1106.900333,4.477167,72.101000,74.662667,0.0
2015-02-01 00:02:00,0.000000,5.003394,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14719.183333,35508.000000,6.073449e-07,-0.000005,184.446333,1106.900333,4.477167,72.101000,74.662667,0.0
2015-02-01 00:03:00,0.000000,5.060575,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14719.183333,35508.000000,6.073449e-07,-0.000005,182.155667,1095.666333,4.508333,72.092833,74.710667,0.0
2015-02-01 00:04:00,0.000330,5.135167,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14719.183333,35508.000000,6.073449e-07,-0.000005,169.420000,1085.025167,4.513167,72.076500,74.770667,0.0
2015-02-01 00:05:00,0.001981,5.175073,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14719.183333,35508.000000,6.073449e-07,-0.000005,169.912667,1074.405333,4.513833,72.076000,74.834667,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-10-19 23:54:00,0.799205,5.112026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,316.233238,677.418936,1.896195e-03,0.001210,9.940000,22.300000,4.539000,68.618000,69.998000,0.0
2015-10-19 23:55:00,0.799205,5.112026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,316.233238,677.418936,1.896195e-03,0.001210,9.940000,22.300000,4.539000,68.618000,69.998000,0.0
2015-10-19 23:56:00,0.799205,5.112026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,316.233238,677.418936,1.896195e-03,0.001210,9.940000,22.300000,4.539000,68.618000,69.998000,0.0
2015-10-19 23:57:00,0.799205,5.112026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,316.233238,677.418936,1.896195e-03,0.001210,9.940000,22.300000,4.539000,68.618000,69.998000,0.0
