In [89]:
import datetime
import pytz
import xbos_services_getter
import pandas as pd
import scipy.signal
from scipy import spatial
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit
import numpy as np

start = datetime.datetime(year=2017, month=12, day=31, hour=16, minute=0).replace(tzinfo=pytz.utc)
end = datetime.datetime(year = 2018, month = 5, day = 18, hour = 23, minute = 45).replace(tzinfo=pytz.utc)

window = "15m"
aggregate = 'MEAN'

building_stub = xbos_services_getter.xbos_services_getter.get_building_zone_names_stub("ms.xbos.io:9001", secure=True)


buildings = xbos_services_getter.xbos_services_getter.get_buildings(building_stub)
building_zone_names_stub = xbos_services_getter.get_building_zone_names_stub("ms.xbos.io:9001")

zones = xbos_services_getter.get_zones(building_stub, "orinda-community-center")

temperature_band_stub = xbos_services_getter.get_temperature_band_stub("ms.xbos.io:9001",secure=True)
outdoor_temp_stub = xbos_services_getter.get_outdoor_temperature_historic_stub("ms.xbos.io:9001",secure=True)
outdoor_temp = xbos_services_getter.get_outdoor_temperature_historic(outdoor_temp_stub, "orinda-community-center", start, end,  window) 
# consumption_stub = xbos_services_getter.get_consumption_historic_stub()
hvac_consumption_stub = xbos_services_getter.get_hvac_consumption_stub("ms.xbos.io:9001",secure=True)
hvac_consumption = xbos_services_getter.get_hvac_consumption(hvac_consumption_stub, "orinda-community-center", zone = 'hvac_zone_ac_7')
indoor_data_historical_stub = xbos_services_getter.get_indoor_historic_stub("ms.xbos.io:9001", secure=True)
meter_data_historical_stub = xbos_services_getter.get_meter_data_historical_stub("ms.xbos.io:9001", secure = True)


_Rendezvous: <_Rendezvous of RPC that terminated with:
	status = StatusCode.INVALID_ARGUMENT
	details = "did not fetch data from pymortar with query: SELECT ?temp WHERE {
        ?temp rdf:type brick:Weather_Temperature_Sensor .
    };"
	debug_error_string = "{"created":"@1558133261.520962000","description":"Error received from peer","file":"src/core/lib/surface/call.cc","file_line":1017,"grpc_message":"did not fetch data from pymortar with query: SELECT ?temp WHERE {\n        ?temp rdf:type brick:Weather_Temperature_Sensor .\n    };","grpc_status":3}"
>

In [3]:
zones

['hvac_zone_ac_7',
 'hvac_zone_rm7',
 'hvac_zone_kinder_gym',
 'hvac_zone_ac_6',
 'hvac_zone_ac_3',
 'hvac_zone_rm1',
 'hvac_zone_ac_4',
 'hvac_zone_ac_5',
 'hvac_zone_rm6',
 'hvac_zone_ac_1',
 'hvac_zone_front_office',
 'hvac_zone_ac_2',
 'hvac_zone_rm2',
 'hvac_zone_ac_8']

In [7]:
hvac_consumption

{0: 0, 1: 0.075, 2: 3.3, 3: 0.01, 4: nan, 5: nan, 'UNIT': 'kWh'}

In [48]:
bldg = "avenal-recreation-center"

electric_meter_data = xbos_services_getter.get_meter_data_historical(meter_data_historical_stub,bldg,start,end,electric_point_type,aggregate,window)

zones = xbos_services_getter.get_zones(building_zone_names_stub,bldg)

data = pd.concat([action_df, electric_meter_data], axis =1)
data["X"] = data.index

data.empty

False

In [51]:
electric_point_type = 'Building_Electric_Meter'

best_params_dict = {}

for bldg in buildings:
    zones = xbos_services_getter.get_zones(building_zone_names_stub,bldg)
    action_df=pd.DataFrame()
    for zone in zones:
        s = xbos_services_getter.get_indoor_actions_historic(indoor_data_historical_stub,bldg,zone,start,end,window,aggregate)
        action_df = pd.concat([action_df, s], axis =1)

    if bldg == "csu-dominguez-hills":
        print("no meter or greenbutton data for csu-dominguez-hills")
        continue
        
# NO GREENBUTTON DATA FOR jesse-turner-center
    if bldg == "jesse-turner-center":
        electric_meter_data = xbos_services_getter.get_meter_data_historical(meter_data_historical_stub,bldg,start,end,electric_point_type,aggregate,window)

    electric_meter_data = xbos_services_getter.get_meter_data_historical(meter_data_historical_stub,bldg,start,end,electric_point_type,aggregate,window)
    
    data = pd.concat([action_df, electric_meter_data], axis =1)
    data["X"] = data.index
    data.rename(columns={"power":"y"}, inplace =True)
    
    scalerY = MinMaxScaler(feature_range=(0, 1))
    scalerX = MinMaxScaler(feature_range=(0, 1))
    scalerS = MinMaxScaler(feature_range=(0, 1))

    data["is_weekday"] = [is_weekday(x.weekday()) for x in data["X"]]
    data["is_workday"] = [is_workday(x.hour) for x in data["X"]]
    data["is_spring"]= [is_Spring(x.month) for x in data["X"]]
    data["is_summer"]= [is_Summer(x.month) for x in data["X"]]
    data["is_autumn"]= [is_Autumn(x.month) for x in data["X"]]
    data["is_winter"]= [is_Winter(x.month) for x in data["X"]]

    data.dropna(inplace=True)
    
    if data.empty:
        print(bldg+" is empty")
        continue
    
    data["X"] = scalerX.fit_transform(data["X"].values.reshape(-1,1))
    data["y"] = scalerY.fit_transform(data["y"].values.reshape(-1,1))

    prediction_window=16
    training_data=data[:-prediction_window]
    test_data = data[-prediction_window:]

    train_labels = training_data["y"]
    train_features = training_data.drop("y", axis = 1)
    test_labels = test_data["y"]
    test_features = test_data.drop("y", axis = 1)
    
    rf = RandomForestRegressor()

    parameters = {
        'n_estimators':[50, 75,125],
        'max_depth': [3, 5, 15,20],
        "min_samples_split":[5,10, 15, 20, 25],
        'min_samples_leaf': [3, 7, 12, 15, 20],
        "min_impurity_decrease": [1e-7, 1e-6, 1e-8]
    }

    clf = GridSearchCV(rf, parameters, cv=3, verbose = 1)
    clf.fit(train_features, train_labels)
    
    print(clf.best_params_)
    best_params_dict[bldg] = clf.best_params_  



Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed:  7.6min finished


{'max_depth': 15, 'min_impurity_decrease': 1e-08, 'min_samples_leaf': 3, 'min_samples_split': 15, 'n_estimators': 125}
avenal-recreation-center is empty




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 48.2min finished


{'max_depth': 20, 'min_impurity_decrease': 1e-06, 'min_samples_leaf': 3, 'min_samples_split': 5, 'n_estimators': 50}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed:  6.3min finished


{'max_depth': 3, 'min_impurity_decrease': 1e-08, 'min_samples_leaf': 7, 'min_samples_split': 15, 'n_estimators': 50}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 18.4min finished


{'max_depth': 3, 'min_impurity_decrease': 1e-07, 'min_samples_leaf': 20, 'min_samples_split': 20, 'n_estimators': 50}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 29.0min finished


{'max_depth': 5, 'min_impurity_decrease': 1e-06, 'min_samples_leaf': 7, 'min_samples_split': 25, 'n_estimators': 50}
                           power
datetime                        
2017-12-31 16:00:00+00:00    NaN
2017-12-31 16:15:00+00:00    NaN
2017-12-31 16:30:00+00:00    NaN
2017-12-31 16:45:00+00:00    NaN
2017-12-31 17:00:00+00:00    NaN
2017-12-31 17:15:00+00:00    NaN
2017-12-31 17:30:00+00:00    NaN
2017-12-31 17:45:00+00:00    NaN
2017-12-31 18:00:00+00:00    NaN
2017-12-31 18:15:00+00:00    NaN
2017-12-31 18:30:00+00:00    NaN
2017-12-31 18:45:00+00:00    NaN
2017-12-31 19:00:00+00:00    NaN
2017-12-31 19:15:00+00:00    NaN
2017-12-31 19:30:00+00:00    NaN
2017-12-31 19:45:00+00:00    NaN
2017-12-31 20:00:00+00:00    NaN
2017-12-31 20:15:00+00:00    NaN
2017-12-31 20:30:00+00:00    NaN
2017-12-31 20:45:00+00:00    NaN
2017-12-31 21:00:00+00:00    NaN
2017-12-31 21:15:00+00:00    NaN
2017-12-31 21:30:00+00:00    NaN
2017-12-31 21:45:00+00:00    NaN
2017-12-31 22:00:00+00:00



Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 22.2min finished


{'max_depth': 5, 'min_impurity_decrease': 1e-06, 'min_samples_leaf': 15, 'min_samples_split': 5, 'n_estimators': 50}
no meter or greenbutton data for csu-dominguez-hills




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed:  9.2min finished


{'max_depth': 3, 'min_impurity_decrease': 1e-08, 'min_samples_leaf': 20, 'min_samples_split': 20, 'n_estimators': 125}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 1146.0min finished


{'max_depth': 5, 'min_impurity_decrease': 1e-07, 'min_samples_leaf': 15, 'min_samples_split': 10, 'n_estimators': 50}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 24.1min finished


{'max_depth': 5, 'min_impurity_decrease': 1e-08, 'min_samples_leaf': 7, 'min_samples_split': 25, 'n_estimators': 50}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed:  9.4min finished


{'max_depth': 3, 'min_impurity_decrease': 1e-07, 'min_samples_leaf': 3, 'min_samples_split': 20, 'n_estimators': 50}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 13.3min finished


{'max_depth': 5, 'min_impurity_decrease': 1e-06, 'min_samples_leaf': 20, 'min_samples_split': 10, 'n_estimators': 75}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed: 13.6min finished


{'max_depth': 5, 'min_impurity_decrease': 1e-08, 'min_samples_leaf': 3, 'min_samples_split': 25, 'n_estimators': 75}




Fitting 3 folds for each of 900 candidates, totalling 2700 fits
{'max_depth': 3, 'min_impurity_decrease': 1e-08, 'min_samples_leaf': 3, 'min_samples_split': 25, 'n_estimators': 50}


[Parallel(n_jobs=1)]: Done 2700 out of 2700 | elapsed:  8.6min finished


In [52]:
best_params_dict

{'avenal-veterans-hall': {'max_depth': 15,
  'min_impurity_decrease': 1e-08,
  'min_samples_leaf': 3,
  'min_samples_split': 15,
  'n_estimators': 125},
 'orinda-community-center': {'max_depth': 20,
  'min_impurity_decrease': 1e-06,
  'min_samples_leaf': 3,
  'min_samples_split': 5,
  'n_estimators': 50},
 'local-butcher-shop': {'max_depth': 3,
  'min_impurity_decrease': 1e-08,
  'min_samples_leaf': 7,
  'min_samples_split': 15,
  'n_estimators': 50},
 'avenal-public-works-yard': {'max_depth': 3,
  'min_impurity_decrease': 1e-07,
  'min_samples_leaf': 20,
  'min_samples_split': 20,
  'n_estimators': 50},
 'hayward-station-1': {'max_depth': 5,
  'min_impurity_decrease': 1e-06,
  'min_samples_leaf': 7,
  'min_samples_split': 25,
  'n_estimators': 50},
 'ciee': {'max_depth': 5,
  'min_impurity_decrease': 1e-06,
  'min_samples_leaf': 15,
  'min_samples_split': 5,
  'n_estimators': 50},
 'berkeley-corporate-yard': {'max_depth': 3,
  'min_impurity_decrease': 1e-08,
  'min_samples_leaf': 20,


## Do the predictions

In [None]:
## Evaluate model for predictions (quantitative)
## prediction function 
# if time, make it autoregressive 
## lit review for HVAC consumption 

In [135]:
evaluate_metrics={}

for bldg in buildings:
    if bldg == "avenal-recreation-center":
        # no data for this one to have previously trained on
        continue
        
    zones = xbos_services_getter.get_zones(building_zone_names_stub,bldg)
    action_df=pd.DataFrame()
    
    for zone in zones:
        s = xbos_services_getter.get_indoor_actions_historic(indoor_data_historical_stub,bldg,zone,start,end,window,aggregate)
        action_df = pd.concat([action_df, s], axis =1)

    if bldg == "csu-dominguez-hills":
        print("no meter or greenbutton data for csu-dominguez-hills")
        continue
        
    if bldg == "jesse-turner-center":
        continue

    electric_meter_data = xbos_services_getter.get_meter_data_historical(meter_data_historical_stub,bldg,start,end,electric_point_type,aggregate,window)
    
    data = pd.concat([action_df, electric_meter_data], axis =1)
    data["X"] = data.index
    data.rename(columns={"power":"y"}, inplace =True)
    
    scalerY = MinMaxScaler(feature_range=(0, 1))
    scalerX = MinMaxScaler(feature_range=(0, 1))
    scalerS = MinMaxScaler(feature_range=(0, 1))

    data["is_weekday"] = [is_weekday(x.weekday()) for x in data["X"]]
    data["is_workday"] = [is_workday(x.hour) for x in data["X"]]
    data["is_spring"]= [is_Spring(x.month) for x in data["X"]]
    data["is_summer"]= [is_Summer(x.month) for x in data["X"]]
    data["is_autumn"]= [is_Autumn(x.month) for x in data["X"]]
    data["is_winter"]= [is_Winter(x.month) for x in data["X"]]

    data.dropna(inplace=True)
    
    if data.empty:
        print(bldg+" is empty")
        continue
    
    data["X"] = scalerX.fit_transform(data["X"].values.reshape(-1,1))
    data["y"] = scalerY.fit_transform(data["y"].values.reshape(-1,1))

    prediction_window=16
    training_data=data[:-prediction_window]
    test_data = data[-prediction_window:]

    train_labels = training_data["y"]
    train_features = training_data.drop("y", axis = 1)
    test_labels = test_data["y"]
    test_features = test_data.drop("y", axis = 1)
    
    params = best_params_dict[bldg]
        
    rf = RandomForestRegressor(max_depth = params["max_depth"],
                              min_impurity_decrease= params["min_impurity_decrease"],
                              min_samples_leaf = params["min_samples_leaf"],
                              min_samples_split = params["min_samples_split"],
                              n_estimators = params["n_estimators"])
    rf.fit(train_features, train_labels)
    
    # printing evaluation criteria
   
    evaluate_metrics[bldg] = evaluate(rf, train_features,train_labels)



no meter or greenbutton data for csu-dominguez-hills




In [137]:
evaluate_metrics

{'avenal-veterans-hall': (0.061132208828488833, -inf),
 'orinda-community-center': (0.106635924842908, -inf),
 'local-butcher-shop': (0.11054764679974657, 56.943964948999394),
 'avenal-public-works-yard': (0.0852879161829426, -inf),
 'hayward-station-1': (0.05217284597382923, -inf),
 'ciee': (0.070558040288807, -inf),
 'berkeley-corporate-yard': (0.11146597285339532, 65.49463529776713),
 'south-berkeley-senior-center': (0.14204553570422593, -3.873610772623053),
 'avenal-movie-theatre': (0.06984034380386699, -inf),
 'avenal-animal-shelter': (0.07083312798799471, -inf),
 'north-berkeley-senior-center': (0.15847659029941874, -inf),
 'word-of-faith-cc': (0.07548532712345768, -937.1049653232194),
 'hayward-station-8': (0.03536464598881902, -inf)}

In [138]:
def evaluate(model, train_features, train_labels):
    tscv = TimeSeriesSplit(n_splits=5)
    e = []
    a = []
    for train_index, test_index in tscv.split(train_features):
        rf.fit(train_features.iloc[train_index],train_labels.iloc[train_index])
        predictions = model.predict(train_features.iloc[test_index])
        errors = abs(predictions - train_labels.iloc[test_index])
        mape = 100 * np.mean(errors / train_labels.iloc[test_index])
        accuracy = 100 - mape
        e.append(np.mean(errors))
        a.append(accuracy)
    return(np.mean(e), np.mean(a))


In [139]:
def predict(model, test_features):
    return(model.predict(test_features))

## Utils

In [35]:
def is_Spring(month):
    if month >3 and month<7:
        return 1
    else: 
        return 0

def is_Summer(month):
    if month> 6 and month < 10:
        return 1
    else: 
        return 0 

def is_Autumn(month): 
    if month > 9:
        return 1 
    else:
        return 0 

def is_Winter(month):
    if month ==12 or month <3:
        return 1 
    else:
        return 0

def timeseries_to_supervised(data, lag=1, steps = 10, dropnan= True, prefix= ""):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    for i in range(lag, 0, -1):
        cols.append(df.shift(i))
        names += [(prefix + 'var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    for i in range(0, steps):
        cols.append(df.shift(-i))
        if i == 0:
            names += [(prefix + 'var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [(prefix + 'var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan: 
        agg.dropna(inplace=True)
    return agg

def is_weekday(day):
    if day in range(0,5):
        return 1
    else:
        return 0


def is_workday(hour):
    if hour in range(8,18):
        return 1
    else:
        return 0

def is_holiday(day):
    holidays = pd.DatetimeIndex(['2016-11-11'])
    for d in ('2016-11-11', '2016-11-24', '2016-11-25', '2016-12-23', '2016-12-26', '2016-12-30', '2017-01-16', '2017-02-20', 
              '2017-02-20', '2017-03-27', '2017-03-28', '2017-03-29', '2017-03-30', '2017-03-31', '2017-07-04', '2017-09-04',
              '2017-11-10', '2017-11-23', '2017-11-24', '2017-12-22', '2017-12-25', '2017-12-29', '2018-01-01', '2018-01-15',
              '2018-02-19', '2018-03-26', '2018-03-27', '2018-03-28', '2018-03-30', '2018-03-30', '2018-04-28', '2018-07-04',
              '2018-09-03'
             ):
         holidays = holidays.append(pd.DatetimeIndex([d]))
    
    dateOnly = pd.DataFrame(pd.DatetimeIndex((data.index).date))

    data['Holiday'] = pd.DatetimeIndex(dateOnly[0]).isin(holidays)
    data['Holiday'] = data['Holiday'].replace({ True : 1, False : 0 })
    

def interpolate_uncontrolled(data):

    data.ix[data["s0"]>.2, "y"] = np.nan
    data.ix[data["s1"]>.2, "y"] = np.nan
    data.ix[data["s2"]>.2, "y"] = np.nan
    data.ix[data["s3"]>.2, "y"] = np.nan
    data.ix[data["s4"]>.2, "y"] = np.nan
    data.ix[data["s5"]>.2, "y"] = np.nan
    data.ix[data["s6"]>.2, "y"] = np.nan
    data.ix[data["s7"]>.2, "y"] = np.nan
    data.ix[data["s8"]>.2, "y"] = np.nan
    data.ix[data["s9"]>.2, "y"] = np.nan
    data.ix[data["s10"]>.2, "y"] = np.nan
    data.ix[data["s11"]>.2, "y"] = np.nan
    data.ix[data["s12"]>.2, "y"] = np.nan
    data.ix[data["s13"]>.2, "y"] = np.nan

    data.ix[data["y"].isna(), "interpolated"]=1
    data.ix[data["interpolated"].isna(), "interpolated"]=0

    data["y"].interpolate(method = "piecewise_polynomial", inplace=True)

    data.drop(["s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13"], axis = 1, inplace = True)
    data.dropna(inplace=True)

    return data

def add_features(data):
    data["is_weekday"] = [is_weekday(x.weekday()) for x in data["X"]]
    data["is_workday"] = [is_workday(x.hour) for x in data["X"]]
    data["is_spring"]= [is_Spring(x.month) for x in data["X"]]
    data["is_summer"]= [is_Summer(x.month) for x in data["X"]]
    data["is_autumn"]= [is_Autumn(x.month) for x in data["X"]]
    data["is_winter"]= [is_Winter(x.month) for x in data["X"]]
    return data 
