Installing the required software

In [86]:
!pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ ocs-hackdavis==0.34.0
!pip install plotly

Looking in indexes: https://test.pypi.org/simple/, https://pypi.org/simple/


Importing the required libraries

In [16]:
from ocs_hackdavis import (
    ucdavis_buildings,  # list of campus buildings
    ucdavis_ceeds_of,   # list of CEED element of a building (Electricity, Steam, Chilled Water, etc)
    ucdavis_streams_of, # The list of all OCS data streams for a building and CEED pair  
    ucdavis_building_metadata,  # Metadata for a building: building code, lat/long, usage, etc.  
    ocs_stream_interpolated_data,  # Interpolated data from a stream given a time range + interpolation interval
    ucdavis_outside_temperature,  # Outside temperature at UC Davis for a given a time range + interpolation interval
)
from ocs_sample_library_preview import OCSClient
import configparser
import io
import json
import pandas as pd

Configure the OSI credintial 

In [8]:
config_text = u"""
; IMPORTANT: replace these values with those provided by OSIsoft
[Configurations]
Namespace = UC__Davis

[Access]
Resource = https://dat-b.osisoft.com
Tenant = 65292b6c-ec16-414a-b583-ce7ae04046d4
ApiVersion = v1-preview

[Credentials] 
ClientId = 82fca0c2-3004-42c0-81cf-cc6968df1f47
ClientSecret = 3VYy318vxlFGKVuQ4+toahSyg7IqWUVKiGHJDvH/IvY=
"""

In [42]:
config = configparser.ConfigParser(allow_no_value=True)
config.read_file(io.StringIO(config_text))

ocs_client = OCSClient(
    config.get("Access", "ApiVersion"),
    config.get("Access", "Tenant"),
    config.get("Access", "Resource"),
    config.get("Credentials", "ClientId"),
    config.get("Credentials", "ClientSecret"),
)

namespace_id = config.get("Configurations", "Namespace")
print(f"namespace_id: '{namespace_id}'")

namespace_id: 'UC__Davis'


In [82]:
def downloadOneMonth(pandaDataframe,stream_id,start,end,interval):      
    result = ocs_stream_interpolated_data(
            ocs_client,
            namespace_id,
            stream_id,
            start=start,  # UTC
            end=end,
            interval=interval,  # 2 minutes
        )
    
    
    pandaDataframe=pandaDataframe.append(
            pd.read_json(json.dumps(result)).set_index("Timestamp", drop=True)
        )
    return pandaDataframe

    

In [83]:
building_name="Activities and Recreation Center"
interval=60

# Step 1: get the stream Id 
stream_id_steam = ucdavis_streams_of(building_name,"Steam")["Demand"]
stream_id_electricity = ucdavis_streams_of(building_name,"Electricity")["Demand"]
stream_id_chilledWater = ucdavis_streams_of(building_name,"ChilledWater")["Demand"]

train_steam = pd.Series()
train_electricity = pd.Series()
train_chilledWater = pd.Series()

for start_year in [2017,2018]:
    for start_month in range(1, 12):
        start_date=f"{start_year}-{start_month}-01"
        end_date=f"{start_year}-{start_month+1}-01"
        print(f"> processing {start_date} to {end_date}")
        train_steam = downloadOneMonth(train_steam,stream_id_steam,start_date,end_date,interval)
        train_electricity = downloadOneMonth(train_electricity,stream_id_electricity,start_date,end_date,interval)
       # train_chilledWater = downloadOneMonth(train_chilledWater,stream_id_chilledWater,start_date,end_date,interval)
        
                
train_steam = train_steam.loc[~train_steam.index.duplicated(keep="first")]["Value"]
train_electricity = train_electricity.loc[~train_electricity.index.duplicated(keep="first")]["Value"]
#train_chilledWater = train_chilledWater.loc[~train_chilledWater.index.duplicated(keep="first")]["Value"]
# Note there is an additional column "0" with NaN (not a number), it will be remove in the next cell


train=train_steam+train_electricity
len(train_steam), len(train_electricity) 

> processing 2017-1-01 to 2017-2-01
> processing 2017-2-01 to 2017-3-01
> processing 2017-3-01 to 2017-4-01
> processing 2017-4-01 to 2017-5-01
> processing 2017-5-01 to 2017-6-01
> processing 2017-6-01 to 2017-7-01
> processing 2017-7-01 to 2017-8-01
> processing 2017-8-01 to 2017-9-01
> processing 2017-9-01 to 2017-10-01
> processing 2017-10-01 to 2017-11-01
> processing 2017-11-01 to 2017-12-01
> processing 2018-1-01 to 2018-2-01
> processing 2018-2-01 to 2018-3-01
> processing 2018-3-01 to 2018-4-01
> processing 2018-4-01 to 2018-5-01
> processing 2018-5-01 to 2018-6-01
> processing 2018-6-01 to 2018-7-01
> processing 2018-7-01 to 2018-8-01
> processing 2018-8-01 to 2018-9-01
> processing 2018-9-01 to 2018-10-01
> processing 2018-10-01 to 2018-11-01
> processing 2018-11-01 to 2018-12-01


(16025, 16025)

In [84]:
print(train[0])
print(train_steam[0])
print(train_electricity[0])




1341.2847747412109
1143.15979
198.12498474121094


In [87]:
import plotly.express as px 

In [112]:
df = train['2017-01-01':'2018-12-30'].to_frame().reset_index()
df.columns = ['Time', 'Demand']
df

Unnamed: 0,Time,Demand
0,2017-01-01 09:00:00+00:00,1341.284775
1,2017-01-01 10:00:00+00:00,1626.698980
2,2017-01-01 11:00:00+00:00,2107.854460
3,2017-01-01 12:00:00+00:00,1623.275194
4,2017-01-01 13:00:00+00:00,1632.956924
...,...,...
16020,2018-11-30 20:00:00+00:00,804.848425
16021,2018-11-30 21:00:00+00:00,767.300079
16022,2018-11-30 22:00:00+00:00,735.398651
16023,2018-11-30 23:00:00+00:00,861.003792


In [114]:
fig = px.line(df, x="Time", y="Demand")
fig.show()

In [115]:
import csv
wifi_url = "https://apimgmtstelkv30lahnuj362.blob.core.windows.net/content/MediaLibrary/hackdavis/ucdavis_wifi_data.csv"

In [117]:
with requests.Session() as s:
    s.post(url, data=payload)
    download = s.get(wifi)

In [155]:
from __future__ import absolute_import, division, print_function, unicode_literals
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

In [164]:
timeNP = np.array(train.index.to_pydatetime(), dtype=np.datetime64)
demandNP=np.array(train)


parsing timezone aware datetimes is deprecated; this will raise an error in the future



In [162]:
demand_mean = uni_data[:TRAIN_SPLIT].mean()
uni_train_std = uni_data[:TRAIN_SPLIT].std()


array([1341.28477474, 1626.69897957, 2107.8544596 , ...,  735.39865084,
        861.00379155,  696.98423193])

In [170]:
building_name="Activities and Recreation Center"
interval=60

# Step 1: get the stream Id 
stream_id_steam = ucdavis_streams_of(building_name,"Steam")["Demand"]
stream_id_electricity = ucdavis_streams_of(building_name,"Electricity")["Demand"]

test_steam = pd.Series()
test_electricity = pd.Series()

for start_year in [2019]:
    for start_month in range(1, 12):
        start_date=f"{start_year}-{start_month}-01"
        end_date=f"{start_year}-{start_month+1}-01"
        print(f"> processing {start_date} to {end_date}")
        test_steam = downloadOneMonth(test_steam,stream_id_steam,start_date,end_date,interval)
        test_electricity = downloadOneMonth(test_electricity,stream_id_electricity,start_date,end_date,interval)
        
                
test_steam = test_steam.loc[~test_steam.index.duplicated(keep="first")]["Value"]
test_electricity = test_electricity.loc[~test_electricity.index.duplicated(keep="first")]["Value"]
#train_chilledWater = train_chilledWater.loc[~train_chilledWater.index.duplicated(keep="first")]["Value"]
# Note there is an additional column "0" with NaN (not a number), it will be remove in the next cell


test=test_steam+test_electricity
len(test_steam), len(test_electricity) 

> processing 2019-1-01 to 2019-2-01
> processing 2019-2-01 to 2019-3-01
> processing 2019-3-01 to 2019-4-01
> processing 2019-4-01 to 2019-5-01
> processing 2019-5-01 to 2019-6-01
> processing 2019-6-01 to 2019-7-01
> processing 2019-7-01 to 2019-8-01
> processing 2019-8-01 to 2019-9-01
> processing 2019-9-01 to 2019-10-01
> processing 2019-10-01 to 2019-11-01
> processing 2019-11-01 to 2019-12-01


(8017, 8017)

In [178]:
test_df = test['2019-01-01':'2019-12-30'].to_frame().reset_index()
test_df.columns = ['Time', 'Demand']
test_df

Unnamed: 0,Time,Demand
0,2019-01-01 00:00:00+00:00,798.467850
1,2019-01-01 01:00:00+00:00,1375.364107
2,2019-01-01 02:00:00+00:00,950.742767
3,2019-01-01 03:00:00+00:00,1664.408035
4,2019-01-01 04:00:00+00:00,2148.619893
...,...,...
8012,2019-11-30 20:00:00+00:00,158.087456
8013,2019-11-30 21:00:00+00:00,138.285233
8014,2019-11-30 22:00:00+00:00,209.008163
8015,2019-11-30 23:00:00+00:00,169.639992


In [207]:
trainX = np.asarray(train.index,dtype='M8[us]')
print(trainX)
print(train.index[0])
train.index-train.index[0]/ np.timedelta64(1, 's')


['2017-01-01T09:00:00.000000' '2017-01-01T10:00:00.000000'
 '2017-01-01T11:00:00.000000' ... '2018-11-30T22:00:00.000000'
 '2018-11-30T23:00:00.000000' '2018-12-01T00:00:00.000000']
2017-01-01 09:00:00+00:00


UFuncTypeError: ufunc 'true_divide' cannot use operands with types dtype('O') and dtype('<m8[s]')

In [229]:
trainX = np.asarray(train.index,dtype='long')
trainY=np.asarray(train)

testX = np.asarray(test.index,dtype='long')
testY=np.asarray(test)

BATCH_SIZE = 256
BUFFER_SIZE = 10000

train_univariate = tf.data.Dataset.from_tensor_slices((trainX, trainY))
train_univariate = train_univariate.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_univariate = tf.data.Dataset.from_tensor_slices((testX, testY))
val_univariate = val_univariate.batch(BATCH_SIZE).repeat()


In [215]:
trainX.shape

(16025,)

In [226]:
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(8),
    tf.keras.layers.Dense(1)
])

simple_lstm_model.compile(optimizer='adam', loss='mae')



In [230]:
EVALUATION_INTERVAL = 200
EPOCHS = 10

simple_lstm_model.fit(train_univariate, epochs=EPOCHS,
                      steps_per_epoch=EVALUATION_INTERVAL,
                      validation_data=val_univariate, validation_steps=50)

Train for 200 steps, validate for 50 steps
Epoch 1/10
  1/200 [..............................] - ETA: 13s

ValueError: Input 0 of layer sequential_4 is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 1]