In [None]:
# Mount Google Drive at /content/drive; the data paths used further down in this
# notebook all live under '/content/drive/My Drive/ashrae_data/'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

import pickle


# Notebook-wide plot defaults: figure size (12, 8) and grid lines switched on.
mpl.rcParams.update({
    'figure.figsize': (12, 8),
    'axes.grid': True,
})

In [None]:
# Locations of the ASHRAE csv files on the mounted Google Drive.
train_csv_path = '/content/drive/My Drive/ashrae_data/train.csv'
weather_train_csv_path = '/content/drive/My Drive/ashrae_data/weather_train.csv'
test_csv_path = '/content/drive/My Drive/ashrae_data/test.csv'
building_metadata_csv_path = '/content/drive/My Drive/ashrae_data/building_metadata.csv'
primary_usage_translations_csv_path = '/content/drive/My Drive/ashrae_data/primary_usage_translations.csv'

# Read the csv files into dataframes, one after the other in the order listed.
# (test_csv_path is only defined in this cell; the file itself is not read here.)
(
    train_df,
    weather_train_df,
    building_metadata_df,
    primary_usage_translations_df,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        train_csv_path,
        weather_train_csv_path,
        building_metadata_csv_path,
        primary_usage_translations_csv_path,
    )
]

## Visualizing a sample
To get a feel for the data, let's have a look at example meter readings from four randomly chosen buildings.
For each building we take a window of 120 consecutive readings, which corresponds to a five-day timeframe of hourly energy consumption.

In [None]:
# Draw four random building ids from [200, 300) and, for each of them, a random
# offset from [0, 1000) into that building's readings.
sample_building_ids = [np.random.randint(200,300) for i in range(4)]
sample_starts = [np.random.randint(0,1000) for i in range(4)]

SAMPLE_SIZE = 120             # number of consecutive readings per plotted window
sample_meters = [0, 0, 1, 1]  # the first two panels show meter 0, the last two meter 1

# Collect the samples: one combined boolean mask per (building, meter) pair,
# followed by an explicit positional window of SAMPLE_SIZE readings.
samples = []
for sample_building, sample_meter, sample_start in zip(sample_building_ids, sample_meters, sample_starts):
    selected = (train_df.building_id == sample_building) & (train_df.meter == sample_meter)
    readings = train_df.loc[selected, 'meter_reading']
    samples.append(readings.iloc[sample_start:sample_start + SAMPLE_SIZE])

# keep the individual names available for later cells
sample1, sample2, sample3, sample4 = samples

# Plot every sample into its own panel of a 2x2 grid (row-major order).
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 6))
for ax, sample, sample_building, sample_meter in zip(axes.flat, samples, sample_building_ids, sample_meters):
    sample.plot(ax=ax, subplots=True, linestyle=(0, (1, 1)), color='purple')
    ax.set_title('building {} / meter {}'.format(sample_building, sample_meter))
fig.tight_layout()

In [None]:
def convert_timestamp(timestamp):
    """Convert a timestamp string into an integer.

    Characters 5 to 13 of the string are kept; blanks, dashes and colons are then
    stripped and the remaining characters are parsed as an int.
    Example: '2016-01-01 00:00:00' -> '01-01 00:' -> '010100' -> 10100.
    """
    separators = str.maketrans('', '', ' -:')
    digits = timestamp[5:14].translate(separators)
    return int(digits)


def get_sample(building_id, meter, start, size, split):
    """Build one (data, labels) pair for a single building and meter.

    The function reads the module-level frames `building_metadata_df`, `train_df`
    and `weather_train_df`.

    Steps:
      1. Look up the site the building belongs to.
      2. Take rows [start, start + size) (by position) of the building's readings
         for `meter`, and rows [start, start + size) (by position) of the weather
         data of that site.
      3. Inner-join both on `timestamp`. Rows whose timestamp is missing on either
         side are dropped, so the merged window can hold fewer than `size` rows.
      4. Convert the timestamp to an int, standardize every column over the whole
         window (mean 0, std 1) and replace NaN by 0.
      5. Append the raw (not standardized) `site_id`, `building_id` and `meter`.

    Column order of the merged window:
        timestamp, meter_reading, air_temperature, cloud_coverage,
        precip_depth_1_hr, wind_speed, site_id, building_id, meter

    Parameters:
        building_id: id of the building to sample from.
        meter:       meter type to sample.
        start:       positional offset of the first row of the window.
        size:        number of rows requested for the whole window.
        split:       number of leading rows that become `data`.

    Returns:
        (data, labels) as numpy arrays: `data` holds the first `split` rows with all
        columns, `labels` holds the `meter_reading` values of the remaining rows.

    Raises:
        IndexError: if `building_id` is not present in `building_metadata_df`.
    """
    # site_id of the building (needed to select the matching weather rows)
    site_id = building_metadata_df.loc[building_metadata_df['building_id'] == building_id, 'site_id'].values[0]

    # readings of this building/meter and weather of its site, both cut to the
    # same positional window
    reading_rows = (train_df['building_id'] == building_id) & (train_df['meter'] == meter)
    t = train_df.loc[reading_rows, ['timestamp', 'meter_reading']].iloc[start:start + size]
    weather_columns = ['timestamp', 'air_temperature', 'cloud_coverage', 'precip_depth_1_hr', 'wind_speed']
    w = weather_train_df.loc[weather_train_df['site_id'] == site_id, weather_columns].iloc[start:start + size]

    # The previous `t.set_index('timestamp')` / `w.set_index('timestamp')` calls were
    # dropped: set_index returns a new frame and the result was discarded, so they
    # had no effect. The merge below joins on the `timestamp` column.
    sample_merged = pd.merge(t, w, on=['timestamp'])  # inner join
    sample_merged['timestamp'] = sample_merged['timestamp'].apply(convert_timestamp)  # make timestamp numeric
    sample_merged = (sample_merged - sample_merged.mean()) / sample_merged.std()  # standardize the data
    sample_merged = sample_merged.fillna(0)  # e.g. constant columns give std == 0 -> NaN

    # identifiers are appended after standardization so they keep their raw values
    sample_merged['site_id'] = site_id
    sample_merged['building_id'] = building_id
    sample_merged['meter'] = meter

    data = sample_merged[:split]  # first `split` rows, all columns
    labels = sample_merged['meter_reading'][split:]  # remaining rows, target column only

    return data.to_numpy(), labels.to_numpy()


def pick_samples(num_buildings = 1, start_range = (0,10), offset = 0,
                 window_size = 5*24, split = 4*24, step = 10, meters = (0, 1)):
    """Use get_sample to build a training data set.

    Buildings are taken from `train_df.building_id.unique()` at positions
    [offset, offset + num_buildings). For every building, every window start in
    range(start_range[0], start_range[1], step) and every meter in `meters`, one
    sample is created with get_sample.

    Parameters:
        num_buildings: number of buildings to sample from.
        start_range:   (first, stop) positional offsets of the window starts.
        offset:        position of the first building in the unique-id list.
        window_size:   rows requested per sample (default 5*24).
        split:         leading rows used as input data (default 4*24).
        step:          distance between two consecutive window starts (default 10).
        meters:        meter types to sample for each building (default (0, 1)).

    Returns:
        (all_data, all_labels): two lists holding the arrays returned by get_sample,
        in creation order.
    """
    all_data, all_labels = [], []
    building_ids = train_df.building_id.unique()[offset:offset + num_buildings]
    starts = range(start_range[0], start_range[1], step)
    # number of samples that will actually be created (integer, for the progress message)
    total = len(building_ids) * len(starts) * len(meters)

    count = 0
    for building_id in building_ids:
        for start in starts:
            for meter in meters:
                count += 1
                data, labels = get_sample(building_id, meter, start, window_size, split)
                all_data.append(data)
                all_labels.append(labels)
                if count % 500 == 0:
                    print("sample {} of {}...".format(count, total))

    return all_data, all_labels

In [None]:
# FIXME(review): `dfs` is not defined in any cell visible in this notebook, so on a
# fresh kernel (Restart & Run All) this cell raises NameError. Presumably a leftover
# from an earlier experiment - confirm, then either define `dfs` above or remove the cell.
# Note: np.random.randint excludes the upper bound, so the index is in 0..48.
display(dfs[np.random.randint(0,49)])

In [None]:
# Training samples come from the 50 buildings at positions 200-249 of the unique
# building-id list, validation samples from the 10 buildings right after them
# (positions 250-259), so the two sets do not share buildings.
x_train, y_train = pick_samples(num_buildings=50, start_range=(0, 1000), offset=200)
x_val, y_val = pick_samples(num_buildings=10, start_range=(0, 1000), offset=250)

## Cleaning the Data
Sometimes a sample ends up with fewer than 24 label values, so we need to remove those samples. This avoids shape problems further down, when the samples are converted into arrays.


In [None]:
def remove_broken_samples(data, labels, output_shape = 24):
    """Drop every sample whose label vector does not have `output_shape` entries.

    `data` and `labels` are parallel sequences; entry i of `data` is kept exactly
    when `labels[i].shape[0] == output_shape`. A one-line summary of removed and
    remaining samples is printed.

    Returns:
        (clean_data, clean_labels): two new lists with the kept entries in their
        original order.
    """
    is_valid = [label.shape[0] == output_shape for label in labels]
    kept_positions = [position for position, valid in enumerate(is_valid) if valid]

    clean_data = [data[position] for position in kept_positions]
    clean_labels = [labels[position] for position in kept_positions]

    removed = len(is_valid) - len(kept_positions)
    print("Removed {} samples... {} remaining.".format(removed, len(kept_positions)))

    return clean_data, clean_labels



# Filter both sets with the default label length of 24.
x_train, y_train = remove_broken_samples(data=x_train, labels=y_train)
x_val, y_val = remove_broken_samples(data=x_val, labels=y_val)

## Store the training data
To avoid sampling the data all over again each time, we store it in pickle files. This allows us to load it much faster the next time.

In [None]:
# !ls drive/My\ Drive/ashrae_data

def pickle_data(filename, data, directory='/content/drive/My Drive/ashrae_data/'):
    """Serialize `data` with pickle into the file `filename` inside `directory`.

    Parameters:
        filename:  name of the pickle file, relative to `directory`.
        data:      any picklable object.
        directory: target folder; defaults to the notebook's data folder on the
                   mounted Google Drive, so existing two-argument calls write to
                   the same location as before.

    Note: the path is built with os.path.join, so an absolute `filename` would
    take precedence over `directory`.
    """
    target_path = os.path.join(directory, filename)
    with open(target_path, 'wb') as f:
        pickle.dump(data, f)


# Write each of the four sample collections to its own pickle file.
for pkl_name, samples_to_store in (
    ('train_samples_x.pkl', x_train),
    ('train_samples_y.pkl', y_train),
    ('val_samples_x.pkl', x_val),
    ('val_samples_y.pkl', y_val),
):
    pickle_data(pkl_name, samples_to_store)

## Load the pickled data
Let's load the data from the pickle files!

In [None]:
def load_pickled(filename, directory='/content/drive/My Drive/ashrae_data/'):
    """Return the object stored in the pickle file `filename` inside `directory`.

    NOTE: pickle.load can execute arbitrary code contained in the file. Only load
    pickle files that were written by this notebook / come from a trusted source.
    """
    with open(os.path.join(directory, filename), 'rb') as f:
        return pickle.load(f)


# Restore the four sample collections from their pickle files.
x_train = load_pickled('train_samples_x.pkl')
y_train = load_pickled('train_samples_y.pkl')
x_val = load_pickled('val_samples_x.pkl')
y_val = load_pickled('val_samples_y.pkl')

In [None]:
# Turn the sample collections (inputs and labels) into numpy arrays.
x_train, y_train = np.array(x_train), np.array(y_train)
x_val, y_val = np.array(x_val), np.array(y_val)



In [None]:
BUFFER_SIZE = 10000  # shuffle buffer size
BATCH_SIZE = 8       # samples per mini batch


def _to_batched_dataset(features, targets):
    """Wrap (features, targets) in a cached, shuffled, batched and endlessly repeating dataset."""
    dataset = tf.data.Dataset.from_tensor_slices((features, targets))
    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset.repeat()


train_data = _to_batched_dataset(x_train, y_train)
val_data = _to_batched_dataset(x_val, y_val)



## Defining the Model
After collecting and preparing all the data we create a model...

In [None]:
# Sequential baseline: LSTM(32) returning the full sequence, a bidirectional
# LSTM(16) and a dense layer with 24 outputs.
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(
        32,
        return_sequences=True,
        input_shape=(x_train[0].shape),
    ),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
    tf.keras.layers.Dense(24),
])

# RMSprop with gradient values clipped to 1.0, mean absolute error as loss
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
    loss='mae',
)


In [None]:
class AshraeModel(tf.keras.Model):
    """Recurrent model that maps an input window to 24 output values.

    Forward pass:
        inputs -> self-attention over the inputs -> concatenate [inputs, attention]
               -> bidirectional LSTM(128, sequences) -> dropout(0.2)
               -> LSTM(128, sequences)               -> dropout(0.2)
               -> bidirectional LSTM(64)             -> Dense(24)

    Dropout is applied through `tf.keras.layers.Dropout` and receives the `training`
    flag, so it is only active while training. The earlier `tf.nn.dropout` calls
    dropped units unconditionally, i.e. also during validation and `predict`.
    """

    def __init__(self):
        super(AshraeModel, self).__init__()

        # Helper layers are created once here instead of on every call().
        self.self_attention = tf.keras.layers.Attention()
        self.concatenate = tf.keras.layers.Concatenate()

        # No input_shape on the inner LSTM: this layer receives the concatenation
        # of the inputs and the attention output, not the raw inputs.
        self.lstm1 = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128, return_sequences=True)) # was 32
        self.dropout1 = tf.keras.layers.Dropout(0.2)
        self.lstm2 = tf.keras.layers.LSTM(128,return_sequences=True)
        self.dropout2 = tf.keras.layers.Dropout(0.2)
        # attribute name (including its spelling) kept unchanged so existing references keep working
        self.bi_lsmt = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, activation = 'tanh')) # is much faster than relu and performs similarly
        self.out = tf.keras.layers.Dense(24)


    def call(self, inputs, training=None):
        """Run the forward pass.

        Parameters:
            inputs:   batch of input windows (this notebook builds the model with
                      shape (8, 96, 9)).
            training: passed on to the dropout layers; Keras sets it during fit().

        Returns:
            Tensor with 24 values per sample.
        """
        # self-attention: the inputs serve as both query and value
        attention_seq = self.self_attention([inputs, inputs])
        x = self.concatenate([inputs, attention_seq])

        x = self.lstm1(x)
        x = self.dropout1(x, training=training)
        x = self.lstm2(x)
        x = self.dropout2(x, training=training)
        x = self.bi_lsmt(x)
        x = self.out(x)

        return x


# Instantiate the subclassed model, compile it like the baseline (RMSprop with
# clipvalue 1.0, MAE loss) and build it with a fixed input shape before printing
# the summary.
model2 = AshraeModel()
model2.compile(
    optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0),
    loss='mae',
)
model2.build(input_shape=(8, 96, 9))
model2.summary()

In [None]:
EVALUATION_INTERVAL = 100  # training steps (batches) per epoch
EPOCHS = 500

# Both datasets repeat endlessly, so the number of steps per epoch and per
# validation run is given explicitly.
history = model2.fit(
    train_data,
    epochs=EPOCHS,
    steps_per_epoch=EVALUATION_INTERVAL,
    validation_data=val_data,
    validation_steps=50,
)


In [None]:
mpl.rcParams['figure.figsize'] = (16, 4)


# Make predictions after training and plot them against the true readings.
for x_batch, y_batch in val_data.take(4):
    # Pick a random sample from the mini batch. np.random.randint excludes the upper
    # bound, so the bound is the actual batch length (the previous BATCH_SIZE-1 could
    # never select the last element and assumed a full batch).
    batch_len = int(x_batch.shape[0])
    r = np.random.randint(0, batch_len)

    window = x_batch[r].numpy()   # input window of the chosen sample
    meter = int(window[2][8])     # column 8 holds the meter type
    print(meter)

    prediction = model2.predict(x_batch)[r]

    # x positions: input window first, predicted horizon right after it
    history_len = window.shape[0]
    horizon = int(y_batch.shape[1])
    past_steps = np.arange(history_len)
    future_steps = np.arange(history_len, history_len + horizon)

    plt.plot(past_steps, window[:, 1], color = 'darkslateblue')  # column 1: meter_reading
    plt.plot(future_steps, y_batch[r].numpy(), color = 'darkslateblue')
    plt.plot(future_steps, prediction, linestyle = 'dashed', color = 'darkorange')
    plt.show()


In [None]:
mpl.rcParams['figure.figsize'] = (12, 8)

# Training loss first, validation loss second, both taken from the fit history.
for loss_key in ('loss', 'val_loss'):
    plt.plot(history.history[loss_key])
plt.show()
