<a href="https://colab.research.google.com/github/mayhd3/NSF-REU-2021/blob/main/AMImodels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd

if not os.path.exists('full.zip'):
  !curl --remote-name -H 'Accept: application/vnd.github.v3.raw' --location 'https://github.com/mayhd3/NSF-REU-2021/raw/main/full.zip'

full = pd.read_csv('full.zip')
full.columns = range(len(full.columns))
print(full)

# group dataset by customer
groups = full.groupby(2)
meters = [groups.get_group(group) for group in groups.groups]

(79572, 51)


In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Conv2D, MaxPooling1D, MaxPool2D, Dropout, Flatten
from keras.utils import np_utils
from scipy.signal import find_peaks
from keras.optimizers import Adam

# partition training and testing datasets
def bisect_ratio(array, ratio):
  return (
    [array[i] for i in range(len(array)) if i%ratio == 0],
    [array[i] for i in range(len(array)) if i%ratio != 0]
  )

def bhwc(twod):
  return np.expand_dims(np.array(twod), axis=-1).astype('float32')

X = [meter.iloc[:,5:].to_numpy().flatten() for meter in meters]
y = [np_utils.to_categorical(min(meter.iloc[0,0],1), num_classes=2) for meter in meters]

X_test, X_train = (bhwc(x) for x in bisect_ratio(X, 3))
y_test, y_train = (np.array(yb) for yb in bisect_ratio(y, 3))

model = Sequential()
model.add(Conv1D(32, kernel_size=7, activation='relu', input_shape=X_train.shape[1:]))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(2, activation='softmax'))    

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.fit(X_train, y_train, epochs=52, verbose=2, batch_size=128,validation_split=0.3)
print(model.evaluate(X_test, y_test))

Epoch 1/52
1/1 - 16s - loss: 3.2162 - accuracy: 0.2830 - val_loss: 951.7249 - val_accuracy: 0.0000e+00
Epoch 2/52
1/1 - 1s - loss: 340.1325 - accuracy: 0.7170 - val_loss: 445.9047 - val_accuracy: 0.0000e+00
Epoch 3/52
1/1 - 2s - loss: 158.0347 - accuracy: 0.7170 - val_loss: 133.8114 - val_accuracy: 1.0000
Epoch 4/52
1/1 - 1s - loss: 394.1600 - accuracy: 0.2830 - val_loss: 0.7126 - val_accuracy: 1.0000
Epoch 5/52
1/1 - 1s - loss: 200.8747 - accuracy: 0.2830 - val_loss: 292.5538 - val_accuracy: 0.0000e+00
Epoch 6/52
1/1 - 1s - loss: 244.9329 - accuracy: 0.7170 - val_loss: 453.0124 - val_accuracy: 0.0000e+00
Epoch 7/52
1/1 - 1s - loss: 155.7271 - accuracy: 0.7170 - val_loss: 505.0510 - val_accuracy: 0.0000e+00
Epoch 8/52
1/1 - 1s - loss: 172.6947 - accuracy: 0.7170 - val_loss: 439.4602 - val_accuracy: 0.0000e+00
Epoch 9/52
1/1 - 1s - loss: 148.2484 - accuracy: 0.7170 - val_loss: 297.5457 - val_accuracy: 0.0000e+00
Epoch 10/52
1/1 - 1s - loss: 96.1788 - accuracy: 0.7170 - val_loss: 139.710

In [9]:
# cnn input is a 2D array of semi hourly consumption by week
X0 = [meter.iloc[:,5:].to_numpy() for meter in meters]
y0 = [{0: [0,0], 1: [0,1], 2:[1,0], 3:[1,1]}[meter.iloc[0,0]] for meter in meters]

X0_test, X0_train = bisect_ratio(X0, 3) # (bhwc(X) for X in )
X0_test = bhwc(X0_test)
X0_train = bhwc(X0_train)
y0_test, y0_train = (np.array(y) for y in bisect_ratio(y0, 3))

# cnn structure is (conv -> pool) x3 -> flatten -> dense -> dropout -> dense -> softmax
cnn = Sequential()
cnn.add(Conv2D(32, (3,3), input_shape=X0_train.shape[1:], activation='relu'))
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Conv2D(64,(3,3),activation='relu'))
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Conv2D(128,(5,5),activation='relu'))
cnn.add(MaxPool2D(pool_size=(4,4)))
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.0125))
cnn.add(Dense(64, activation='relu'))
cnn.add(Dense(32, activation='relu'))
cnn.add(Dense(2, activation='softmax'))

cnn.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
cnn.fit(X0_train, y0_train, epochs=52, batch_size=38)
print(cnn.evaluate(X0_test, y0_test))

Epoch 1/52
Epoch 2/52
Epoch 3/52
Epoch 4/52
Epoch 5/52
Epoch 6/52
Epoch 7/52
Epoch 8/52
Epoch 9/52
Epoch 10/52
Epoch 11/52
Epoch 12/52
Epoch 13/52
Epoch 14/52
Epoch 15/52
Epoch 16/52
Epoch 17/52
Epoch 18/52
Epoch 19/52
Epoch 20/52
Epoch 21/52
Epoch 22/52
Epoch 23/52
Epoch 24/52
Epoch 25/52
Epoch 26/52
Epoch 27/52
Epoch 28/52
Epoch 29/52
Epoch 30/52
Epoch 31/52
Epoch 32/52
Epoch 33/52
Epoch 34/52
Epoch 35/52
Epoch 36/52
Epoch 37/52
Epoch 38/52
Epoch 39/52
Epoch 40/52
Epoch 41/52
Epoch 42/52
Epoch 43/52
Epoch 44/52
Epoch 45/52
Epoch 46/52
Epoch 47/52
Epoch 48/52
Epoch 49/52
Epoch 50/52
Epoch 51/52
Epoch 52/52
[2483177216.0, 0.1184210553765297]


In [None]:
# fnn input is a 1D array of consumption over the year
X1 = [x.flatten() for x in X0]
y1 = [meter[1].to_numpy() for meter in meters]

# rnn input is a time series between consumption minima and maxima
X2 = [np.diff(find_peaks(np.mean(x, axis=1))[0]) for x in X0]
y2 = [y != 0 for y in y0]