<a href="https://colab.research.google.com/github/mayhd3/NSF-REU-2021/blob/main/AMImodels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import pandas as pd

if not os.path.exists('full.zip'):
  !curl --remote-name -H 'Accept: application/vnd.github.v3.raw' --location 'https://github.com/mayhd3/NSF-REU-2021/raw/main/full.zip'

full = pd.read_csv('full.zip')
full.columns = range(len(full.columns))
print(full)

# group dataset by customer
groups = full.groupby(2)
meters = [groups.get_group(group) for group in groups.groups]

       0   1    2         3   ...         47         48         49         50
0       0   0    1  20160101  ...  41.593667  37.221333  38.499467  51.175717
1       0   0    1  20160102  ...  77.312267  59.396183  58.420417  69.247467
2       0   0    1  20160103  ...  74.508550  62.454550  47.916017  87.413983
3       0   0    1  20160104  ...  57.245617  59.145133  72.321350  57.762917
4       0   0    1  20160105  ...  55.368267  66.245233  55.601867  99.809000
...    ..  ..  ...       ...  ...        ...        ...        ...        ...
79567   1   1  228  20161210  ...   4.873561   3.429628   2.911512   4.280083
79568   1   1  228  20161211  ...   4.217476   2.840308   5.403259   3.768587
79569   1   1  228  20161212  ...   2.707539   3.863132   2.787403   2.745004
79570   1   1  228  20161213  ...   3.045367   3.111105   2.116264   4.023371
79571   1   1  228  20161214  ...   5.842780   4.448763   3.460053   4.307783

[79572 rows x 51 columns]


In [13]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Conv2D, MaxPooling1D, MaxPool2D, Dropout, Flatten
from keras.utils import np_utils
from scipy.signal import find_peaks
from keras.optimizers import Adam, SGD

# partition training and testing datasets
def bisect_ratio(array, ratio):
  return (
    [array[i] for i in range(len(array)) if i%ratio == 0],
    [array[i] for i in range(len(array)) if i%ratio != 0]
  )

def bhwc(twod):
  return np.expand_dims(np.array(twod), axis=-1).astype('float32')

X = [meter.iloc[:,5:].to_numpy().flatten() for meter in meters]
y = [np_utils.to_categorical(min(meter.iloc[0,0],1), num_classes=2) for meter in meters]

X_test, X_train = (bhwc(x) for x in bisect_ratio(X, 3))
y_test, y_train = (np.array(yb) for yb in bisect_ratio(y, 3))

model = Sequential()
model.add(Conv1D(32, kernel_size=7, activation='relu', input_shape=X_train.shape[1:]))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(2, activation='softmax'))    

model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
model.fit(X_train, y_train, epochs=52, verbose=2, batch_size=128,validation_split=0.3)
print(model.evaluate(X_test, y_test))

Epoch 1/52
1/1 - 3s - loss: 1.7667 - accuracy: 0.7264 - val_loss: 663.2472 - val_accuracy: 0.0000e+00
Epoch 2/52
1/1 - 2s - loss: 848.7089 - accuracy: 0.7170 - val_loss: 1063.4025 - val_accuracy: 0.0000e+00
Epoch 3/52
1/1 - 2s - loss: 370.0658 - accuracy: 0.7170 - val_loss: 690.8433 - val_accuracy: 0.0000e+00
Epoch 4/52
1/1 - 1s - loss: 230.8389 - accuracy: 0.7170 - val_loss: 71.9506 - val_accuracy: 0.4348
Epoch 5/52
1/1 - 1s - loss: 16.3994 - accuracy: 0.8208 - val_loss: 8.6641e-09 - val_accuracy: 1.0000
Epoch 6/52
1/1 - 1s - loss: 421.3506 - accuracy: 0.2830 - val_loss: 106.4271 - val_accuracy: 0.5217
Epoch 7/52
1/1 - 1s - loss: 117.4249 - accuracy: 0.8113 - val_loss: 414.8106 - val_accuracy: 0.0000e+00
Epoch 8/52
1/1 - 1s - loss: 133.3101 - accuracy: 0.7170 - val_loss: 562.5873 - val_accuracy: 0.0000e+00
Epoch 9/52
1/1 - 1s - loss: 184.8921 - accuracy: 0.7170 - val_loss: 540.0738 - val_accuracy: 0.0000e+00
Epoch 10/52
1/1 - 1s - loss: 176.5121 - accuracy: 0.7170 - val_loss: 422.2856

In [23]:
# cnn input is a 2D array of semi hourly consumption by week
X0 = [meter.iloc[:,5:].to_numpy() for meter in meters]
y0 = [{0: [0,0], 1: [0,1], 2:[1,0], 3:[1,1]}[meter.iloc[0,0]] for meter in meters]

X0_test, X0_train = bisect_ratio(X0, 3) # (bhwc(X) for X in )
X0_test = bhwc(X0_test)
X0_train = bhwc(X0_train)
y0_test, y0_train = (np.array(y) for y in bisect_ratio(y0, 3))

# cnn structure is (conv -> pool) x3 -> flatten -> dense -> dropout -> dense -> softmax
cnn = Sequential()
cnn.add(Conv2D(32, (5,5), input_shape=X0_train.shape[1:], activation='relu'))
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Conv2D(32,(3,3),activation='relu'))
#cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.125))
cnn.add(Dense(32, activation='relu'))
cnn.add(Dense(2, activation='softmax'))

cnn.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=2**-8), metrics=['accuracy'])
cnn.fit(X0_train, y0_train, epochs=52)
print(cnn.evaluate(X0_test, y0_test))

Epoch 1/52
Epoch 2/52
Epoch 3/52
Epoch 4/52
Epoch 5/52
Epoch 6/52
Epoch 7/52


KeyboardInterrupt: ignored

In [4]:
# fnn input is a 1D array of consumption over the year
X1 = [x.flatten() for x in X0]
y1 = [meter[1].to_numpy() for meter in meters]

# rnn input is a time series between consumption minima and maxima
X2 = [np.diff(find_peaks(np.mean(x, axis=1))[0]) for x in X0]
y2 = [y != 0 for y in y0]

In [10]:
print(2**-10)

0.0009765625
