In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from P6_code.FinishedCode.importData import ImportEV
from P6_code.FinishedCode.dataTransformation import createUsers
from P6_code.FinishedCode.functions import split_sequences
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from itertools import chain

In [2]:
start, end = "2018-06-01", "2018-11-09"
df_train = ImportEV().getCaltech(start_date=start, end_date=end, removeUsers=True, userSampleLimit=25)

In [3]:
users = createUsers(df_train, start, end)
userID = users.data.userID.unique()
user_df = []

for user in userID:
    user_df.append(users.getUserData(user=user))

print(user_df[2])

      chargingTime  kWhDelivered  Year Month Day  Weekday Hour
0              0.0           0.0  2018    06  01        4   00
1              0.0           0.0  2018    06  01        4   01
2              0.0           0.0  2018    06  01        4   02
3              0.0           0.0  2018    06  01        4   03
4              0.0           0.0  2018    06  01        4   04
...            ...           ...   ...   ...  ..      ...  ...
3859           0.0           0.0  2018    11  08        3   19
3860           0.0           0.0  2018    11  08        3   20
3861           0.0           0.0  2018    11  08        3   21
3862           0.0           0.0  2018    11  08        3   22
3863           0.0           0.0  2018    11  08        3   23

[3864 rows x 7 columns]


In [4]:
max_kWh = max([user["kWhDelivered"][i] for user in user_df for i in range(len(user))])

In [5]:
max_charging = max([user["chargingTime"][i] for user in user_df for i in range(len(user))])

In [6]:
X, Y = [], []


for user in user_df:
    Y.append(user.chargingTime)
    X.append(user.drop(columns=['chargingTime']))
Y

[0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 3859    0.0
 3860    0.0
 3861    0.0
 3862    0.0
 3863    0.0
 Name: chargingTime, Length: 3864, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 3859    0.0
 3860    0.0
 3861    0.0
 3862    0.0
 3863    0.0
 Name: chargingTime, Length: 3864, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 3859    0.0
 3860    0.0
 3861    0.0
 3862    0.0
 3863    0.0
 Name: chargingTime, Length: 3864, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 3859    0.0
 3860    0.0
 3861    0.0
 3862    0.0
 3863    0.0
 Name: chargingTime, Length: 3864, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 3859    0.0
 3860    0.0
 3861    0.0
 3862    0.0
 3863    0.0
 Name: chargingTime, Length: 3864, dtype: float64,
 0       0.0
 1       0.0
 2  

In [7]:
ss = StandardScaler()

for user in X:
    ss.fit(user)

X_scaled = []

for user in X:
    X_scaled.append(ss.transform(user))


In [8]:
sc = MinMaxScaler(feature_range = (0,1))

for user in Y:
    sc.fit(user.values.reshape(-1, 1))

user_df_scaled = []

for user in Y:
    user_df_scaled.append(sc.transform(user.values.reshape(-1, 1)))

user_df_scaled

[array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0.],
        [0.],
        [0.],
        ...,
        [0.],
        [0.],
        [0.]]),
 array([[0

In [9]:
total_samples = len(X[0])
train_test_cutoff = round(0.2 * total_samples)

In [10]:
X_train, X_val = [], []
Y_train,Y_val = [], []

for user in range(len(user_df_scaled)):
    user_X, user_Y = split_sequences(X_scaled[user], user_df_scaled[user], 10, 6)

    X_train.append(user_X[:-train_test_cutoff])
    X_val.append(user_X[-train_test_cutoff:])

    Y_train.append(user_Y[:-train_test_cutoff])
    Y_val.append(user_Y[-train_test_cutoff:])

In [11]:
model = Sequential()
model.add(LSTM(4, input_shape=(10, 6))) # todo: add inputs steps
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

In [12]:
compiled_X = np.array([])
for sublist in X_train:
    compiled_X = np.append(compiled_X, sublist)

compiled_Y = np.array([])
for sublist in Y_train:
    compiled_Y = np.append(compiled_Y, sublist)

In [13]:
X_train[0].shape, Y_train[0].shape

((3077, 10, 6), (3077, 6))

In [14]:
for i in range(len(X_train)):
    model.fit(x=X_train[i], y=Y_train[i], epochs=30, batch_size=20, verbose=2)

Epoch 1/30
154/154 - 3s - loss: 0.0629 - 3s/epoch - 17ms/step
Epoch 2/30
154/154 - 0s - loss: 0.0135 - 417ms/epoch - 3ms/step
Epoch 3/30
154/154 - 0s - loss: 0.0080 - 420ms/epoch - 3ms/step
Epoch 4/30
154/154 - 0s - loss: 0.0071 - 399ms/epoch - 3ms/step
Epoch 5/30
154/154 - 1s - loss: 0.0068 - 534ms/epoch - 3ms/step
Epoch 6/30
154/154 - 0s - loss: 0.0067 - 436ms/epoch - 3ms/step
Epoch 7/30
154/154 - 0s - loss: 0.0066 - 439ms/epoch - 3ms/step
Epoch 8/30
154/154 - 0s - loss: 0.0065 - 384ms/epoch - 2ms/step
Epoch 9/30
154/154 - 0s - loss: 0.0065 - 388ms/epoch - 3ms/step
Epoch 10/30
154/154 - 0s - loss: 0.0065 - 397ms/epoch - 3ms/step
Epoch 11/30
154/154 - 0s - loss: 0.0064 - 435ms/epoch - 3ms/step
Epoch 12/30
154/154 - 0s - loss: 0.0064 - 380ms/epoch - 2ms/step
Epoch 13/30
154/154 - 0s - loss: 0.0064 - 390ms/epoch - 3ms/step
Epoch 14/30
154/154 - 0s - loss: 0.0063 - 386ms/epoch - 3ms/step
Epoch 15/30
154/154 - 0s - loss: 0.0063 - 391ms/epoch - 3ms/step
Epoch 16/30
154/154 - 0s - loss: 0.0

In [15]:
test_start, test_end = "2018-11-09", "2019-01-01"
df_test = ImportEV().getCaltech(start_date=test_start, end_date=test_end, removeUsers=True, userSampleLimit=15)

In [16]:
users = createUsers(df_test, test_start, test_end)
user_id = users.data.userID.unique()
user_id

array(['000000743', '000001092', '000000560', '000001097', '000000858',
       '000000234', '000000559', '000000324', '000000567', '000000068',
       '000000891', '000001083', '000000714', '000001135', '000001095',
       '000001082', '000001124', '000000609', '000000632', '000000222',
       '000000754', '000001161', '000001093', '000000777', '000001126',
       '000000838', '000000569', '000000945', '000001102', '000001104',
       '000000566', '000000637', '000000556', '000001001', '000000562',
       '000000558', '000001366'], dtype=object)

In [17]:
user_df_test = []

for user in user_id:
    user_df_test.append(users.getUserData(user=user))

In [18]:
X_test, Y_test = [], []

for user in user_df_test:
    Y_test.append(user.chargingTime)
    X_test.append(user.drop(columns=['chargingTime']))

In [19]:
X_test_scaled = []

for user in X_test:
    X_test_scaled.append(ss.transform(user))

In [26]:
users_test_X, users_test_Y = [], []

for user in range(len(X_test_scaled)):
    user_test_X, user_test_Y = split_sequences(X_test_scaled[user], np.array(Y_test[user]).reshape(-1, 1), 10, 6)
    users_test_X.append(user_test_X)
    users_test_Y.append(user_test_Y)

[array([[   0., 6289.,    0.,    0.,    0.,    0.],
        [6289.,    0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.,    0.],
        ...,
        [   0.,    0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.,    0.]]),
 array([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]]),
 array([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]]),
 array([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]]),
 array([[0., 0.,

In [27]:
test_predict, test_Y = [], []

for i in range(len(users_test_X)):
    test_predict.append(sc.inverse_transform(model.predict(users_test_X[i]).reshape(-1, 1)))

In [33]:
Y_test

[0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 1267    0.0
 1268    0.0
 1269    0.0
 1270    0.0
 1271    0.0
 Name: chargingTime, Length: 1272, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 1267    0.0
 1268    0.0
 1269    0.0
 1270    0.0
 1271    0.0
 Name: chargingTime, Length: 1272, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 1267    0.0
 1268    0.0
 1269    0.0
 1270    0.0
 1271    0.0
 Name: chargingTime, Length: 1272, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 1267    0.0
 1268    0.0
 1269    0.0
 1270    0.0
 1271    0.0
 Name: chargingTime, Length: 1272, dtype: float64,
 0       0.0
 1       0.0
 2       0.0
 3       0.0
 4       0.0
        ... 
 1267    0.0
 1268    0.0
 1269    0.0
 1270    0.0
 1271    0.0
 Name: chargingTime, Length: 1272, dtype: float64,
 0       0.0
 1       0.0
 2  

In [61]:
mean_squared_error(test_predict[0], users_test_Y[0][:,0])

1221712.7209544787

In [64]:
mean_absolute_percentage_error(test_predict[0], users_test_Y[0][:,0])

2.252805580357705

In [65]:
mean_absolute_error(test_predict[0], users_test_Y[0][:,0])

506.30943466265745

In [70]:
model.evaluate(users_test_X[0], users_test_Y[0], verbose=2)

40/40 - 0s - loss: 1027532.6250 - 85ms/epoch - 2ms/step


1027532.625

In [60]:
users_test_Y[0][:,0]

array([   0., 6289.,    0., ...,    0.,    0.,    0.])

In [57]:
users_test_Y

[array([[   0., 6289.,    0.,    0.,    0.,    0.],
        [6289.,    0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.,    0.],
        ...,
        [   0.,    0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.,    0.],
        [   0.,    0.,    0.,    0.,    0.,    0.]]),
 array([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]]),
 array([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]]),
 array([[0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        ...,
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.]]),
 array([[0., 0.,