In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

## Import the Datasets and Scale Them

1. Put all dataset names from the datasets folder here
2. Change how many timesteps are desired
3. Choose the fraction of the data that becomes test data (e.g. 0.1 = 10%)

In [None]:
# Recordings to load; every file name is prefixed with the datasets/ folder.
# NOTE(review): 'trail2' below differs from the neighbouring 'trial2' entry --
# confirm it matches the file name on disk and is not a misspelled duplicate.
datasets = ['datasets/' + name for name in (
    '28mar18roof.csv',
    'finaltest_battery34_fieldtest_trail2.csv',
    'finaltest_battery34_fieldtest_trial2.csv',
    'finaltest_battery34_fieldtest_trial3.csv',
    'finaltest_battery34_fieldtest_trial4.csv',
    'finaltest_battery34_fieldtest_trial5.csv',
    'finaltest_battery34_rooftop1.csv',
    'finaltest_battery34_rooftop2.csv',
    'finaltest_battery34_rooftop3.csv',
    'finaltesting_battery34_fieldtest.csv',
    'test_25JAN_rooftop.csv',
)]

# Sequence length: passed to prepare() and used as the first dimension of the
# LSTM input shape.
timesteps = 5 * 10

# Share of the data held out as the test set (0.1 means 10%).
test_data_percentage = 0.1

In [None]:
from data_process import prepare

# Hand the configured file list, test share and sequence length to the
# project's prepare() helper and unpack its five results.
# NOTE(review): presumably the arrays are already scaled and `scaler` is the
# fitted scaler needed to invert predictions -- verify against data_process.
split_data = prepare(datasets, test_data_percentage, timesteps)
x_train, y_train, x_test, y_test, scaler = split_data

## Building the RNN

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Activation

- Build the Structure of the RNN and compile it

In [None]:
# Stacked LSTM regressor: four LSTM layers of 50 units, each followed by 20%
# dropout, feeding a single-unit dense output.
model = Sequential()

# First LSTM layer declares the input shape: `timesteps` steps of 6 features.
model.add(LSTM(units=50, return_sequences=True, input_shape=(timesteps, 6)))
model.add(Dropout(0.2))

# Three further LSTM layers; only the last one collapses the sequence
# (return_sequences=False) so the dense layer receives one vector per sample.
for keep_sequence in (True, True, False):
    model.add(LSTM(units=50, return_sequences=keep_sequence))
    model.add(Dropout(0.2))

# Output layer: one value per sample, no explicit activation added.
model.add(Dense(units=1))

model.compile(optimizer='rmsprop', loss='mean_squared_error')

## Training the Model

In [None]:
# Report how many training samples there are, then fit the model.
size = x_train.shape[0]
print(f"{size} data points")
model.fit(x_train, y_train, batch_size=150, epochs=100)

- Write the Model to Disk

In [None]:
from keras.models import model_from_json

# Persist the model in two parts: the architecture as JSON text and the
# weights in a separate file.
model_json = model.to_json()
with open("battery_model.json", "w") as architecture_file:
    architecture_file.write(model_json)

model.save_weights("battery_model.h5")

- Read Model from Disk

In [None]:
# Rebuild the model from the two files written by the save cell: read the
# JSON architecture, recreate the network, then load the stored weights.
# The context manager guarantees the file handle is closed even if read()
# raises.
with open('battery_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()

model = model_from_json(loaded_model_json)
model.load_weights('battery_model.h5')

# Compile with the same optimizer and loss as the training cell so that
# evaluate() reports the same metric.
model.compile(optimizer='rmsprop', loss='mean_squared_error')

## Testing the Model

- Test for accuracy across all test data

In [None]:
#TODO: scaler.inverse_transform the output

# Evaluate on the held-out test split. The model is compiled without a
# `metrics` argument, so print the metric names next to the raw result rather
# than indexing into it.
score = model.evaluate(x_test, y_test, verbose=1)
print(f"\t{model.metrics_names!s}")
print(f"\t{score!s}")

- Test for accuracy in each 10th percentile of completeness

## HyperOptimization

In [None]:
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform, conditional

In [None]:
def model(x_train, y_train, x_test, y_test):
    """
    Function to optimize model:
    
    :returns: dict() with two elements
        - loss: numeric evaluation metric to minimize
        - status: STATUS_OK unless desire otherwise
        - model: keras model to use again
    """
    