In [13]:
import pandas  as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
np.random.seed(30)
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from collections import Counter
import keras
from keras.layers import Conv2D,Dense,Flatten
import keras.backend as K
from keras.callbacks import ModelCheckpoint
from keras.layers import Input
from keras.models import Model
from sklearn.model_selection import KFold
import time


In [2]:
days = pd.HDFStore('../Combined Datasets/daily.h5','r')
hours = pd.HDFStore('../Combined Datasets/hourly.h5','r')

In [3]:
train_prop = .65
v_prop = .2
test_prop = .15
keys = days.keys()

const = 1000.0

In [4]:
def return_data(keys,n_months):
    

    appliance1 = 'use'
    appliance2 = 'refrigerator1'
    
    

    
    X = np.array([days[i][appliance1].values for i in keys if (appliance1 in days[i].columns and appliance2 in days[i].columns )])[:,:28*n_months]/const
    y = np.array([hours[i][appliance2].values for i in keys if (appliance1 in hours[i].columns and appliance2 in hours[i].columns )])[:,:(28*24*n_months)]/const
    
    X = X.reshape((len(X)*n_months,4,7))
    X = np.expand_dims(X,axis=3)    
    
    
    
    y = y.reshape((len(y)*n_months,28*24))
    
    
    
    return X,y


def mae(truth,pred):
    return np.mean(const*np.abs((truth-pred)))

    
    
    

In [5]:
def network():
    
    input_layer = Input(shape=(4,7,1),name="Monthly_Matrix")
    
    op1 = Conv2D(10,kernel_size=(4,1), strides=(1,1),name="Filter_4X1")(input_layer)
    
    op1 = Flatten()(op1)
    
    op2 = Conv2D(10,kernel_size=(1,7), strides=(1,1),name="Filter_1X7")(input_layer)
    
    op2 = Flatten()(op2)
    
    op3 = Conv2D(10,kernel_size=(1,2),strides=(1,1),name="Filter_1X2")(input_layer)
    
    op3 = Flatten()(op3)
    
    op4 = Conv2D(10,kernel_size=(2,1),strides=(1,1),name="Filter_2X1")(input_layer)
    
    op4 = Flatten()(op4)
    
    merge_layer = keras.layers.concatenate([op1, op2, op3,op4],name="Concatenation_1")
    
    merge_layer =  keras.layers.Dropout(.05)(merge_layer)

    
    final_layer = Dense(168*4,name="Output_Dense")(merge_layer)
    
    
    final_layer = keras.layers.LeakyReLU(alpha=0.01)(final_layer)
    
    
    
    
    model = Model(inputs=[input_layer], outputs=final_layer)

    
    return model
    
    

In [6]:
def empty_dir(dir_name):
    for i in os.listdir(dir_name):
        os.remove(dir_name+"/"+i)

In [15]:
def fit_model(train_x,train_y,test_x,test_y,v_x,v_y):
    
    model = network()
    n_epochs = 3000
    sgd = keras.optimizers.SGD(lr=0.01)
    rmsprop  = keras.optimizers.RMSprop(lr=.001)
    adagrad = keras.optimizers.adagrad(lr=.001)
    model.compile(optimizer=sgd, loss='mean_squared_error')
    
    dir_name = 'temp'
    empty_dir(dir_name)
    
    print("Started Training!")
    checkpoint = ModelCheckpoint(dir_name+'/single-model-{epoch:03d}.h5', verbose=0, monitor='val_loss',save_best_only=True, mode='auto')  
    model.fit(train_x, train_y, epochs=n_epochs, validation_data=[v_x,v_y], callbacks=[checkpoint], verbose=0)

    all_files = os.listdir(dir_name)
    all_files.sort()
    weights_file = all_files[-1]
    
    model.load_weights(dir_name+"/"+weights_file)
    
    pred_train = model.predict(train_x)
    pred_v = model.predict(v_x)
    pred_test = model.predict(test_x)

    train_cost = mae(train_y,pred_train)
    v_cost = mae(v_y,pred_v)
    test_cost = mae(test_y,pred_test)
    

    return train_cost,v_cost,test_cost
    
    
    
    
    
    
    

In [16]:
n_experiments = 10
results_arr = [] 
for exp in range(n_experiments):
    
    keys = days.keys()

    np.random.seed(exp)
    
    np.random.shuffle(keys)
    
    a = time.time()
    train_homes = keys[:int(len(keys)*train_prop)]
    test_homes = keys[int(len(keys)*train_prop):int(-len(keys)*v_prop)]
    val_homes = keys[int(-len(keys)*v_prop):]
    
    train_x,train_y = return_data(train_homes,3)
    
    test_x,test_y = return_data(test_homes,3)
    
    v_x,v_y  = return_data(val_homes,3)
    
    
    results = fit_model(train_x,train_y,test_x,test_y,v_x,v_y)
    
    print (results)
    print("Elapsed {} Seconds".format(time.time()-a))
    
    results_arr.append(results)
    
    
    
    
    
    
    
    
    
    
    
    



Started Training!
(98.683716, 100.99344, 110.751274)
Elapsed 254.67761302 Seconds
Started Training!
(97.23483, 104.35092, 93.54968)
Elapsed 314.572471857 Seconds
Started Training!
(94.77948, 113.368385, 91.344315)
Elapsed 323.592671156 Seconds
Started Training!
(95.84046, 110.97157, 89.86833)
Elapsed 261.357469797 Seconds
Started Training!
(97.904854, 86.81688, 115.514946)
Elapsed 327.180912018 Seconds
Started Training!
(103.44756, 98.77467, 99.815384)
Elapsed 277.946527004 Seconds
Started Training!
(95.2123, 125.08311, 90.14418)
Elapsed 320.281327963 Seconds
Started Training!
(93.670105, 102.797935, 84.2972)
Elapsed 243.846777916 Seconds
Started Training!
(97.39621, 89.889854, 93.15521)
Elapsed 281.413692951 Seconds
Started Training!
(96.87955, 79.1717, 87.18652)
Elapsed 268.869931936 Seconds


In [17]:
results_arr = np.array(results_arr)

In [18]:
np.mean(results_arr,axis=0)

array([ 97.1049 , 101.22185,  95.5627 ], dtype=float32)