In [26]:
import pandas  as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
np.random.seed(30)
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from collections import Counter
import keras
from keras.layers import Conv2D,Dense,Flatten
import keras.backend as K
from keras.callbacks import ModelCheckpoint
from keras.layers import Input
from keras.models import Model
from sklearn.model_selection import KFold


In [27]:
# Read-only handles on the combined datasets; both stores are indexed by the
# same keys further down (one entry per home).
days = pd.HDFStore('../Combined Datasets/daily.h5', mode='r')
hours = pd.HDFStore('../Combined Datasets/hourly.h5', mode='r')

In [28]:
# Fractions of the homes assigned to each split. Only train_prop and v_prop are
# read by the experiment loop; the test split is whatever lies between them.
train_prop, v_prop, test_prop = .65, .2, .15

# Keys of the daily store.
keys = days.keys()

# Readings are divided by this factor when loaded and multiplied back in mae().
const = 1000.0

In [29]:
def return_data(keys,n_months):
    """Build model inputs and targets for a set of homes.

    For every home in ``keys`` that has both the ``use`` and ``refrigerator1``
    columns in its daily *and* hourly tables, the first ``28*n_months`` daily
    ``use`` readings become the input and the first ``28*24*n_months`` hourly
    ``refrigerator1`` readings become the target. Both are divided by the
    module-level ``const`` and cut into one sample per 28-day block.

    Parameters
    ----------
    keys : iterable
        Keys identifying homes in the module-level ``days`` and ``hours`` stores.
    n_months : int
        Number of consecutive 28-day blocks to take from each home.

    Returns
    -------
    X : numpy.ndarray, shape (n_homes*n_months, 4, 7, 1)
        Daily readings laid out as 4 rows of 7 consecutive values, with a
        trailing channel axis.
    y : numpy.ndarray, shape (n_homes*n_months, 28*24)
        Hourly readings covering the same 28-day blocks.

    Raises
    ------
    ValueError
        If a selected home has fewer readings than ``n_months`` blocks require.
    """
    appliance1 = 'use'
    appliance2 = 'refrigerator1'

    n_days = 28*n_months
    n_hours = 24*n_days

    daily_rows = []
    hourly_rows = []
    for key in keys:
        # Look each table up once instead of once per column test / value read.
        day_frame = days[key]
        hour_frame = hours[key]

        in_daily = appliance1 in day_frame.columns and appliance2 in day_frame.columns
        in_hourly = appliance1 in hour_frame.columns and appliance2 in hour_frame.columns
        # A home must qualify in BOTH stores; filtering X and y independently
        # could pair row i of X with a different home's row i of y.
        if not (in_daily and in_hourly):
            continue

        # Truncate per home so homes with longer series can still be stacked.
        daily = day_frame[appliance1].values[:n_days]
        hourly = hour_frame[appliance2].values[:n_hours]
        if len(daily) < n_days or len(hourly) < n_hours:
            raise ValueError("home %r has too few readings for n_months=%r" % (key, n_months))

        daily_rows.append(daily)
        hourly_rows.append(hourly)

    X = np.array(daily_rows)/const
    y = np.array(hourly_rows)/const

    # One sample per 28-day block: (homes, 28*n_months) -> (homes*n_months, 4, 7, 1)
    X = X.reshape((len(X)*n_months,4,7))
    X = np.expand_dims(X,axis=3)

    y = y.reshape((len(y)*n_months,28*24))

    return X,y


def mae(truth,pred):
    """Mean absolute error between ``truth`` and ``pred``, multiplied by ``const``.

    ``const`` is the module-level factor the data is divided by in
    ``return_data``, so the result is expressed on the unscaled readings.
    """
    scaled_errors = const*np.abs(truth-pred)
    return np.mean(scaled_errors)

    
    
    

In [30]:
def network():
    """Assemble the (uncompiled) two-input Keras model used by ``fit_model``.

    Inputs:
        ``Monthly_Matrix``              -- shape (4, 7, 1)
        ``Cooling_Degree_Day_Monthly``  -- shape (1,)

    Four parallel Conv2D branches (10 filters, stride 1, kernels 4x1, 1x7, 1x2
    and 2x1) read the matrix input. Their flattened outputs are concatenated,
    passed through 10% dropout, concatenated with the scalar input and mapped
    by a ReLU dense layer to 168*4 outputs.

    Returns:
        keras.models.Model taking ``[matrix, scalar]`` as inputs.
    """
    input_layer = Input(shape=(4,7,1),name="Monthly_Matrix")
    input_layer_2 = Input(shape=(1,),name="Cooling_Degree_Day_Monthly")

    # (kernel size, layer name) of each parallel branch, in creation order.
    branch_specs = [
        ((4,1),"Filter_4X1"),
        ((1,7),"Filter_1X7"),
        ((1,2),"Filter_1X2"),
        ((2,1),"Filter_2X1"),
    ]

    flat_branches = []
    for kernel_size,layer_name in branch_specs:
        conv_out = Conv2D(10,kernel_size=kernel_size, strides=(1,1),name=layer_name)(input_layer)
        flat_branches.append(Flatten()(conv_out))

    merged = keras.layers.concatenate(flat_branches,name="Concatenation_1")
    merged = keras.layers.Dropout(.1)(merged)

    # The scalar input bypasses the convolutions and joins just before the output.
    merged = keras.layers.concatenate([merged,input_layer_2],name="Concatenation_2")

    final_layer = Dense(168*4,name="Output_Dense",activation='relu')(merged)

    return Model(inputs=[input_layer,input_layer_2], outputs=final_layer)
    
    

In [31]:
def empty_dir(dir_name):
    """Make ``dir_name`` an existing directory with no files in it.

    If the directory does not exist yet it is created (so a fresh checkout or
    a restarted kernel does not fail here). Otherwise every entry directly
    inside it is deleted with ``os.remove``.

    Parameters
    ----------
    dir_name : str
        Path of the directory to clear.

    Raises
    ------
    OSError
        If an entry cannot be removed with ``os.remove`` (for example because
        it is itself a directory), or if ``dir_name`` exists but is not a
        directory.
    """
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)
        return

    for entry in os.listdir(dir_name):
        os.remove(os.path.join(dir_name,entry))

In [32]:
def fit_model(train_x,train_y,test_x,test_y,v_x,v_y,train_temp=None,v_temp=None,test_temp=None):
    """Train a fresh ``network()`` and report its MAE on the three splits.

    The model is trained with SGD on mean squared error for 3000 epochs. A
    checkpoint is written to the ``temp`` directory each time the validation
    loss improves; after training, the last checkpoint written is reloaded
    before predictions are made.

    Parameters
    ----------
    train_x, test_x, v_x : array-like
        First model input for the train / test / validation splits.
    train_y, test_y, v_y : array-like
        Targets for the train / test / validation splits.
    train_temp, v_temp, test_temp : array-like, optional
        Second model input for each split. When omitted, the module-level
        variable of the same name is used, which is how the function obtained
        these values before they became parameters.

    Returns
    -------
    tuple
        ``(train_cost, v_cost, test_cost)`` as computed by ``mae``.

    Raises
    ------
    KeyError
        If a ``*_temp`` argument is omitted and no module-level variable of
        that name exists.
    """
    # Backward compatibility: existing callers pass six arguments and set the
    # *_temp arrays as globals before calling.
    module_scope = globals()
    if train_temp is None:
        train_temp = module_scope['train_temp']
    if v_temp is None:
        v_temp = module_scope['v_temp']
    if test_temp is None:
        test_temp = module_scope['test_temp']

    model = network()
    n_epochs = 3000
    sgd = keras.optimizers.SGD(lr=0.01)
    model.compile(optimizer=sgd, loss='mean_squared_error')

    dir_name = 'temp'
    # Make sure the checkpoint directory exists before it is cleared/written.
    if not os.path.isdir(dir_name):
        os.makedirs(dir_name)
    empty_dir(dir_name)

    print("Started Training!")
    checkpoint = ModelCheckpoint(os.path.join(dir_name,'single-model-{epoch:04d}.h5'), verbose=0, monitor='val_loss',save_best_only=True, mode='auto')
    model.fit([train_x,train_temp], train_y, epochs=n_epochs, validation_data=([v_x,v_temp],v_y), callbacks=[checkpoint], verbose=0)

    # Epoch numbers are zero-padded to 4 digits, so lexicographic order equals
    # epoch order; with save_best_only the newest file holds the best val_loss.
    saved_files = sorted(os.listdir(dir_name))
    weights_file = saved_files[-1]

    model.load_weights(os.path.join(dir_name,weights_file))

    pred_train = model.predict([train_x,train_temp])
    pred_v = model.predict([v_x,v_temp])
    pred_test = model.predict([test_x,test_temp])

    train_cost = mae(train_y,pred_train)
    v_cost = mae(v_y,pred_v)
    test_cost = mae(test_y,pred_test)

    return train_cost,v_cost,test_cost
    
    
    
    
    
    
    

In [33]:
# Repeat the train/validate/test experiment on differently shuffled home splits.
n_experiments = 10
n_months = 3  # number of 28-day blocks taken from every home
results_arr = []
for exp in range(n_experiments):

    # Copy into a list so the keys can be shuffled in place.
    keys = list(days.keys())

    # Seed per experiment so each split is reproducible.
    np.random.seed(exp)
    np.random.shuffle(keys)

    # Explicit boundaries: a negative slice start of 0 would select ALL homes
    # for validation when int(len(keys)*v_prop) truncates to 0.
    n_train = int(len(keys)*train_prop)
    n_val = int(len(keys)*v_prop)
    val_start = len(keys)-n_val

    train_homes = keys[:n_train]
    test_homes = keys[n_train:val_start]
    val_homes = keys[val_start:]

    train_x,train_y = return_data(train_homes,n_months)
    test_x,test_y = return_data(test_homes,n_months)
    v_x,v_y  = return_data(val_homes,n_months)

    # One value per 28-day block, fed to the model's second input.
    # NOTE(review): presumably monthly cooling-degree-day figures (the input is
    # named "Cooling_Degree_Day_Monthly") -- confirm the source of these numbers.
    vals = [16,18,21]
    assert len(vals) == n_months, "need exactly one value per month block"

    # Samples are ordered home by home, block by block, so repeating `vals`
    # once per home lines each value up with its block. `//` keeps the repeat
    # count an int on Python 3 (list * float raises TypeError).
    train_temp = np.array(vals*(len(train_x)//len(vals)))
    v_temp = np.array(vals*(len(v_x)//len(vals)))
    test_temp = np.array(vals*(len(test_x)//len(vals)))

    # fit_model reads train_temp / v_temp / test_temp from module scope.
    results = fit_model(train_x,train_y,test_x,test_y,v_x,v_y)

    print (results)

    results_arr.append(results)
    
    

Started Training!
(66.01578, 64.748795, 61.82509)
Started Training!
(64.823875, 65.957085, 64.765785)
Started Training!
(64.38653, 63.411476, 71.69811)
Started Training!
(65.34517, 73.72097, 54.809002)
Started Training!
(66.917435, 62.248627, 60.391083)
Started Training!
(67.170204, 67.66694, 52.931362)
Started Training!
(69.36005, 56.20119, 57.12679)
Started Training!
(66.4393, 65.79034, 60.072693)
Started Training!
(67.82009, 62.120583, 58.15696)
Started Training!
(67.601585, 63.470036, 57.916035)


In [34]:
# Stack the per-experiment (train, validation, test) MAE tuples into a 2-D
# array with one row per experiment.
results_arr = np.array(results_arr)

In [35]:
# Column-wise mean over experiments: average train, validation and test MAE.
np.mean(results_arr,axis=0)

array([66.588  , 64.53361, 59.96929], dtype=float32)