<a href="https://colab.research.google.com/github/RhysWangJunfei/nilm/blob/master/nilm_rnn_cde.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import tensorflow as tf
import numpy as np
import io
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split 
from google.colab import files
import matplotlib.pyplot as plt

In [0]:
'''Sliding window function'''
def create_dataset(dataset, look_back=1):
    dataX = []
    for i in range(len(dataset)-look_back+1):
        a = dataset[i:(i+look_back)]
        dataX.append(a)
    return np.array(dataX)

In [0]:
uploaded = files.upload()
'''Load data'''
WHE_data = pd.read_csv(io.BytesIO(uploaded['Electricity_WHE.csv']))['P']
#WHE_data = pd.read_csv('Electricity_WHE.csv')['P']

Saving Electricity_WHE.csv to Electricity_WHE.csv


In [0]:
uploaded = files.upload()
CDE_data = pd.read_csv(io.BytesIO(uploaded['Electricity_CDE.csv']))['P']
#CDE_data = pd.read_csv('Electricity_CDE.csv')['P']

Saving Electricity_CDE.csv to Electricity_CDE.csv


In [0]:
window_size=60

dataX_raw = create_dataset(WHE_data.as_matrix(), window_size)

#0,1-350,4401-5300,others

cde_Y_raw = CDE_data[window_size-1:].values.reshape([CDE_data.shape[0]-window_size+1,1])
dataX = np.concatenate([dataX_raw[0:472500,:],dataX_raw[475500:,:]],axis=0)
cde_Y = np.concatenate([cde_Y_raw[0:472500,:],cde_Y_raw[475500:,:]],axis=0)
categorized_cde_Y = np.ones(cde_Y.shape)*3
categorized_cde_Y[[np.where(cde_Y==0)[0]],:]=0
categorized_cde_Y[[np.where((cde_Y>0)&(cde_Y<=350))[0]],:]=1
categorized_cde_Y[[np.where((cde_Y>4400)&(cde_Y<=5300))[0]],:]=2

#cdeY=categorized_cde_Y[categorized_cde_Y>0].reshape(-1,1)
#cdeX=dataX[[np.where(categorized_cde_Y>0)[0]],:][0]


encoder = OneHotEncoder(handle_unknown='ignore',sparse=False)
cdeY_1hot = encoder.fit_transform(categorized_cde_Y)

X_train, X_test, y_train, y_test = train_test_split(dataX, cdeY_1hot, test_size=0.1, shuffle=True)
scaler = MinMaxScaler()
scaler.fit(X_train.astype(float))
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [0]:
from imblearn.under_sampling import RandomUnderSampler
tl = RandomUnderSampler(sampling_strategy={0:9000,1:9000,2:9000,3:0})
X_train, y_train = tl.fit_resample(X_train, y_train)
whole_train = np.concatenate([X_train,y_train],axis=1)
#from imblearn.under_sampling import EditedNearestNeighbours
#enn = EditedNearestNeighbours()
#X_train, y_train = enn.fit_resample(X_train, y_train)

#from imblearn.under_sampling import NeighbourhoodCleaningRule
#ncr = NeighbourhoodCleaningRule()
#X_train, y_train = ncr.fit_resample(X_train, y_train)

In [0]:
max_indice = np.argmax(cdeY_1hot,1)
df = pd.Series(max_indice)
df.value_counts()

0    1026791
2      11216
1      10095
3         39
dtype: int64

In [0]:
y_train.shape

(27000, 4)

In [0]:
'''Hyper parameters for deep learning'''
# Hyper Parameters
LR = 0.001               # learning rate
#cfg_list = nf.model_configs()
#error_list = []

#hyperparameters
batch_size=512
unit_num=128

In [0]:
'''RNN Model Definition'''
tf.reset_default_graph()
''''''
#define inputs
tf_x = tf.placeholder(tf.float32, [None, window_size,1],name='x')
tf_y = tf.placeholder(tf.int32, [None, 4],name='y')

lstm_cell =tf.contrib.rnn.BasicLSTMCell(num_units=unit_num,name='lstm_cell')
outputs, (h_c, h_n) = tf.nn.dynamic_rnn(
    lstm_cell,                   # cell you have chosen
    tf_x,                      # input
    initial_state=None,         # the initial hidden state
    dtype=tf.float32,           # must given if set initial_state = None
    time_major=False,           # False: (batch, time step, input); True: (time step, batch, input)
)
l1 = tf.layers.dense(outputs[:, -1, :],512,activation=tf.nn.leaky_relu,name='l1')
l2 = tf.layers.dense(l1,1024,activation=tf.nn.leaky_relu,name='l2')
l3 = tf.layers.dense(l2,512,activation=tf.nn.leaky_relu,name='l3')
l4 = tf.layers.dense(l3,256,activation=tf.nn.leaky_relu,name='l4')
l5 = tf.layers.dense(l4,128,activation=tf.nn.leaky_relu,name='l5')
l6 = tf.layers.dense(l5,84,activation=tf.nn.leaky_relu,name='l6')
l7 = tf.layers.dense(l6,64,activation=tf.nn.leaky_relu,name='l7')
l8 = tf.layers.dense(l7,48,activation=tf.nn.leaky_relu,name='l8')
l9 = tf.layers.dense(l8,32,activation=tf.nn.leaky_relu,name='l9')
l10 = tf.layers.dense(l9,24,activation=tf.nn.leaky_relu,name='l10')
l11 = tf.layers.dense(l10,16,activation=tf.nn.leaky_relu,name='l11')
l12 = tf.layers.dense(l11,8,activation=tf.nn.leaky_relu,name='l12')
pred = tf.layers.dense(l12,4,activation=tf.nn.relu,name='pred')

with tf.name_scope('loss'):
    cross_entropy =  tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_y, logits=pred) 
    loss = tf.reduce_mean(cross_entropy)
    tf.summary.scalar("loss",tensor=loss)

train_op = tf.train.AdamOptimizer(LR).minimize(loss)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(tf_y, axis=1), tf.argmax(pred, axis=1)), tf.float32))

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 
saver = tf.train.Saver()


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.dense instead.


In [0]:
sess = tf.Session()
#sess.run(init_op)
saver.restore(sess, 'my_net/save_net_rnn.ckpt')

flag = False
for j in range(0,300):
    print('###iteration: '+str(j)+'###')
    batch_index = np.random.choice(27000,batch_size)
    batch_train = whole_train[batch_index,:]
    batch_X = batch_train[:,:-4].reshape([batch_size,window_size,1])
    batch_y = batch_train[:,-4:]
    sess.run(train_op,{tf_x:batch_X , tf_y:batch_y})
    cost_ = sess.run(loss,{tf_x:batch_X, tf_y:batch_y})
    acc_train = sess.run(accuracy,{tf_x:batch_X, tf_y:batch_y})
    acc_test = sess.run(accuracy,feed_dict={tf_x: X_test.reshape([X_test.shape[0],window_size,1]), tf_y:y_test})
    print('train loss= %.4f' % cost_+', Acc=%.2f'% acc_train)
    print('Test Acc=%.2f'% acc_test)
    
    pre = sess.run(pred,feed_dict={tf_x: batch_X, tf_y:batch_y})
    y_lables_argmax = tf.argmax(tf_y,axis=1)  
    y_pred_argmax = tf.argmax(pre,axis=1)
    confusion = tf.confusion_matrix(labels=y_lables_argmax, predictions=y_pred_argmax, num_classes=4)
    #print('Confusion Matrix: \n\n', tf.Tensor.eval(confusion,feed_dict=None))
    print(confusion.eval(session=sess,feed_dict={tf_x: batch_X, tf_y:batch_y}))
    if acc_train>=0.99:
      print(j)
      flag = True
      break
    if(flag==True):
        print(flag)
        break
    if(j//100==0):
      save_path = saver.save(sess, "my_net/save_net_rnn.ckpt")
pre = sess.run(pred,feed_dict={tf_x: X_test.reshape([X_test.shape[0],window_size,1]), tf_y: y_test})
y_lables_argmax = np.argmax(y_test,1)
y_pred_argmax = np.argmax(pre,1)
confusion = tf.confusion_matrix(labels=y_lables_argmax, predictions=y_pred_argmax, num_classes=4)
#print('Confusion Matrix: \n\n', tf.Tensor.eval(confusion,feed_dict=None))
print(confusion.eval(session=sess))
sess.close()


INFO:tensorflow:Restoring parameters from my_net/save_net_rnn.ckpt
###iteration: 0###
train loss= 0.2972, Acc=0.87
Test Acc=0.92
[[141  11   1   0]
 [ 51 135   3   0]
 [  0   0 170   0]
 [  0   0   0   0]]
###iteration: 1###
train loss= 0.2911, Acc=0.88
Test Acc=0.92
[[151  13   1   0]
 [ 46 122   4   0]
 [  0   0 175   0]
 [  0   0   0   0]]
###iteration: 2###
train loss= 0.2668, Acc=0.89
Test Acc=0.94
[[146   9   1   0]
 [ 43 137   0   0]
 [  0   1 175   0]
 [  0   0   0   0]]
###iteration: 3###
train loss= 0.2909, Acc=0.89
Test Acc=0.95
[[164   5   1   0]
 [ 50 120   1   0]
 [  0   0 171   0]
 [  0   0   0   0]]
###iteration: 4###
train loss= 0.2848, Acc=0.88
Test Acc=0.94
[[164   7   1   0]
 [ 52 112   2   0]
 [  0   1 173   0]
 [  0   0   0   0]]
###iteration: 5###
train loss= 0.2977, Acc=0.87
Test Acc=0.94
[[145   9   0   0]
 [ 55 140   0   0]
 [  0   1 162   0]
 [  0   0   0   0]]
###iteration: 6###
train loss= 0.2864, Acc=0.89
Test Acc=0.93
[[178   8   0   0]
 [ 46 111   2   0]

In [0]:
sess.close()

In [0]:
#Generate Predictions for the raw data by batch
batch_size = 64
batch_num = len(dataX)//batch_size
nn_dataX = None
print(batch_num)
#restore rnn session
sess = tf.Session()
saver.restore(sess, 'my_net/save_net_rnn.ckpt')
for i in range(0,batch_num+1):
  print(i)
  if(i!=batch_num):
      batch_X = dataX[i*batch_size:(i+1)*batch_size,]
      batch_X = scaler.transform(batch_X).reshape([-1,window_size,1])
      batch_pred = sess.run(pred,feed_dict={tf_x: batch_X})
      if nn_dataX is None:
        nn_dataX = batch_pred
      else:
        nn_dataX = np.vstack([nn_dataX,batch_pred])
      
  else: 
      batch_X = dataX[i*batch_size:]
      batch_X = scaler.transform(batch_X).reshape([-1,window_size,1])
      batch_pred = sess.run(pred,feed_dict={tf_x: batch_X})
      nn_dataX = np.vstack([nn_dataX,batch_pred])

In [43]:
y_lables_argmax = np.argmax(cdeY_1hot,1)
y_pred_argmax = np.argmax(nn_dataX,1)
confusion = tf.confusion_matrix(labels=y_lables_argmax, predictions=y_pred_argmax, num_classes=4)
#print('Confusion Matrix: \n\n', tf.Tensor.eval(confusion,feed_dict=None))
print(confusion.eval(session=sess))

[[975728  46305   4758      0]
 [  2957   7040     98      0]
 [     0      0  11216      0]
 [     2      6     31      0]]


In [0]:
result_df = pd.DataFrame(data=nn_dataX)
result_df.to_csv('nn_data.csv',index=False)

In [40]:
df = pd.Series(y_pred_argmax)
df.value_counts()

0    978687
1     53351
2     16103
dtype: int64

In [46]:
whe_datetime = pd.read_csv('Electricity_WHE.csv')['unix_ts']
whe_datetime = pd.to_datetime(whe_datetime,unit='s')
whe_datetime['datetime'] = whe_datetime[window_size-1:]
whe_datetime['month'] = whe_dt['datetime'].dt.month
whe_datetime['hour'] = WHE_data['datetime'].dt.hour
whe_datetime['weekday'] = WHE_data['datetime'].dt.dayofweek

ValueError: ignored

In [0]:
#define the Feedforward Model
'''Feedforward Model Definition'''
tf.reset_default_graph()
''''''
#define inputs
tf_x = tf.placeholder(tf.float32, [None, window_size,47],name='ff_x')
tf_y = tf.placeholder(tf.int32, [None, 4],name='ff_y')

l1 = tf.layers.dense(tf_x,64,activation=tf.nn.leaky_relu,name='ff_l1')
l2 = tf.layers.dense(l1,128,activation=tf.nn.leaky_relu,name='ff_l2')
l3 = tf.layers.dense(l2,256,activation=tf.nn.leaky_relu,name='ff_l3')
l4 = tf.layers.dense(l3,128,activation=tf.nn.leaky_relu,name='ff_l4')
l5 = tf.layers.dense(l4,64,activation=tf.nn.leaky_relu,name='ff_l5')
l6 = tf.layers.dense(l5,32,activation=tf.nn.leaky_relu,name='ff_l6')
l7 = tf.layers.dense(l6,16,activation=tf.nn.leaky_relu,name='ff_l7')
l8 = tf.layers.dense(l7,8,activation=tf.nn.leaky_relu,name='ff_l8')
pred = tf.layers.dense(l8,4,activation=tf.nn.relu,name='ff_pred')

with tf.name_scope('loss'):
    cross_entropy =  tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_y, logits=pred) 
    loss = tf.reduce_mean(cross_entropy)
    tf.summary.scalar("loss",tensor=loss)

train_op = tf.train.AdamOptimizer(LR).minimize(loss)

accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(tf_y, axis=1), tf.argmax(pred, axis=1)), tf.float32))

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 
saver = tf.train.Saver()


In [0]:
#!mkdir my_net
mv checkpoint save_net_rnn.ckpt.data-00000-of-00001 save_net_rnn.ckpt.index save_net_rnn.ckpt.meta my_net