<a href="https://colab.research.google.com/github/RhysWangJunfei/nilm/blob/master/AMPDs/training/nilm_rnn_cwe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import tensorflow as tf
import numpy as np
import io
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split 
from google.colab import files
import matplotlib.pyplot as plt

In [0]:
def create_dataset(dataset, look_back=1):
    '''Sliding window function.

    Cuts ``dataset`` into overlapping windows of ``look_back`` consecutive
    items, advancing one position at a time.

    Args:
        dataset: sliceable sequence (list, ndarray, ...) supporting ``len``.
        look_back: number of consecutive items in each window.

    Returns:
        ``np.ndarray`` holding ``len(dataset) - look_back + 1`` windows, in
        order of their start position. When that count is zero or negative
        the result is an empty array.
    '''
    window_count = len(dataset) - look_back + 1
    windows = [dataset[start:start + look_back] for start in range(window_count)]
    return np.array(windows)

In [0]:
# Read the 'P' column of the two meter CSVs: WHE is the model input,
# CWE is the target that gets turned into class labels further down.
WHE_data, CWE_data = (
    pd.read_csv(csv_path)['P']
    for csv_path in ('Electricity_WHE.csv', 'Electricity_CWE.csv')
)


In [0]:
# --- Build the supervised dataset: WHE windows -> CWE class at the window's last sample ---
window_size = 60

# Rows [EXCLUDED_START, EXCLUDED_STOP) of the windowed data are left out of the dataset.
EXCLUDED_START, EXCLUDED_STOP = 472500, 475500
# Fixed seed so the train/test split (and the scaler fitted on it) is the same on every run.
SPLIT_SEED = 0

# `Series.as_matrix()` was removed in pandas 1.0; `.values` yields the same ndarray
# and is what the CWE line below already uses.
dataX_raw = create_dataset(WHE_data.values, window_size)

# One target per window: the CWE reading aligned with the window's final sample.
cwe_Y_raw = CWE_data[window_size-1:].values.reshape(-1, 1)

dataX = np.concatenate([dataX_raw[:EXCLUDED_START, :], dataX_raw[EXCLUDED_STOP:, :]], axis=0)
cwe_Y = np.concatenate([cwe_Y_raw[:EXCLUDED_START, :], cwe_Y_raw[EXCLUDED_STOP:, :]], axis=0)

# Three classes: 0 -> reading == 0, 1 -> 0 < reading <= 300, 2 -> everything else
# (class 2 is the default fill value, so it also catches any reading below 0).
categorized_cwe_Y = np.full(cwe_Y.shape, 2.0)
categorized_cwe_Y[cwe_Y == 0] = 0
categorized_cwe_Y[(cwe_Y > 0) & (cwe_Y <= 300)] = 1

encoder = OneHotEncoder(handle_unknown='ignore',sparse=False)
cweY_1hot = encoder.fit_transform(categorized_cwe_Y)

# Hold out 1% of the windows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    dataX, cweY_1hot, test_size=0.01, shuffle=True, random_state=SPLIT_SEED)

# Fit the scaler on the training windows only, then apply it to both splits.
scaler = MinMaxScaler()
scaler.fit(X_train.astype(float))
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Class balance check: turn the one-hot rows back into class ids and count each id.
max_indice = cweY_1hot.argmax(axis=1)
df = pd.Series(max_indice)
df.value_counts()

0    1018321
1      26327
2       3493
dtype: int64

In [0]:
# Hyper Parameters
LR = 0.001      # learning rate
batch_size=128
window_size=60
num_units = [256, 128]   # one LSTM layer per entry, in stacking order

# --- RNN model definition (TensorFlow 1.x graph mode) ---
# Start from an empty default graph so re-running this cell does not pile up duplicate ops.
tf.reset_default_graph()

# Inputs: one feature per time step, and a 3-way one-hot target.
tf_x = tf.placeholder(tf.float32, [None, window_size,1],name='x')
tf_y = tf.placeholder(tf.int32, [None, 3],name='y')

cells = [tf.keras.layers.LSTMCell(units=n) for n in num_units]
stacked_rnn_cell = tf.keras.layers.StackedRNNCells(cells)
# The second return value holds one state entry per stacked layer. It is kept as a
# single object: unpacking it into a fixed number of names would break as soon as
# `num_units` has a length other than two.
outputs, final_states = tf.nn.dynamic_rnn(
        stacked_rnn_cell,           # cell you have chosen
        tf_x,                       # input
        initial_state=None,         # the initial hidden state
        dtype=tf.float32,           # must be given if initial_state is None
        time_major=False,           # False: (batch, time step, input); True: (time step, batch, input)
)

# Dense head on the RNN output at the last time step.
l1 = tf.layers.dense(outputs[:, -1, :],64,activation=tf.nn.relu,name='l1')
l2 = tf.layers.dense(l1,32,activation=tf.nn.relu,name='l2')
l3 = tf.layers.dense(l2,16,activation=tf.nn.relu,name='l3')
l4 = tf.layers.dense(l3,8,activation=tf.nn.relu,name='l4')
# NOTE(review): `pred` is fed to the softmax cross-entropy as logits, yet it has a ReLU,
# so the logits can never be negative. Logit layers are normally linear (activation=None).
# Left unchanged here because the checkpoint restored by the later cells was trained with
# this activation; switch it only together with a retrain.
pred = tf.layers.dense(l4,3,activation=tf.nn.relu,name='pred')

with tf.name_scope('loss'):
    cross_entropy =  tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf_y, logits=pred)
    loss = tf.reduce_mean(cross_entropy)
    tf.summary.scalar("loss",tensor=loss)

train_op = tf.train.AdamOptimizer(LR).minimize(loss)

# Fraction of rows whose arg-max prediction matches the arg-max of the one-hot label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(tf_y, axis=1), tf.argmax(pred, axis=1)), tf.float32))

init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# Created after the optimizer, so the optimizer's variables are saved/restored as well.
saver = tf.train.Saver()

In [0]:
import pickle

# Load a previously saved training set. The file holds two objects pickled back to back
# (features first, then labels). This REPLACES the X_train / y_train produced by the
# preprocessing cell; X_test / y_test are not touched.
# NOTE(review): the file name suggests the data was cleaned with Tomek links - confirm.
# NOTE(security): pickle.load can execute arbitrary code; only load files you created.
file_name='tomekLinked_data'
# `with` closes the file even if unpickling raises.
with open(file_name,'rb') as fileObject:
    X_train = pickle.load(fileObject)
    y_train = pickle.load(fileObject)

In [0]:
from imblearn.under_sampling import RandomUnderSampler

# Fresh one-hot encoder (dense output, unknown categories ignored).
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

# Random under-sampler configured to return 3000 rows for each of the classes 0, 1 and 2.
tl = RandomUnderSampler(sampling_strategy=dict.fromkeys((0, 1, 2), 3000))

In [0]:
# Continue training from the saved checkpoint on class-balanced mini-batches.
# The context manager closes the session even if an iteration raises.
with tf.Session() as sess:
    # Variables come from the checkpoint, so init_op is intentionally not run.
    saver.restore(sess, 'my_net_cwe_central/save_cwe_rnn.ckpt')
    for i in range(0,1000):
        # Draw a new balanced under-sample, then a random mini-batch (with replacement) from it.
        X, y = tl.fit_resample(X_train, y_train)
        #y = encoder.fit_transform(y.reshape([-1,1]))
        # Keep features and one-hot labels side by side so one index selects both.
        whole_train = np.concatenate([X,y],axis=1)
        # Sample from however many rows the sampler returned instead of a hard-coded 9000.
        batch_index = np.random.choice(whole_train.shape[0],batch_size)
        batch_train = whole_train[batch_index,:]
        batch_X = batch_train[:,:-3].reshape([batch_size,window_size,1])   # last 3 columns are the labels
        batch_y = batch_train[:,-3:]
        print('##·##Loop:'+str(i))
        sess.run(train_op,{tf_x:batch_X , tf_y:batch_y})
        # Loss on the same batch, measured after the update step.
        cost_ = sess.run(loss,{tf_x:batch_X, tf_y:batch_y})
        print('train loss= %.16f' % cost_)
        if(i%99==0):
            # Periodic evaluation + checkpoint.
            acc_train = sess.run(accuracy,{tf_x:batch_X, tf_y:batch_y})
            acc_test = sess.run(accuracy,feed_dict={tf_x: X_test.reshape([X_test.shape[0],window_size,1]),
                                                    tf_y:y_test})
            print('train loss= %.16f' % cost_+', Acc=%.4f'% acc_train)
            print('Test Acc=%.2f'% acc_test)
            pre = sess.run(pred,feed_dict={tf_x: batch_X, tf_y:batch_y})
            # Confusion matrix of the current batch (rows: true class, columns: predicted class).
            # Computed with NumPy so that no new ops are added to the TF graph on each evaluation.
            y_lables_argmax = np.argmax(batch_y,axis=1)
            y_pred_argmax = np.argmax(pre,axis=1)
            confusion = np.zeros((3,3),dtype=np.int32)
            np.add.at(confusion,(y_lables_argmax,y_pred_argmax),1)
            print(confusion)
            save_path = saver.save(sess, "my_net_cwe_central/save_cwe_rnn.ckpt")

INFO:tensorflow:Restoring parameters from my_net_cwe_central/save_cwe_rnn.ckpt
##·##Loop:0
train loss= 0.2006096243858337
train loss= 0.2006096243858337, Acc=0.9141
Test Acc=0.85
[[37  4  0]
 [ 4 39  2]
 [ 0  1 41]]
##·##Loop:1
train loss= 0.2464631944894791
##·##Loop:2
train loss= 0.2261790484189987
##·##Loop:3
train loss= 0.2423966079950333
##·##Loop:4
train loss= 0.2536152601242065
##·##Loop:5
train loss= 0.1777364313602448
##·##Loop:6
train loss= 0.3237663507461548
##·##Loop:7
train loss= 0.2081740349531174
##·##Loop:8
train loss= 0.2593451440334320
##·##Loop:9
train loss= 0.1776133924722672
##·##Loop:10
train loss= 0.2884507179260254
##·##Loop:11
train loss= 0.1359289437532425
##·##Loop:12
train loss= 0.1429767012596130
##·##Loop:13
train loss= 0.2053130567073822
##·##Loop:14
train loss= 0.2708986401557922
##·##Loop:15
train loss= 0.2020643502473831
##·##Loop:16
train loss= 0.2330212891101837
##·##Loop:17
train loss= 0.2462792694568634
##·##Loop:18
train loss= 0.1781722456216812
#

In [0]:

# Create the directory for checkpoint files; -p keeps the cell from failing on a re-run
# when the directory already exists.
!mkdir -p my_net_cwe


In [0]:
# Housekeeping: recursively delete the directory my_net_cwe15.
# NOTE(review): nothing in the visible notebook creates this directory - presumably a
# leftover from an earlier experiment; the command errors if it does not exist.
rm -r my_net_cwe15

In [0]:
# Move the four checkpoint files from the working directory into my_net_cwe/.
# NOTE(review): the training and prediction cells read/write 'my_net_cwe_central/',
# not 'my_net_cwe/' - confirm which directory is the intended one.
mv checkpoint save_cwe_rnn.ckpt.data-00000-of-00001 save_cwe_rnn.ckpt.index save_cwe_rnn.ckpt.meta my_net_cwe

In [0]:
# Delete the pickled training set from the working directory.
# NOTE(review): the cell that unpickles X_train / y_train opens this same file name, so
# after this runs a fresh top-to-bottom execution cannot load it unless it is re-supplied.
rm tomekLinked_data

In [0]:
import pickle

# Persist the fitted MinMaxScaler so the same scaling can be re-applied later.
file_name='scaler_rnn_cwe'
# `with` flushes and closes the file even if pickling raises.
with open(file_name,'wb') as fileObject:
    pickle.dump(scaler,fileObject)

In [0]:
#Generate Predictions for the raw data by batch
# NOTE: this rebinds the notebook-level `batch_size` (128 in the hyper-parameter cell);
# re-run that cell before training again.
batch_size = 64
batch_num = len(dataX)//batch_size   # number of full batches
PROGRESS_EVERY = 1000                # print a progress line every this many batches
nn_dataX = None                      # stays None when dataX has no rows
print(batch_num)

batch_preds = []
#restore rnn session; the context manager closes it when prediction is done
with tf.Session() as sess:
    saver.restore(sess, 'my_net_cwe_central/save_cwe_rnn.ckpt')
    # Stepping by start offset covers the trailing partial batch without a special case
    # and never produces an empty batch (the old loop did so whenever len(dataX) was a
    # multiple of batch_size, and crashed on inputs shorter than one batch).
    for i, start in enumerate(range(0, len(dataX), batch_size)):
        if i % PROGRESS_EVERY == 0:
            print(i)
        batch_X = dataX[start:start+batch_size]
        # dataX holds unscaled windows, so apply the fitted scaler before feeding the net.
        batch_X = scaler.transform(batch_X).reshape([-1,window_size,1])
        batch_pred = sess.run(pred,feed_dict={tf_x: batch_X})
        batch_preds.append(batch_pred)

# Stack once at the end instead of re-copying the growing array on every batch.
if batch_preds:
    nn_dataX = np.vstack(batch_preds)