In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

tf.__version__

'1.14.0'

In [2]:
base_dir = '/home/parth/Machine Learning/Datasets/North Corp/DTC'
filePcode = "Top_LMD_DTC_SPN.csv"
file_list = os.listdir(base_dir)
file_list_corrected = []
for file_ in file_list:
    if file_.endswith(".csv") and file_ != filePcode:
        file_list_corrected.append(file_)
file_list = file_list_corrected
del file_list_corrected

In [3]:
index = 0
filename = os.path.join(base_dir , file_list[0])
data = pd.read_csv(filename)
pcode_data = pd.read_csv(os.path.join(base_dir , filePcode))

In [4]:
ids = data.deviceID.unique()
device_data = []
for id_ in ids:
    device_data.append(data[data.deviceID == id_])

In [5]:
for i , device_data_ in enumerate(device_data):
    device_data_ = device_data_.reset_index()
    device_data_["spn_fmi"] = None
    for index , (a, b) in enumerate(zip(device_data_["spn"] , device_data_["fmi"])):
        device_data_.at[ index , "spn_fmi"] = (a,b)
    device_data[i] = device_data_

In [6]:
pcode_data["spn_fmi"] = None
for index , (a, b) in enumerate(zip(pcode_data["SPN"] , pcode_data["FMI"])):
    pcode_data.at[ index , "spn_fmi"] = (a,b)

code2pcode = {}
for code , pcode in  zip(pcode_data["spn_fmi"] , pcode_data["Error codes "]):
    code2pcode[code] = pcode

In [7]:
for i in range(len(device_data)):
    device_data[i]["Pcode"] = None
    for index , spn_fmi_device in enumerate(device_data[i]["spn_fmi"] ):
        if spn_fmi_device in code2pcode.keys():
            device_data[i].at[index , "Pcode"] = code2pcode[spn_fmi_device]

In [8]:
for i in range(len(device_data)):
    device_data[i].sort_values("utc" , inplace=True , ascending=True)

In [9]:
time_interval = 30 # in mins
time_interval = time_interval * 60

In [10]:
device_pcodes_data = []
targetErrorsML = []
for i in range(len(device_data)):
    temp = device_data[i].copy()
    temp_list = []
    temp_targetErrorsML = []
    for index , pcode in enumerate(temp["Pcode"]):
        if pcode is not None:
            utc = temp["utc"].iloc[index]
            mask = temp["utc"].between(utc - time_interval , utc)
            temp_list.append(temp[mask])
            temp_targetErrorsML.append(pcode)
    device_pcodes_data.append(temp_list)
    targetErrorsML.append(temp_targetErrorsML)

In [11]:
vocab = set()
for i in range(len(device_data)):
    vocab.update(device_data[i]["spn_fmi"].unique())

In [12]:
code2id = {}
pcode2class = {}
severe_codes = 0
code2id[(-1 , -1)] = 0
for i , voc in enumerate(vocab):
    code2id[voc] = i + 1
    if voc in code2pcode.keys() :
        #code2id[voc] = code2id[voc] + 100000
        pcode = code2pcode[voc]
        pcode2class[pcode] = severe_codes + 1
        severe_codes = severe_codes + 1
id2code = {b:a for a,b in code2id.items()}

In [13]:
code2class = {}
for code in code2id.keys():
    code2class[code] = 0
    if code in code2pcode:
        code2class[code] = pcode2class[code2pcode[code]]

In [14]:
temp_ = []
for device in targetErrorsML:
    temp = []
    for pcode in device:
        c = pcode2class[pcode]
        temp.append(c)
    temp_.append(temp)
targetErrorsML = temp_
del temp
del temp_

In [15]:
deviceDataML = []
for device in device_pcodes_data:
    temp_deviceDataML = []
    for seq in device:
        temp_seq = []
        for code in seq["spn_fmi"]:
            temp_seq.append(code2id[code])
        temp_deviceDataML.append(temp_seq)
    deviceDataML.append(temp_deviceDataML)
del temp_seq
del temp_deviceDataML

In [16]:
target_errors = []
for i1 , device in enumerate(deviceDataML):
    temp_list = []
    for i2 , seq in enumerate(device):
        temp = []
        for element in seq:
            temp.append(code2class[id2code[element]])
        temp = temp[1:]
        temp.append(targetErrorsML[i1][i2])
        temp_list.append(temp)
    target_errors.append(temp_list)

In [17]:
batch_seq_lengths = []
batch_lengths = []
for i in range(len(ids)):
    max_len = 0
    for seq in deviceDataML[i]:
        if max_len < len(seq):
            max_len = len(seq)
    batch_seq_lengths.append(max_len)
    batch_lengths.append(len(deviceDataML[i]))

In [18]:
def getNextBatchID2(deviceID = 0 ):
    max_len = batch_seq_lengths[deviceID]
    seq_list = deviceDataML[deviceID]
    targets = target_errors[deviceID]
    #print(seq_list)
    lengths = []
    for seq in seq_list:
        lengths.append(len(seq))
    return \
            tf.keras.preprocessing.sequence.pad_sequences(seq_list , maxlen=max_len ,padding='post') , \
            tf.keras.preprocessing.sequence.pad_sequences(np.array(targets) , maxlen=max_len , padding='post') ,\
            lengths

In [43]:
batchMask = np.array(batch_lengths) == 0

In [19]:
first_batch , y  , seq = getNextBatchID2(deviceID=0)

In [20]:
Xtrain  , ytrain = first_batch , y
Xval , yval = None , None

In [21]:
n_neurons = 200
n_layers = 1
input_keep_prob = 0.8
vocab_size = len(vocab)
embed_size = 50

In [36]:
tf.reset_default_graph()
#tf.disable_eager_execution()
# one device feeded once if len < 50 else 2 or more batches 
with tf.name_scope("Inputs"):
    input_seq_len = tf.placeholder(tf.int32 , shape=[None]  , name="seq_len")
    # Targets - either critical or No case
    target_errors_placeholder = tf.placeholder(tf.int32 , shape=[None , None] , name="targets")
    inputs = tf.placeholder(tf.int32 , shape=[None , None] , name="inputs")

with tf.name_scope("Embeddings"):
    init_embeds = tf.random_uniform(shape= [ vocab_size , embed_size ]  , dtype=tf.float32)
    embeddings = tf.Variable(init_embeds)
    inputs_embeded = tf.nn.embedding_lookup(embeddings , inputs)

with tf.name_scope("RNN"):
    basic_cell = [tf.contrib.rnn.GRUCell(num_units = n_neurons)] * n_layers
    droupout_cell = [tf.contrib.rnn.DropoutWrapper(basic_cell_ , input_keep_prob=input_keep_prob) for basic_cell_ in basic_cell]
    stacked_cells = tf.contrib.rnn.MultiRNNCell(droupout_cell)
    outputs , final_state = tf.nn.dynamic_rnn(stacked_cells , inputs_embeded , sequence_length=input_seq_len , dtype=tf.float32)

with tf.name_scope("Hidden_Layers"):
    hidden_layer = tf.layers.dense(outputs , units=10 , activation=tf.nn.elu , kernel_initializer=tf.variance_scaling_initializer)
    logits = tf.layers.dense(hidden_layer , severe_codes + 1 , activation=None , kernel_initializer=tf.variance_scaling_initializer())

#final_state_concat = tf.concat(final_state , axis=1)
#hidden_layer = tf.layers.dense(final_state_concat , units=10 , activation=tf.nn.elu , kernel_initializer=tf.variance_scaling_initializer())
#logits = tf.layers.dense(hidden_layer , severe_codes + 1 , activation=None , kernel_initializer=tf.variance_scaling_initializer())

with tf.name_scope("Loss"):
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target_errors_placeholder , logits=logits)
    loss = tf.reduce_mean(loss)

with tf.name_scope("Evaluation"):
    predictions = tf.cast( tf.argmax(logits , axis=-1), tf.int32)
    accuracy = tf.reduce_mean(tf.cast( tf.equal(predictions , target_errors_placeholder) , tf.float32))
with tf.name_scope("Training"):
    optimizer = tf.train.AdamOptimizer()
    gradients = optimizer.compute_gradients(loss , var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
    gradients = [ (tf.clip_by_value(var[0] , clip_value_max=1.5 , clip_value_min=-1.5)  , var[1] ) for var in gradients]
    training_op = optimizer.apply_gradients(gradients)
    init = tf.global_variables_initializer()

with tf.name_scope("Logs"):
    file_writer = tf.summary.FileWriter("tf_logs/")
    saver = tf.train.Saver()


In [34]:
#predictions.get_shape()
target_errors_placeholder.get_shape()

TensorShape([Dimension(None), Dimension(None)])

In [23]:
n_epoch = 50
num_examples =Xtrain.shape[0]
batch_size = 10
n_batches = num_examples // batch_size
evaluation_gap = 1
maxcheckswithoutprogress = 2000
savedir = "."

In [24]:
def get_model_params(sess):
    return { gvar.op.name : sess.run(gvar) for gvar in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)}

def restore_model_params(sess , params):
    pass

In [44]:
batchMask[0]

False

In [45]:
with tf.Session() as sess:
    file_writer.add_graph(tf.get_default_graph())
    accuracy_val = 0
    best_params = None
    sess.run(init)
    global_step = 0
    feed_dict_val = None
    if Xval is not None and yval is not None :
        feed_dict_val = {inputs : Xval , target_reviews : yval , input_seq_len : seq_val}
    best_yet = 0
    checkswithoutprogress = 0
    for epoch in range(n_epoch):
        step = 0
        #perms = np.random.permutation(num_examples)
        #Xbatches = np.array_split(Xtrain[perms] , n_batches)
        #ybatches = np.array_split(ytrain[perms] , n_batches)
        for batch_id in range(len(ids)):
            if batchMask[batch_id]:
                break
            #print(batch_id)
            Xb , yb , seq = getNextBatchID2(deviceID=batch_id)
            step = step + 1
            global_step = global_step + 1
            #print([batch_seq_lengths[batch_id]]*batch_lengths[batch_id])
            batch_id = int(batch_id)
            np.array([ batch_seq_lengths[batch_id] ] * batch_lengths[batch_id])
            feed_dict = {
                inputs : Xb ,
                target_errors_placeholder : yb ,
                input_seq_len : [batch_seq_lengths[batch_id]]*batch_lengths[batch_id] , 
                        }
            print(batch_id)
            sess.run(training_op , feed_dict = feed_dict)
            if global_step % evaluation_gap == 0 :
                if feed_dict_val is None :
                    feed_dict_val = feed_dict
                accuracy_val = sess.run(accuracy  , feed_dict=feed_dict_val)
                if accuracy_val > best_yet:
                    best_yet = accuracy_val
                    best_params = get_model_params(sess)
                    checkswithoutprogress = 0
                else :
                    checkswithoutprogress = checkswithoutprogress + 1
            if checkswithoutprogress > maxcheckswithoutprogress :
                print("Epoch {} Accuracy {} [Incomplete epoch , No progress for {} steps]".format(epoch + 1 , accuracy_val , checkswithoutprogress))
                break
        else :
            print("Epoch {} Accuracy {} ".format(epoch + 1 , accuracy_val))
            continue
        break
    restore_model_params(sess , best_params)
    saver.save(sess , savedir)

0


KeyboardInterrupt: 

In [39]:
with tf.Session() as sess:
    file_writer.add_graph(tf.get_default_graph())

In [None]:
#feed_dict = {inputs : Xb , target_errors : yb }#,input_seq_len : [10]*50 }
target_errors

In [None]:
[batch_seq_lengths[0]]*batch_lengths[0]