# Naive Detection Network Training Script
This script contains the code necessary to train the so-called "naive" fault *detection* network.

# Setup
The script requires TensorFlow version 1.15.0.

In [None]:
!pip install tensorflow==1.15.0

Collecting tensorflow==1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/3f/98/5a99af92fb911d7a88a0005ad55005f35b4c1ba8d75fba02df726cd936e6/tensorflow-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl (412.3MB)
[K     |████████████████████████████████| 412.3MB 35kB/s 
Collecting tensorboard<1.16.0,>=1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 63.4MB/s 
Collecting tensorflow-estimator==1.15.1
[?25l  Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_estimator-1.15.1-py2.py3-none-any.whl (503kB)
[K     |████████████████████████████████| 512kB 57.3MB/s 
Collecting gast==0.2.2
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Buildi

Package Import and Dependencies

In [None]:
import io
import os
import pickle
import random
import time
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf

# Authentication for Managing Data
from google.colab import drive
# Mount Google Drive so the data and result folders under rootPath are reachable.
drive.mount('/content/drive')

# ---- Run configuration: everything a reader might tune lives here ----
trainPercent  = 2 # percentage of training data to use (split evenly between both ends of the fault register)
rootPath = '/content/drive/My Drive/Data/' # base folder for datasets and results
name = "NaiveDetect" # tag inserted into checkpoint / result file names
stateful = False # boolean to toggle network state memory
debug = False # boolean to toggle debug options
distribute = False # boolean to toggle distribution of training (not functional)
loadModel = False # boolean to toggle loading a saved model
if debug:
    # Eager execution must be enabled before any graph is built.
    tf.enable_eager_execution()
if loadModel:
    # Location of the saved model to resume from; only defined when loadModel is True.
    checkpointPath = rootPath + 'Results/Detection/'
    weightsName = 'detectNetwork10Epoch.hdf5'
    startEpoch = 7 # 1-based epoch at which training resumes
else:
    startEpoch = 1
nEpoch = 20 # max number of epochs

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


Verify that the drive is properly mounted by reading a file from it.

In [None]:
# A freshly mounted Drive may not expose its files immediately, so poll the
# register file until it yields data. The wait is bounded and only I/O errors
# are retried, so a malformed file or a keyboard interrupt surfaces right away
# instead of being swallowed by an endless loop.
registerPath = rootPath + 'Detection/Training/FileRegister.csv'
maxAttempts = 60  # number of read attempts before giving up
retryDelay = 1.0  # seconds to wait between attempts
register = np.zeros(1)
for attempt in range(maxAttempts):
    try:
        with open(registerPath,'r') as f:
            register = np.genfromtxt(f,delimiter = ",")
    except OSError:
        # File not readable yet; fall through to the wait-and-retry below.
        pass
    if np.any(register):
        break
    time.sleep(retryDelay)
else:
    raise RuntimeError(
        'Could not read any data from {} after {} attempts'.format(registerPath, maxAttempts))
np.shape(register)

(644, 3)

Preprocessing Functions


In [None]:
def decode_TFRecord(exampleProto):
    """Parse one serialized example into a dict of sparse tensors.

    The feature spec holds 24 variable-length float32 entries, named
    x, y, z, vx, vy, vz followed by a suffix 1-4, plus three variable-length
    int64 entries: 'label', 'time' and 'sats'.

    Args:
        exampleProto: a single serialized example, as yielded by a
            TFRecordDataset.

    Returns:
        The dict produced by tf.io.parse_single_example, keyed by feature name.
    """
    stateNames = ('x', 'y', 'z', 'vx', 'vy', 'vz')

    # Float features: every state name combined with every suffix 1..4.
    featureDescription = {
        '{}{}'.format(state, suffix): tf.VarLenFeature(dtype=tf.float32)
        for suffix in range(1, 5)
        for state in stateNames
    }
    # Integer features.
    for intKey in ('label', 'time', 'sats'):
        featureDescription[intKey] = tf.VarLenFeature(dtype=tf.int64)

    return tf.io.parse_single_example(exampleProto, featureDescription)



In [None]:
def preprocess(dataset,seqLen = 50):
    """Turn one decoded record into a dataset of (window, label) pairs.

    Args:
        dataset: dict of sparse tensors as returned by decode_TFRecord.
        seqLen: number of consecutive samples in each window.

    Returns:
        A tf.data.Dataset of (window, label) tuples. `window` has shape
        (seqLen, nFeatures); `label` is a boolean of shape (1,) that is True
        when the label at the window's first sample is greater than zero.
    """
    stateNames = ('x', 'y', 'z', 'vx', 'vy', 'vz')
    # Column order of the feature matrix: x1..vz1, x2..vz2, x3..vz3, x4..vz4.
    featureKeys = [state + str(suffix) for suffix in range(1, 5) for state in stateNames]
    nFeatures = len(featureKeys)

    labels = tf.sparse.to_dense(dataset['label'])
    timeStamps = tf.sparse.to_dense(dataset['time'])

    # Stack to (nFeatures, nSamples), then transpose to (nSamples, nFeatures).
    data = tf.stack([tf.sparse.to_dense(dataset[key]) for key in featureKeys])
    data = tf.transpose(data)

    # A window starting at i is kept only if time increases between i and
    # i + seqLen (ensures continuity). The mask has one entry per candidate
    # start, i.e. nSamples - seqLen entries.
    isContinuous = timeStamps[:-seqLen] < timeStamps[seqLen:]

    # tf.slice needs a (row, column) start; the column start is always zero.
    indices = tf.where(isContinuous)
    zeros = tf.zeros_like(indices)
    begin = tf.stack([indices,zeros],axis = 1)
    begin = tf.reshape(begin,tf.shape(begin)[:2])
    dsBegin = tf.data.Dataset.from_tensor_slices(begin)

    # Map each start index to its window of seqLen rows and all feature columns.
    dataSlices = dsBegin.map(lambda start: tf.slice(data,start,[seqLen,nFeatures]))

    # Select labels with the same mask. The labels are trimmed to the mask's
    # length first so the two shapes agree explicitly; the selected elements
    # are the labels at each window's first sample.
    # NOTE(review): labelling a window by its first sample rather than its last
    # is kept as found — confirm this is the intended alignment.
    correctLabels = tf.boolean_mask(labels[:-seqLen],isContinuous)
    correctLabels = tf.reshape((correctLabels > 0),(-1,1))
    dataLabels = tf.data.Dataset.from_tensor_slices(correctLabels)

    # Zip data and labels into one consistent dataset.
    dsReturn = tf.data.Dataset.zip((dataSlices,dataLabels))
    return dsReturn

Model Definition Function

In [None]:
def createDetectModel(seq_len=32, batch_size=None, stateful=True, 
              num_units=[32, 32], use_dense_head=False):
  """Build the two-layer LSTM fault detection network.

  Args:
    seq_len: number of time steps in each input window.
    batch_size: fixed batch size of the input layer, or None.
    stateful: whether the LSTM layers keep their state between batches.
    num_units: sizes of the first and second LSTM layer (only read, never
      modified).
    use_dense_head: when True, insert a Dense(64, relu) layer between the
      second LSTM and the sigmoid output. The default (False) feeds the LSTM
      output straight into the sigmoid, which is the topology this function
      has always produced: the dense layer used to be constructed but was
      never connected to the output.

  Returns:
    An uncompiled tf.keras.Model mapping (seq_len, 24) windows to one sigmoid
    probability.
  """
  source = tf.keras.Input(
  name='seed', shape=(seq_len, 24), 
      batch_size=batch_size)
  
  lstm_1 = tf.keras.layers.LSTM(num_units[0], stateful=stateful, return_sequences=True,dropout=0.1, recurrent_dropout=0.1)(source)
  lstm_2 = tf.keras.layers.LSTM(num_units[1], stateful=stateful, return_sequences=False, dropout=0.1)(lstm_1)

  # Only build the dense layer when it is actually part of the model.
  features = lstm_2
  if use_dense_head:
    features = tf.keras.layers.Dense(64, activation='relu')(features)
  
  predict = tf.keras.layers.Dense(1, activation='sigmoid')(features)
  
  return tf.keras.Model(inputs=[source], outputs=[predict])

# Training Options

In [None]:
# Problem dimensions
# NOTE(review): TFinal enters the steps-per-epoch estimate as (TFinal - nTimesteps);
# presumably the number of samples per simulated sequence — confirm.
TFinal = 5602
nSats = 6

# Network and optimiser hyper-parameters
nUnits = [128,128]   # units in the first / second LSTM layer
nTimesteps = 50      # window length fed to the network
batchSize = 1024 if debug else 4096   # debug runs use the smaller batch
learningRate = 0.005


In [None]:
# Compile Model
def _newDetectModel():
    """Build an untrained detection network from the notebook-level settings."""
    return createDetectModel(seq_len=nTimesteps, stateful=stateful, num_units=nUnits, batch_size=batchSize)


def _compileDetectModel(model):
    """Compile `model` in place with Adam and binary cross-entropy; return the optimizer."""
    optimizer = tf.keras.optimizers.Adam(learning_rate=learningRate)
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=['binary_accuracy'])
    return optimizer


if distribute:
    # Distributed training (flagged as not functional in the configuration cell).
    strategy = tf.distribute.MirroredStrategy()
    # Everything that creates variables, including loading a saved model, has
    # to run inside the strategy scope.
    with strategy.scope():
        if loadModel:
            # The saved model is used as stored; it is not re-compiled here.
            detectModel = tf.keras.models.load_model(checkpointPath+weightsName)
        else:
            detectModel = _newDetectModel()
            adams = _compileDetectModel(detectModel)
else:
    if loadModel:
        detectModel = tf.keras.models.load_model(checkpointPath+weightsName)
    else:
        detectModel = _newDetectModel()
    # NOTE(review): a loaded model is compiled again here with a fresh Adam
    # optimizer, unlike the distributed branch — confirm that is intended when resuming.
    adams = _compileDetectModel(detectModel)

# Save a diagram of the architecture next to the data.
tf.keras.utils.plot_model(
    detectModel, to_file=rootPath + 'naiveDetectModel.png', show_shapes=True, show_layer_names=True,
    rankdir='LR', expand_nested=False, dpi=96)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [None]:
#Load Datasets
pathTrain = rootPath + 'Detection/Training/'
listdirTrain = []
if debug:
    globTrain = pathTrain + 'Train_[0].tfrecord'
    listdirTrain = tf.io.gfile.glob(globTrain)
else:
    # Train on both extremes of the fault register: the files with the lowest
    # and the highest value in column 2, trainPercent/2 percent from each end.
    with open(pathTrain + 'FaultRegister.csv','r') as f:
        faultRegister = np.genfromtxt(f,delimiter = ",")
    sortedReg = faultRegister[faultRegister[:,2].argsort()]
    # NOTE(review): the file count is taken from the last entry of column 0,
    # which presumably holds the highest file index — confirm.
    nFiles = int(trainPercent/2 /100 * faultRegister[-1,0])
    # Explicit start index for the tail: a plain sortedReg[-nFiles:] would
    # select every row when nFiles is 0.
    tailStart = max(len(sortedReg) - nFiles, 0)
    fileIndices = np.concatenate([sortedReg[:nFiles,0].astype(int),sortedReg[tailStart:,0].astype(int)])
    for index in fileIndices:
        listdirTrain.append(pathTrain + 'TrainCorrected_' + str(index) + '.tfrecord')
nFilesTrain = len(listdirTrain)
if nFilesTrain == 0:
    raise ValueError('No training files selected; check trainPercent and the contents of ' + pathTrain)

# Validation set
pathTest = rootPath + 'Detection/Testing/'
if debug:
    globTest = pathTest + 'Test_0.tfrecord'
else:
    globTest = pathTest + 'Test_[0].tfrecord'
listdirTest = tf.io.gfile.glob(globTest)

['/content/drive/My Drive/Data/Detection/Training/TrainCorrected_478.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_499.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_20.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_205.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_326.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_383.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_330.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_223.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_68.tfrecord', '/content/drive/My Drive/Data/Detection/Training/TrainCorrected_324.tfrecord']


In [None]:
# ---- Training pipeline: records -> decoded features -> labelled windows ----
fileListDatasetTrain = tf.data.TFRecordDataset(listdirTrain)
decodedDataset = fileListDatasetTrain.map(decode_TFRecord)
processedDataset = decodedDataset.flat_map(preprocess)

# Shuffle only when the network is stateless, then repeat for every epoch,
# cut into full batches and prefetch.
trainDataset = processedDataset if stateful else processedDataset.shuffle(10*batchSize)
trainDataset = trainDataset.repeat(nEpoch)
trainDataset = trainDataset.batch(batchSize,drop_remainder = True)
trainDataset = trainDataset.prefetch(tf.data.experimental.AUTOTUNE)

# ---- Validation pipeline: same decoding, no shuffle and no repeat ----
nFilesTest = len(listdirTest)
fileListDatasetTest = tf.data.TFRecordDataset(listdirTest)
decodeValDataset = fileListDatasetTest.map(decode_TFRecord)
processedVal = decodeValDataset.flat_map(preprocess)
validationDataset = processedVal.batch(batchSize,drop_remainder = True)
validationDataset = validationDataset.prefetch(tf.data.experimental.AUTOTUNE)

# ---- Steps per epoch ----
# NOTE(review): 6*100 is presumably the number of sequences stored in each
# file — confirm against the data generator.
samplesPerFile = 6*100*(TFinal-nTimesteps)
trainSteps = int(nFilesTrain*samplesPerFile/batchSize)
testSteps = int(nFilesTest*samplesPerFile/batchSize)

print("Training Files: {}".format(nFilesTrain))
print("Testing Files: {}".format(nFilesTest))



['/content/drive/My Drive/Data/Detection/Testing/Test_0.tfrecord']
Training Files: 10
Testing Files: 1


# Training

In [None]:
# Toggle for running validation during training.
VAL = True

# Checkpoint that saves the full model at a fixed interval. The interval is
# tied to trainSteps instead of the previously hard-coded 8132, which is the
# value trainSteps had for one particular file selection and batch size.
# NOTE(review): how Keras counts an integer save_freq (samples vs. batches)
# depends on the version — confirm the resulting cadence is once per epoch.
checkpoint = tf.keras.callbacks.ModelCheckpoint(rootPath + "Results/Detection/"+name+"_weights.{epoch:02d}-{binary_accuracy:.2f}.hdf5", 
                                                monitor='binary_accuracy', verbose=0, save_best_only=False, 
                                                save_weights_only=False, mode='auto', save_freq = trainSteps)
# Stopper to stop training if the loss does not improve three times in a row.
# Without validation there is no 'val_loss', so fall back to the training loss.
stopper = tf.keras.callbacks.EarlyStopping(patience=3,monitor='val_loss' if VAL else 'loss')
history = detectModel.fit(trainDataset, 
                             epochs=nEpoch, steps_per_epoch = trainSteps,
                            callbacks=[checkpoint,stopper], 
                            initial_epoch = startEpoch-1,
                            validation_data= validationDataset if VAL else None,
                            validation_steps = testSteps if VAL else None)
#


Train on 8132 steps, validate on 813 steps
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


In [None]:
# Persist the trained network under the Drive results folder:
# the full model first, then the weights on their own.
modelPath = rootPath +'Results/Detection/{0}.hdf5'.format(name)
weightsPath = rootPath + 'Results/Detection/weights_{0}.h5'.format(name)
detectModel.save(modelPath, overwrite=True)
detectModel.save_weights(weightsPath, overwrite=True)

# Persist the per-epoch metrics recorded by fit() as a pickled dict.
historyPath = rootPath + 'Results/Detection/trainHistoryDict{0}.pkl'.format(name)
with open(historyPath, 'wb') as historyFile:
    pickle.dump(history.history, historyFile)