#  Individual Isolation Network Training Script
This script contains the code necessary to train the so-called "individual" fault *isolation* network.

# Setup
The script requires TensorFlow version 1.15.0.

In [None]:
# Install the pinned TensorFlow 1.15.0 release that this notebook requires.
!pip install tensorflow==1.15.0

Collecting tensorflow==1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/3f/98/5a99af92fb911d7a88a0005ad55005f35b4c1ba8d75fba02df726cd936e6/tensorflow-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl (412.3MB)
[K     |████████████████████████████████| 412.3MB 41kB/s 
[?25hCollecting gast==0.2.2
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Collecting keras-applications>=1.0.8
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)
[K     |████████████████████████████████| 51kB 8.1MB/s 
Collecting tensorboard<1.16.0,>=1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 72.9MB/s 
Collecting te

Package Import and Dependencies

In [None]:
import io
import os
import pickle
import random
import shutil as sh
import time
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
# Authentication for Managing Data
# Mount Google Drive into the Colab filesystem. `rootPath` is the base
# directory from which the data and result paths in the later cells are built.
from google.colab import drive
drive.mount('/content/drive')
rootPath = '/content/drive/My Drive/Data/'

Mounted at /content/drive


In [None]:
# Load the training file register (a comma-separated table) from Drive.
#
# The read is retried because the file may not be readable on the first
# attempt (presumably while the Drive mount settles -- TODO confirm). Unlike
# an unbounded `while` with a bare `except`, the retry is limited, only
# swallows I/O errors, and stays interruptible, so a missing or empty register
# fails loudly instead of spinning forever.
registerPath = rootPath + 'Isolation/Training/FileRegister.csv'
maxAttempts = 30
retryDelay = 2.0  # seconds to wait between attempts

register = np.zeros(1)
lastError = None
for attempt in range(maxAttempts):
    if attempt:
        time.sleep(retryDelay)
    try:
        with open(registerPath, 'r') as f:
            register = np.genfromtxt(f, delimiter=",")
    except OSError as err:
        # File not there (yet) or not readable: remember why and try again.
        lastError = err
        continue
    # Same success criterion as before: at least one non-zero entry was read.
    if np.any(register):
        break
else:
    raise RuntimeError(
        "No usable data read from '{}' after {} attempts (last I/O error: {})".format(
            registerPath, maxAttempts, lastError))
np.shape(register)


(97, 3)

Preprocessing Functions


In [None]:
def decode_TFRecord(exampleProto):
    """Parse one serialized TFRecord example into a dict of sparse tensors.

    The feature spec declares, for each index 1-4, the keys
    'x', 'y', 'z', 'vx', 'vy', 'vz' (e.g. 'x1', 'vz4') as variable-length
    float32 features, followed by 'label', 'time' and 'sats' as
    variable-length int64 features.

    Args:
        exampleProto: a single serialized example, as yielded by a
            `tf.data.TFRecordDataset`.

    Returns:
        The result of `tf.io.parse_single_example` for the spec above.
    """
    channelNames = ('x', 'y', 'z', 'vx', 'vy', 'vz')

    # Build the spec programmatically, in the same key order as a hand-written
    # table: all six channels of index 1, then index 2, 3 and 4.
    featureDescription = {}
    for idx in range(1, 5):
        for channel in channelNames:
            featureDescription[channel + str(idx)] = tf.VarLenFeature(dtype=tf.float32)

    # Integer-valued bookkeeping features
    for key in ('label', 'time', 'sats'):
        featureDescription[key] = tf.VarLenFeature(dtype=tf.int64)

    # Extract features from serialized data
    return tf.io.parse_single_example(exampleProto, featureDescription)



In [None]:
def preprocess(dataset, seqLen=50, satView=0):
    """Turn one decoded record into a dataset of (window, one-hot label) pairs.

    Args:
        dataset: dict of sparse tensors as returned by `decode_TFRecord`
            (keys 'x1'..'vz4', 'label', 'time', 'sats').
        seqLen: number of consecutive rows in every window.
        satView: satellite index (Python int) to keep. Only windows whose
            first row has this value in 'sats' are emitted. Defaults to 0,
            the value that used to be hard-coded in the function body.

    Returns:
        A `tf.data.Dataset` of `(window, label)` pairs, where `window` is a
        `[seqLen, 24]` slice of the stacked channels and `label` is a
        length-36 int32 one-hot vector built from the 'label' entry at the
        window's first row.
    """
    # Set the satellite viewpoint
    satView = tf.constant(satView, dtype=tf.int64)

    sats = tf.sparse.to_dense(dataset['sats'])
    labels = tf.sparse.to_dense(dataset['label'])
    # Named `timeStamps` (not `time`) so the stdlib module is never shadowed.
    timeStamps = tf.sparse.to_dense(dataset['time'])

    # Densify the 24 channels in the fixed column order
    # x, y, z, vx, vy, vz for index 1, then the same for 2, 3 and 4.
    channels = [tf.sparse.to_dense(dataset[axis + str(idx)])
                for idx in range(1, 5)
                for axis in ('x', 'y', 'z', 'vx', 'vy', 'vz')]
    # Stacking puts channels on the first axis; transpose so each row holds
    # the 24 channel values of one sample.
    data = tf.transpose(tf.stack(channels))

    # A row is a valid window start when
    # (1) the time at the beginning of the slice is less than the time at the
    #     end of the slice (ensures continuity), and
    # (2) the satellite index matches the requested one.
    # Computed once and reused for both the windows and the labels so the two
    # selections cannot drift apart.
    validStart = (timeStamps[:-seqLen] < timeStamps[seqLen:]) & tf.equal(sats[:-seqLen], satView)

    # tf.where yields one row index per valid start; tf.slice on the 2-D
    # `data` needs a (row, column) begin, so append a zero column.
    indices = tf.where(validStart)
    begin = tf.concat([indices, tf.zeros_like(indices)], axis=1)
    dsBegin = tf.data.Dataset.from_tensor_slices(begin)

    # Map every start position to a window of seqLen rows and all 24 columns
    dataSlices = dsBegin.map(lambda start: tf.slice(data, start, [seqLen, 24]))

    # Select the labels the same way. `labels` is trimmed to the mask length
    # first so the mask and the tensor it is applied to have matching shapes.
    correctLabels = tf.boolean_mask(labels[:-seqLen], validStart)
    # One-hot vector with a single 1 in the position of the faulty thruster
    correctLabels = tf.one_hot(correctLabels, 36, dtype=tf.int32)
    dataLabels = tf.data.Dataset.from_tensor_slices(correctLabels)

    return tf.data.Dataset.zip((dataSlices, dataLabels))

Model Definition Function

In [None]:
def createIsolateModel(seq_len=32, batch_size=None, stateful=True,
                       num_units=[32, 32]):
    """Build the (uncompiled) LSTM classifier used for fault isolation.

    Architecture: input `(seq_len, 24)` -> LSTM returning sequences ->
    LSTM returning its last output -> Dense(100, relu) -> Dense(36, softmax).

    Args:
        seq_len: number of time steps in each input sequence.
        batch_size: fixed batch size of the input layer, or None.
        stateful: forwarded to both LSTM layers.
        num_units: two-element sequence with the unit counts of the first and
            second LSTM layer (only read, never modified).

    Returns:
        A `tf.keras.Model` with one input named 'seed' and one softmax output.
    """
    firstUnits, secondUnits = num_units[0], num_units[1]

    seed = tf.keras.Input(name='seed', shape=(seq_len, 24), batch_size=batch_size)

    # Layers are instantiated in network order, then wired together below.
    firstLstm = tf.keras.layers.LSTM(firstUnits, stateful=stateful,
                                     return_sequences=True,
                                     dropout=0.1, recurrent_dropout=0.1)
    secondLstm = tf.keras.layers.LSTM(secondUnits, stateful=stateful,
                                      return_sequences=False, dropout=0.1)
    hidden = tf.keras.layers.Dense(100, activation='relu')
    classifier = tf.keras.layers.Dense(36, activation='softmax')

    encoded = secondLstm(firstLstm(seed))
    probabilities = classifier(hidden(encoded))

    return tf.keras.Model(inputs=[seed], outputs=[probabilities])

# Training Options

In [None]:
# ---- Run options -----------------------------------------------------------
# stateful:        forwarded to both LSTM layers when the model is created.
# debug:           enables eager execution and restricts the file globs below
#                  to a single training / testing file.
# distribute:      build the model under a tf.distribute.MirroredStrategy.
# loadModel:       load a saved model from `checkpointPath + weightsName`
#                  instead of creating a new one, and resume at `startEpoch`.
# dataSetSizeTest: train on the TrainCorrected_[0-7] subset instead of the
#                  full file list.
# satView:         NOTE(review): not read by any cell visible here --
#                  `preprocess` sets its own satellite viewpoint. Confirm
#                  whether this option is meant to control it.
# name:            stem used in checkpoint, model, weights and history file names.
stateful = False
debug = False
distribute = False
loadModel = False
dataSetSizeTest = True
satView = 0
name = "isolateInd0HalfData"
if debug:
    tf.enable_eager_execution()
if loadModel:
    # Location and file name of the saved model to resume from
    checkpointPath = rootPath + 'Results/Isolation/'
    weightsName = 'isolateInd0_weights.11-0.62.hdf5'
    # 1-based epoch to resume at (fit() receives startEpoch-1)
    startEpoch = 12
else:
    startEpoch = 1
# Total number of epochs passed to fit(); also the repeat count of the training dataset
nEpoch = 32

# Disabled TPU / GPU session setup, kept for reference:
#resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
#tf.config.experimental_connect_to_cluster(resolver)
#tf.tpu.experimental.initialize_tpu_system(resolver)
#config = tf.ConfigProto()
#config.gpu_options.allow_growth = True
#sess = tf.Session(config=config)

# Parameter Definitions


# TFinal enters the steps-per-epoch estimate as (TFinal - nTimesteps).
TFinal = 5602
# NOTE(review): nSats is not read by any cell visible here.
nSats = 6
# Define model parameters
# Unit counts of the first and second LSTM layer.
# NOTE(review): 356 may be a typo for 256 -- confirm before changing.
nUnits = [256,356]
# Sequence length of the model input. `preprocess` is mapped with its default
# seqLen=50, so the two values have to be kept in sync by hand.
nTimesteps = 50

# NOTE(review): both branches assign the same batch size, so `debug` has no
# effect here.
if debug:
    batchSize = 4096
else:
    batchSize = 4096
# Learning rate of the Adam optimizer
learningRate = 0.001


In [None]:
# Build (or reload) the isolation model and compile it.
def _compileIsolateModel(model):
    """Compile `model` with Adam and categorical cross-entropy, then return it.

    The optimizer is created inside this call so that, when the call happens
    within a distribution-strategy scope, the optimizer is created there too.
    """
    adams = tf.keras.optimizers.Adam(learning_rate=learningRate)
    model.compile(optimizer=adams,
                  loss=tf.keras.losses.CategoricalCrossentropy(),
                  metrics=['categorical_accuracy'])
    return model


def _newIsolateModel():
    """Create a new, uncompiled model from the notebook-level parameters."""
    return createIsolateModel(seq_len=nTimesteps, stateful=stateful,
                              num_units=nUnits, batch_size=batchSize)


if distribute:
    # Disabled TPU alternatives, kept for reference:
    # topology = tf.contrib.distribute.initialize_tpu_system()
    #device_assignment = tf.contrib.tpu.DeviceAssignment(topology, core_assignment=tf.contrib.tpu.SINGLE_CORE_ASSIGNMENT)
    #tpu_strategy = tf.contrib.distribute.TPUStrategy(device_assignment=device_assignment)
    #strategy = tf.distribute.experimental.TPUStrategy(resolver)
    strategy = tf.distribute.MirroredStrategy()
    if loadModel:
        # NOTE(review): the model is loaded outside strategy.scope() and is
        # not re-compiled, so it keeps whatever compile state was saved and is
        # not tied to `strategy`. Behaviour left unchanged -- confirm intent.
        isolateModel = tf.keras.models.load_model(checkpointPath+weightsName)
    else:
        with strategy.scope():
            isolateModel = _compileIsolateModel(_newIsolateModel())
else:
    if loadModel:
        isolateModel = tf.keras.models.load_model(checkpointPath+weightsName)
    else:
        isolateModel = _newIsolateModel()
    # Compiled in both cases; for a loaded model this replaces the saved
    # optimizer with a fresh Adam instance.
    isolateModel = _compileIsolateModel(isolateModel)

# Write a diagram of the model architecture next to the data
tf.keras.utils.plot_model(
    isolateModel, to_file=rootPath + 'indIsolateModel.png', show_shapes=True, show_layer_names=True,
    rankdir='LR', expand_nested=False, dpi=96)

In [None]:
# Training set
pathTrain = rootPath + 'Isolation/Training/'
# Sub-folder of Results/Isolation/ that checkpoints and saved models go to.
# Assigned up front so it is defined in every mode; in debug mode it used to
# be left unset, which made the checkpoint and save cells fail with a NameError.
folder = ''
if debug:
    # Single file for quick debugging runs
    globTrain = pathTrain + 'TrainCorrected_[0].tfrecord'
    listdirTrain = tf.io.gfile.glob(globTrain)
elif dataSetSizeTest:
    # Reduced training set: files 0-7
    globTrain = pathTrain + 'TrainCorrected_[0-7].tfrecord'
    # Further subsets that were used at some point, kept for reference:
    #glob2 = pathTrain + 'TrainCorrected_[1][0-5].tfrecord'
    #glob3 = pathTrain + 'TrainCorrected_[6][0-3].tfrecord'
    listdirTrain = tf.io.gfile.glob(globTrain)
    folder = 'IndIsolate_QuarterData/'
else:
    # Full training set: files 0-9, 10-29 and 30-31
    globTrain = pathTrain + 'TrainCorrected_[0-9].tfrecord'
    glob2 = pathTrain + 'TrainCorrected_[1-2][0-9].tfrecord'
    glob3 = pathTrain + 'TrainCorrected_[3][0-1].tfrecord'
    listdirTrain = tf.io.gfile.glob(globTrain) + tf.io.gfile.glob(glob2) + tf.io.gfile.glob(glob3)
if not listdirTrain:
    # An empty list would give zero training steps further down.
    raise FileNotFoundError("No training files match '{}'".format(globTrain))
nFilesTrain = len(listdirTrain)

# Validation set
pathTest = rootPath + 'Isolation/Testing/'
if debug:
    globTest = pathTest + 'TestCorrected_0.tfrecord'
else:
    globTest = pathTest + 'TestCorrected_[0-9].tfrecord'
listdirTest = tf.io.gfile.glob(globTest)
if not listdirTest:
    raise FileNotFoundError("No testing files match '{}'".format(globTest))
nFilesTest = len(listdirTest)


['/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_0.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_1.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_2.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_3.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_4.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_5.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_6.tfrecord', '/content/drive/My Drive/Data/Isolation/Training/TrainCorrected_7.tfrecord']


In [None]:
# Training dataset: read records -> decode -> cut into windows -> repeat ->
# batch -> prefetch.
# NOTE(review): no shuffle is applied anywhere in this pipeline, so batches
# are filled with windows in the order `preprocess` emits them. Confirm
# whether a shuffle step was intended.
fileListDatasetTrain = tf.data.TFRecordDataset(listdirTrain)
decodedDataset = fileListDatasetTrain.map(decode_TFRecord)
# flat_map concatenates the per-record window datasets into one stream;
# preprocess is called with its default arguments.
processedDataset = decodedDataset.flat_map(preprocess)
trainDataset = processedDataset
# Repeated nEpoch times so fit() can draw steps_per_epoch batches in every epoch
trainDataset = trainDataset.repeat(nEpoch).batch(batchSize,drop_remainder = True).prefetch(tf.data.experimental.AUTOTUNE)

# Validation Dataset
# Same decode/window pipeline as above, but without repeat().
fileListDatasetTest = tf.data.TFRecordDataset(listdirTest)
decodeValDataset = fileListDatasetTest.map(decode_TFRecord)
processedVal = decodeValDataset.flat_map(preprocess)
validationDataset = processedVal.batch(batchSize,drop_remainder = True).prefetch(tf.data.experimental.AUTOTUNE)
# Determine steps per epoch
# Estimated as files * 100 * (TFinal - nTimesteps) / batchSize, rounded down.
# NOTE(review): the factor 100 is not explained in this notebook, and the
# estimate does not account for windows that `preprocess` filters out --
# TODO confirm it matches the real number of batches.
trainSteps = int(nFilesTrain*100*(TFinal-nTimesteps)/batchSize)
testSteps = int(nFilesTest*100*(TFinal-nTimesteps)/batchSize)
print("Training Files: {}".format(nFilesTrain))
print("Testing Files: {}".format(nFilesTest))


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
['/content/drive/My Drive/Data/Isolation/Testing/TestCorrected_0.tfrecord', '/content/drive/My Drive/Data/Isolation/Testing/TestCorrected_1.tfrecord', '/content/drive/My Drive/Data/Isolation/Testing/TestCorrected_2.tfrecord', '/content/drive/My Drive/Data/Isolation/Testing/TestCorrected_3.tfrecord', '/content/drive/My Drive/Data/Isolation/Testing/TestCorrected_4.tfrecord']
Training Files: 8
Testing Files: 10


# Training

In [None]:
# Checkpoint callback: saves the full model (not just the weights) at the end
# of every epoch (save_freq='epoch', save_best_only=False). The file name
# embeds the epoch number and the training categorical accuracy.
checkpoint = tf.keras.callbacks.ModelCheckpoint(rootPath + "Results/Isolation/"+folder+name+"_weights.{epoch:02d}-{categorical_accuracy:.4f}.hdf5", 
                                                monitor='categorical_accuracy', verbose=0, save_best_only=False, 
                                                save_weights_only=False, mode='auto', save_freq = 'epoch')
# Early stopping on the validation loss ('val_loss') with patience=3
stopper = tf.keras.callbacks.EarlyStopping(patience=3,monitor='val_loss')
# Toggle for running validation during training.
# NOTE(review): with VAL = False no 'val_loss' would be reported, so the
# early-stopping callback above would have nothing to monitor -- confirm
# before turning validation off.
VAL = True

# initial_epoch is zero-based, hence startEpoch-1.
history = isolateModel.fit(trainDataset, 
                             epochs=nEpoch, steps_per_epoch = trainSteps,
                            callbacks=[checkpoint,stopper], 
                            initial_epoch = startEpoch-1,
                            validation_data= validationDataset if VAL else None,
                            validation_steps = testSteps if VAL else None)


Train on 1084 steps, validate on 1355 steps
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32


In [None]:
# Persist the trained model, its weights and the training history under
# Results/Isolation/<folder> on the mounted Drive.
resultsPrefix = rootPath + 'Results/Isolation/' + folder

# Full model and a separate weights-only file
isolateModel.save(resultsPrefix + '{0}.hdf5'.format(name), overwrite=True)
isolateModel.save_weights(resultsPrefix + 'weights_{0}.h5'.format(name), overwrite=True)

# Training history (the per-epoch metrics dict returned by fit) as a pickle
historyPath = resultsPrefix + 'trainHistoryDict{0}.pkl'.format(name)
with open(historyPath, 'wb') as historyFile:
    pickle.dump(history.history, historyFile)