In [1]:
# Pin TensorFlow 1.15: the TFRecord writer further down uses `tf.python_io`,
# which only exists in the TF1 API surface.
!pip install tensorflow==1.15.0



In [2]:
import numpy as np
import pandas as pd
import io
import tensorflow as tf
import tensorflow.train as tft
import time
import re
import matplotlib.pyplot as plt
from collections import Counter
from IPython.display import clear_output
from pathlib import Path
# Authentication for Managing Data
from google.colab import drive,files

# Mount Google Drive; every data path used in this notebook is under /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Load one raw simulation file as a sanity check of the Drive mount and the CSV layout.
# The read is retried a bounded number of times (presumably because the first access
# to a freshly mounted Drive can fail transiently - TODO confirm). Only I/O errors are
# retried, so Ctrl-C and genuine parsing errors are no longer swallowed.
dataOpenPath = '/content/drive/My Drive/DataRaw/Detection/Training/DataOpenFault4N_11_1.csv'
maxAttempts = 5
dataOpen = np.zeros(1)
for attempt in range(1, maxAttempts + 1):
    try:
        with open(dataOpenPath,'r') as f:
            dataOpen = np.genfromtxt(f,delimiter = ",")
    except OSError as err:
        print("Attempt {0} of {1} failed: {2}".format(attempt, maxAttempts, err))
        time.sleep(1)
        continue
    # Same success criterion as before: the array must contain something non-zero.
    if np.any(dataOpen):
        break
else:
    raise RuntimeError("Could not load any data from " + dataOpenPath)
np.shape(dataOpen)

(141649, 6)

In [4]:
def process(fileName):
    """Turn one raw simulation CSV into per-satellite, post-fault feature rows.

    File layout as read by this function:
      * row 0 is a header: column 0 is the fault time (rounded up to an
        integer step), column 1 the faulty satellite number and column 2 the
        faulty thruster number (both presumably 1-based - TODO confirm);
      * every following group of 24 rows is one time step; rows 4k..4k+3 of
        a group belong to satellite index k (0..5) and each row contributes
        its first 6 columns (presumably relative position and velocity).

    Only time steps from the fault time onwards are kept. Trailing rows that
    do not form a complete 24-row step are ignored.

    Parameters
    ----------
    fileName : str
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (6 * nSteps, 27). Rows are ordered satellite by
        satellite (all steps of satellite 0, then satellite 1, ...). Columns
        0-23 are the four 6-value rows of that satellite flattened in order,
        column 24 is the label 6*(faultSat-1)+faultThruster-1, column 25 the
        time step index and column 26 the satellite index (0..5).

    Raises
    ------
    ValueError
        If the header reports no fault (fault time < 1) or the fault time
        lies beyond the last complete time step.
    """
    nSats = 6
    rowsPerSat = 4
    rowsPerStep = nSats * rowsPerSat  # 24 rows describe one time step
    noCols = 6

    # Load correct File
    with open(fileName,'r') as f:
        raw = np.genfromtxt(f,delimiter = ",")

    # Header row carries the fault description
    info = raw[0,:]
    faultTime = int(np.ceil(info[0]))
    if faultTime < 1:
        # A fault-free run has no post-fault samples to extract.
        raise ValueError("No fault time in header of {}".format(fileName))
    faultSat = info[1]
    faultThruster = info[2]

    # Separate relative position & velocity data
    posvelData = raw[1:,:]
    noRows = np.size(posvelData, 0)//rowsPerStep - faultTime
    if noRows < 0:
        raise ValueError("Fault time {0} is beyond the data in {1}".format(faultTime, fileName))

    # Keep only whole time steps from the fault onwards.
    settleOffset = faultTime * rowsPerStep
    window = posvelData[settleOffset:settleOffset + noRows*rowsPerStep, 0:noCols]
    # (step, satellite, 4 rows x 6 columns flattened) -> satellite-major row order
    perStep = window.reshape(noRows, nSats, rowsPerSat*noCols)
    features = perStep.transpose(1, 0, 2).reshape(nSats*noRows, rowsPerSat*noCols)

    labels = (6*(faultSat-1)+faultThruster-1)*np.ones([nSats*noRows, 1])
    steps = np.tile(np.arange(faultTime,noRows+faultTime),nSats).reshape(-1,1)
    sats = np.repeat(np.arange(nSats),noRows).reshape(-1,1)

    return np.concatenate((features,labels,steps,sats),axis = 1)
 


In [5]:
def create_tfrecord(filePath,fileName, data):
    """Write `data` to a TFRecord file holding a single tf.train.Example.

    Each column of `data` becomes one feature whose value list is the whole
    column: columns 0-23 are stored as float lists, columns 24-26 are cast
    to int and stored as int64 lists.

    Parameters
    ----------
    filePath : str
        Directory prefix; it is concatenated directly with `fileName`.
    fileName : str
        Name of the TFRecord file to create.
    data : numpy.ndarray
        2-D array with at least 27 columns.
    """
    floatKeys = ('x1','y1','z1','vx1','vy1','vz1',
                 'x2','y2','z2','vx2','vy2','vz2',
                 'x3','y3','z3','vx3','vy3','vz3',
                 'x4','y4','z4','vx4','vy4','vz4')
    intKeys = ('label','time','sats')

    feature_dict = {}
    # Float-valued columns come first, in key order.
    for col, key in enumerate(floatKeys):
        feature_dict[key] = tft.Feature(float_list = tft.FloatList(value = data[:,col]))
    # Integer-valued bookkeeping columns follow directly after them.
    for col, key in enumerate(intKeys, start = len(floatKeys)):
        feature_dict[key] = tft.Feature(int64_list = tft.Int64List(value = data[:,col].astype(int)))

    DataExample = tft.Example(features = tft.Features(feature = feature_dict))

    with tf.python_io.TFRecordWriter(filePath+fileName) as writer:
        writer.write(DataExample.SerializeToString())
    return


In [6]:
# Folder on Drive that holds the generated dataset and its register files.
filePathWrite = '/content/drive/My Drive/Data/Isolation/Training/'

# Load the fault-severity register: file name, index and fault severity per row.
registerColumns = ['fileName','index','faultSev']
with open(filePathWrite+"FaultSevRegister.csv",'r') as f:
    register = pd.read_csv(f,names=registerColumns)

# Order the register by severity and pull out the high-severity rows.
sortReg = register.sort_values(by='faultSev')
high = sortReg.loc[sortReg['faultSev']>0.8]
sortedNames = sortReg['fileName'].reset_index(drop=True)

# Quick look: lowest-severity file, all names in severity order, high-severity rows.
print(sortedNames[0])
print(sortedNames)
print(high)


/content/drive/My Drive/DataRaw/Isolation/Training/DataOpenFault4N_42_739.csv
0        /content/drive/My Drive/DataRaw/Isolation/Trai...
1        /content/drive/My Drive/DataRaw/Isolation/Trai...
2        /content/drive/My Drive/DataRaw/Detection/Trai...
3        /content/drive/My Drive/DataRaw/Isolation/Trai...
4        /content/drive/My Drive/DataRaw/Detection/Trai...
                               ...                        
64825    /content/drive/My Drive/DataRaw/Detection/Trai...
64826    /content/drive/My Drive/DataRaw/Detection/Trai...
64827    /content/drive/My Drive/DataRaw/Detection/Trai...
64828    /content/drive/My Drive/DataRaw/Detection/Trai...
64829    /content/drive/My Drive/DataRaw/Detection/Trai...
Name: fileName, Length: 64830, dtype: object
                                                fileName  index  faultSev
3259   /content/drive/My Drive/DataRaw/Detection/Test...   3259   0.80001
1603   /content/drive/My Drive/DataRaw/Detection/Test...   1603   0.80001
54766 

In [7]:
# List the closed-fault files tagged "_21_" whose severity exceeds 0.995.
highVP = sortReg[sortReg['faultSev']>0.995]
highVPlist = highVP['fileName'].tolist()
for fileName in highVPlist:
    if "ClosedFault" in fileName and "_21_" in fileName:
        print(fileName)
        #files.download(fileName)

# Hand-picked example files; the downloads below are disabled.
downOF11 = "/content/drive/My Drive/DataRaw/Isolation/Training/DataOpenFault4N_11_852.csv"
downOF21 = "/content/drive/My Drive/DataRaw/Isolation/Training/DataOpenFault4N_21_506.csv"
downCF11 = "/content/drive/My Drive/DataRaw/Isolation/Training/DataClosedFault4N_11_852.csv"
downCF21 = "/content/drive/My Drive/DataRaw/Detection/Training/DataClosedFault4N_21_359.csv"
downNF = "/content/drive/My Drive/DataRaw/Detection/Training/DataNoFault4N_1.csv"
#files.download(downOF11)
#files.download(downOF21)
#files.download(downCF11)
#files.download(downCF21)
#files.download(downNF)

/content/drive/My Drive/DataRaw/Isolation/Training/DataClosedFault4N_21_703.csv
/content/drive/My Drive/DataRaw/Detection/Training/DataClosedFault4N_21_90.csv
/content/drive/My Drive/DataRaw/Isolation/Training/DataClosedFault4N_21_506.csv
/content/drive/My Drive/DataRaw/Detection/Training/DataClosedFault4N_21_359.csv


In [8]:
# Draw a severity-stratified sample of simulation files:
# 40 % high severity, 40 % low severity and 20 % mid severity,
# together `percent` % of the `nSims` registered simulations.
seed = 6
percent = 24
nSims = 64830

# Seed the generator before sampling so the selection is reproducible.
np.random.seed(seed)

high = sortReg[sortReg['faultSev']>0.8]
low = sortReg[sortReg['faultSev']<0.3]
# Bounds are inclusive so files with a severity of exactly 0.3 or 0.8 belong to a stratum.
mid =  sortReg[(sortReg['faultSev']>=0.3) & (sortReg['faultSev'] <= 0.8)]
indicesHigh = high['index'].to_numpy()
indicesLow = low['index'].to_numpy()
indicesMid = mid['index'].to_numpy()

# Sampling without replacement; np.random.choice raises if a stratum is too small.
highVals = np.random.choice(indicesHigh,int(0.4*0.01*percent*nSims),replace =False).tolist()
lowVals = np.random.choice(indicesLow,int(0.4*0.01*percent*nSims),replace =False).tolist()
midVals = np.random.choice(indicesMid,int(0.2*0.01*percent*nSims),replace =False).tolist()
comb = highVals+lowVals+midVals

# TODO: Get a different 10 % not already included


In [9]:
# Look up the sampled rows and count how often each fault code occurs among
# closed-fault and open-fault files (the code is the number between
# "Fault4N_" and the next underscore in the file name).
# The frame is deliberately not called `files`: that name is the google.colab
# module imported at the top of the notebook.
selectedFiles = register.iloc[comb]
fileNames = selectedFiles['fileName'].to_numpy()
faultClosedOcc = []
faultOpenOcc = []
occurrenceBuckets = (('ClosedFault4N_(.+?)_', faultClosedOcc),
                     ('OpenFault4N_(.+?)_', faultOpenOcc))
for filePath in fileNames:
    for pattern, bucket in occurrenceBuckets:
        found = re.search(pattern, filePath)
        if found:
            bucket.append(int(found.group(1)))
print(len(faultClosedOcc))
print(len(faultOpenOcc))
print(Counter(faultClosedOcc))
print(Counter(faultOpenOcc))

7822
7735
Counter({26: 233, 56: 232, 14: 230, 61: 229, 12: 226, 46: 225, 11: 225, 51: 224, 62: 224, 24: 223, 64: 222, 44: 221, 42: 220, 34: 220, 15: 220, 13: 218, 63: 217, 55: 217, 25: 216, 16: 216, 23: 216, 41: 215, 65: 215, 35: 214, 43: 213, 36: 213, 31: 212, 53: 212, 45: 212, 32: 211, 21: 210, 54: 208, 52: 207, 66: 203, 33: 202, 22: 201})
Counter({23: 244, 14: 235, 35: 235, 62: 234, 65: 234, 25: 228, 64: 226, 63: 225, 43: 224, 33: 223, 53: 222, 34: 222, 22: 222, 42: 222, 51: 219, 56: 215, 13: 214, 15: 214, 21: 213, 66: 212, 52: 210, 61: 208, 44: 208, 31: 207, 36: 206, 26: 205, 55: 205, 45: 204, 11: 204, 24: 203, 12: 203, 46: 201, 32: 201, 54: 197, 41: 197, 16: 193})


In [None]:
# Seed is 6 DO NOT CHANGE
# The shuffled order decides which files end up together when the list is consumed
# in consecutive slices, so the seed has to stay fixed for that grouping to repeat.
seed = 6
np.random.seed(seed)
# `fileNames` is ordered high, low, mid severity (the order of `comb`);
# the permutation mixes the three groups.
fileList = np.random.permutation(fileNames)
print(fileList)

['/content/drive/My Drive/DataRaw/Detection/Training/DataOpenFault4N_11_212.csv'
 '/content/drive/My Drive/DataRaw/Isolation/Training/DataOpenFault4N_62_867.csv'
 '/content/drive/My Drive/DataRaw/Isolation/Training/DataOpenFault4N_15_659.csv'
 ...
 '/content/drive/My Drive/DataRaw/Isolation/Training/DataClosedFault4N_34_861.csv'
 '/content/drive/My Drive/DataRaw/Detection/Training/DataOpenFault4N_35_210.csv'
 '/content/drive/My Drive/DataRaw/Isolation/Training/DataOpenFault4N_24_547.csv']


In [None]:
import os
# Check that one specific raw file is present on the mounted Drive.
fileMissing = '/content/drive/My Drive/DataRaw/Isolation/Training/DataClosedFault4N_45_576.csv'
print(Path(fileMissing).is_file())

True


In [None]:
# Convert the shuffled raw files into TFRecord files, `nPerRecord` raw files per record,
# and log every written record as "start,end,fileIndex" in FileRegister.csv.
nFiles = len(fileList)
nPerRecord = 200
filePathWrite = '/content/drive/My Drive/Data/Isolation/Training/'
fileNameWrite = "TrainCorrected_"
# False regenerates everything from the first file (what this cell did before, where
# start/fileIndex were unconditionally reset to 0). Set to True to continue after the
# last record logged in the register.
resumeFromRegister = False

# Read start and beginning from file register
with open(filePathWrite+"FileRegister.csv",'r') as f:
    register = np.genfromtxt(f, delimiter=',').astype(int)
print(register)

# A register with a single row is read back as shape (3,), i.e. nothing to resume from.
if resumeFromRegister and not np.shape(register) == (3,):
    start = register[-1,1]
    fileIndex = register[-1,2] + 1
else:
    start = 0
    fileIndex = 0

tS = time.perf_counter()
durAverage = 0
# Loop on `start` (not `end`) so the final, possibly shorter, batch is written too.
while start < nFiles:
    end = min(start + nPerRecord, nFiles)
    print("Reading from {0} to {1}".format(start,end) )
    # Process every file of the batch, then join once instead of re-copying per file.
    data = np.concatenate([process(fileNameRead) for fileNameRead in fileList[start:end]],axis = 0)

    # Write TFRecord file
    fileName = fileNameWrite + str(fileIndex) + ".tfrecord"
    create_tfrecord(filePathWrite,fileName,data)
    # Update File Register
    with open(filePathWrite+'FileRegister.csv','a') as f:
        update = str(start)+ ',' + str(end) + ','+ str(fileIndex) + '\n'
        f.write(update)
    start = end

    # Exponential moving average of the time per record, used for the estimate below.
    tE = time.perf_counter()
    dur = tE-tS
    if durAverage == 0 :
        durAverage = dur
    else:
        durAverage = 0.9*durAverage +0.1*dur
    tS = tE
    fileIndex += 1
    print("Written File " + fileName)
    print("Average time per file: {}".format(durAverage))
    print("Estimated minutes left: {}".format((nFiles-start)/(nPerRecord/durAverage)/60))



[[    0     0     0]
 [    0   200     0]
 [  200   400     1]
 [  400   600     2]
 [  600   800     3]
 [  800  1000     4]
 [ 1000  1200     5]
 [ 1200  1400     6]
 [ 1400  1600     7]
 [ 1600  1800     8]
 [ 1800  2000     9]
 [ 2000  2200    10]
 [ 2200  2400    11]
 [ 2400  2600    12]
 [ 2600  2800    13]
 [ 2800  3000    14]
 [ 3000  3200    15]
 [ 3200  3400    16]
 [ 3400  3600    17]
 [ 3600  3800    18]
 [ 3800  4000    19]
 [ 4000  4200    20]
 [ 4200  4400    21]
 [ 4400  4600    22]
 [ 4600  4800    23]
 [ 4800  5000    24]
 [ 5000  5200    25]
 [ 5200  5400    26]
 [ 5400  5600    27]
 [ 5600  5800    28]
 [ 5800  6000    29]
 [ 6000  6200    30]
 [ 6200  6400    31]
 [ 6400  6600    32]
 [ 6600  6800    33]
 [ 6800  7000    34]
 [ 7000  7200    35]
 [ 7200  7400    36]
 [ 7400  7600    37]
 [ 7600  7800    38]
 [ 7800  8000    39]
 [ 8000  8200    40]
 [ 8200  8400    41]
 [ 8400  8600    42]
 [ 8600  8800    43]
 [ 8800  9000    44]
 [ 9000  9200    45]
 [ 9200  9400

KeyboardInterrupt: ignored

In [None]:
# Show whatever `register` currently holds as the cell output. The name is bound in
# more than one cell of this notebook, so the value depends on which cell ran last.
register

Creating Fault Severity Indexing File

In [None]:
# Build FaultSevRegister.csv: one "fileName,index,faultSeverity" row per raw fault file.
# The cell appends to an existing register, so it can be re-run after an interruption.
path1 = '/content/drive/My Drive/DataRaw/Detection/Training/'
path2 = '/content/drive/My Drive/DataRaw/Detection/Testing/'
path3 = '/content/drive/My Drive/DataRaw/Isolation/Training/'
openPattern1   = path1 + 'DataOpenFault4N_*.csv'
closedPattern1 = path1 + 'DataClosedFault4N_*.csv'

openPattern2   = path2 + 'DataOpenFault4N_*.csv'
closedPattern2 = path2 + 'DataClosedFault4N_*.csv'

openPattern3   = path3 + 'DataOpenFault4N_*.csv'
closedPattern3 = path3 + 'DataClosedFault4N_*.csv'
globPatterns = [openPattern1,closedPattern1,openPattern2,closedPattern2,openPattern3,closedPattern3]
# Seed is 6 DO NOT CHANGE
seed = 6

# Collect all fault files; sorting makes a file's index its position in a stable order.
listdir = tf.io.gfile.glob(globPatterns)
print(len(listdir))
fileList = np.sort(listdir)
print(fileList[:5])

nFiles = len(fileList)

filePathWrite = '/content/drive/My Drive/Data/Isolation/Training/'
registerPath = filePathWrite + 'FaultSevRegister.csv'
# Read the index column of the existing register to find where to continue.
# A missing register simply means starting from the first file.
try:
    with open(registerPath,'r') as f:
        doneIndices = np.atleast_1d(np.genfromtxt(f, delimiter=',', usecols=1))
except FileNotFoundError:
    doneIndices = np.empty(0)
# As before, a register with a single row restarts at 0.
# NOTE(review): confirm whether that single row is a placeholder or a real entry.
if doneIndices.size > 1:
    startIndex = int(doneIndices[-1]) + 1
else:
    startIndex = 0
print(startIndex)

faultSeverity = 0
tS = time.perf_counter()
durAverage = 0
fileCounter = 0
alpha = 0.99  # smoothing factor of the moving average of the time per file
with open(registerPath,'a') as registerFile:
    for fileIndex,fileNameRead in enumerate(fileList[startIndex:]):
        # Only the header row is needed; its fifth value is the fault severity.
        with open(fileNameRead,'r') as f:
            info = np.genfromtxt(f,delimiter = ",",max_rows = 1)
        faultSeverity = info[4]
        # Update Fault Severity Register
        update = fileNameRead+ ','+ str(fileIndex+startIndex)+ ',' + str(faultSeverity)+ '\n'
        registerFile.write(update)
        tE = time.perf_counter()
        dur = tE-tS
        if durAverage == 0 :
            durAverage = dur
        else:
            durAverage = alpha * durAverage + (1-alpha) * dur
        tS = tE
        print("Read File with Index: {}".format(fileIndex+startIndex))
        print("Average time per file: {}".format(durAverage))
        print("Estimated minutes left: {}".format((nFiles-(fileIndex+startIndex))*durAverage/60 ))

        # Clear the cell output every ten files so the progress log stays short.
        if fileCounter >= 9:
            clear_output()
            fileCounter = 0
        else:
            fileCounter +=1
