# Verification of TFRecord writer

# Setup

In [1]:
!pip install tensorflow==1.15.0

Collecting tensorflow==1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/3f/98/5a99af92fb911d7a88a0005ad55005f35b4c1ba8d75fba02df726cd936e6/tensorflow-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl (412.3MB)
[K     |████████████████████████████████| 412.3MB 38kB/s 
Collecting keras-applications>=1.0.8
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)
[K     |████████████████████████████████| 51kB 3.2MB/s 
Collecting tensorboard<1.16.0,>=1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/1e/e9/d3d747a97f7188f48aa5eda486907f3b345cd409f0a0850468ba867db246/tensorboard-1.15.0-py3-none-any.whl (3.8MB)
[K     |████████████████████████████████| 3.8MB 32.0MB/s 
Collecting tensorflow-estimator==1.15.1
[?25l  Downloading https://files.pythonhosted.org/packages/de/62/2ee9cd74c9fa2fa450877847ba560b260f5d0fb70ee0595203082dafcc9d/tensorflow_est

Package Import and Dependencies


In [2]:
import tensorflow as tf
import tensorflow.train as tft
import numpy as np
import pandas as pd
import io
import os
import shutil as sh
import pickle
from pathlib import Path
import random
# Authentication for Managing Data
from google.colab import drive
drive.mount('/content/drive')
tf.enable_eager_execution()

Mounted at /content/drive


In [3]:
rootPath = '/content/drive/My Drive/'
registerPath = rootPath + 'DataRaw/Detection/Training/DataNoFault4N_32.csv'

# The original cell retried the read in an endless loop with a bare `except`,
# presumably to ride out a Drive mount that is not ready yet (TODO confirm).
# That loop never terminates when the file is missing or all-zero and it also
# swallows KeyboardInterrupt, so the retry is now bounded and limited to I/O errors.
import time  # only needed for the pause between retries in this cell

maxAttempts = 10      # number of read attempts before giving up
retryDelay = 1.0      # seconds to wait between attempts

register = np.zeros(1)
lastError = None
for attempt in range(maxAttempts):
    try:
        with open(registerPath, 'r') as f:
            register = np.genfromtxt(f, delimiter=",")
    except OSError as err:
        # File not reachable (yet): remember why and try again.
        lastError = err
    else:
        # Same success criterion as before: at least one non-zero entry was read.
        if np.any(register):
            break
    time.sleep(retryDelay)
else:
    raise RuntimeError('Could not load non-zero data from ' + registerPath +
                       ' after ' + str(maxAttempts) + ' attempts') from lastError
np.shape(register)


(141649, 6)

Function Definitions

In [4]:
def process(fileName, settleIndex=300):
    """Load one simulation CSV and return one row per (satellite, time step).

    File layout as read by this function:
      * Row 0 is an info row. Only ``info[0]`` (fault time) is used here; the
        next three entries (fault satellite, thruster and type, going by the
        names used in the earlier version of this function) are ignored.
      * Every following group of 24 rows is one time step: 4 consecutive rows
        for each of 6 satellites, of which the first 6 columns are kept.

    Args:
        fileName: Path of the CSV file; opened through ``tf.io.gfile.GFile``.
        settleIndex: Number of leading time steps to discard. Defaults to 300,
            the value that used to be hard-coded.

    Returns:
        Float array of shape ``(6 * noRows, 27)`` where
        ``noRows = number_of_time_steps - settleIndex``. Columns 0-23 are the
        four 6-value rows of a satellite laid side by side, column 24 is the
        fault label (1 for rows whose index exceeds ``faultTime - settleIndex``,
        else 0), column 25 the time-step index (starting at ``settleIndex``)
        and column 26 the satellite index (0-5). Rows are grouped by satellite.

    Raises:
        ValueError: If the number of data rows is not a multiple of 24 or the
            file holds fewer than ``settleIndex`` time steps.
    """
    rowsPerStep = 24                    # CSV rows that make up one time step
    noSats = 6                          # satellites per time step
    rowsPerSat = rowsPerStep // noSats  # consecutive rows belonging to one satellite
    noCols = 6                          # columns kept from every CSV row

    # Load correct File
    with tf.io.gfile.GFile(fileName,'r') as f:
        data = np.genfromtxt(f,delimiter = ",")

    # Separate the info row from the relative position & velocity data
    info = data[0,:]
    faultTime = np.ceil(info[0])
    if faultTime<1:
        # A fault time below 1 is pushed far beyond any row index, so no row
        # ends up labelled as faulty.
        faultTime = 1e10

    posvelData = data[1:,:noCols]
    totalRows = np.size(posvelData, 0)
    noRows = totalRows//rowsPerStep - settleIndex
    if totalRows % rowsPerStep != 0 or noRows < 0:
        # Previously this surfaced as an opaque broadcasting / negative
        # dimension ValueError from numpy.
        raise ValueError('Expected a multiple of ' + str(rowsPerStep) +
                         ' data rows covering at least ' + str(settleIndex) +
                         ' time steps, got ' + str(totalRows) + ' rows in ' +
                         str(fileName))

    # Drop the settling phase, then view the remainder as
    # [time step, satellite, 4 rows * 6 columns]. Putting the satellite axis
    # first and flattening stacks the per-satellite blocks on top of each
    # other, exactly like concatenating six (noRows, 24) arrays.
    settled = posvelData[settleIndex * rowsPerStep:, :]
    perStep = settled.reshape(noRows, noSats, rowsPerSat * noCols)
    satData = perStep.transpose(1, 0, 2).reshape(noSats * noRows, rowsPerSat * noCols)

    # The label only depends on the time step, so it is repeated per satellite.
    faultLabel = (np.arange(noRows) > faultTime - settleIndex).astype(float)
    labels = np.tile(faultLabel, noSats).reshape(-1,1)

    sats = np.repeat(np.arange(noSats), noRows).reshape(-1,1)
    time = np.tile(np.arange(settleIndex, noRows + settleIndex), noSats).reshape(-1,1)

    return np.concatenate((satData, labels, time, sats), axis = 1)
 

In [5]:
def create_tfrecord(filePath,fileName, data):
    """Write ``data`` to ``filePath + fileName`` as a TFRecord with a single Example.

    Each column of ``data`` becomes one feature holding the whole column:
    columns 0-23 are stored as float lists under the keys ``x1, y1, z1, vx1,
    vy1, vz1, ..., x4, ..., vz4`` and columns 24-26 are cast to int and stored
    as int64 lists under ``label``, ``time`` and ``sats``.

    Args:
        filePath: Target directory. It is concatenated with ``fileName``
            as-is, so it must already end with a path separator.
        fileName: Name of the TFRecord file to create.
        data: 2-D array with at least 27 columns in the order described above.

    Returns:
        None.
    """
    # Column order of the float features: blocks 1-4, six values per block.
    floatNames = [axis + str(block)
                  for block in range(1, 5)
                  for axis in ('x', 'y', 'z', 'vx', 'vy', 'vz')]
    # Integer features follow directly after the float columns.
    intNames = ['label', 'time', 'sats']

    # Create dict
    feature_dict = {}
    for col, name in enumerate(floatNames):
        feature_dict[name] = tft.Feature(
            float_list = tft.FloatList(value = data[:,col]))
    for col, name in enumerate(intNames, start = len(floatNames)):
        feature_dict[name] = tft.Feature(
            int64_list = tft.Int64List(value = data[:,col].astype(int)))

    features = tft.Features(feature = feature_dict)
    DataExample = tft.Example(features = features)

    # tf.io.TFRecordWriter is the non-deprecated name of tf.python_io.TFRecordWriter
    # and matches the tf.io calls used elsewhere in this notebook.
    with tf.io.TFRecordWriter(filePath+fileName) as writer:
        writer.write(DataExample.SerializeToString())
    return

In [6]:
def decode_TFRecord(exampleProto):
    """Parse one serialized Example from the TFRecord file.

    Args:
        exampleProto: A single serialized Example.

    Returns:
        The result of ``tf.io.parse_single_example``: a dict keyed by feature
        name, with variable-length float32 entries for ``x1 ... vz4`` and
        variable-length int64 entries for ``label``, ``time`` and ``sats``.
    """
    # Float features: six values for each of the blocks 1-4.
    axes = ('x', 'y', 'z', 'vx', 'vy', 'vz')
    schema = {axis + str(idx): tf.VarLenFeature(dtype=tf.float32)
              for idx in range(1, 5)
              for axis in axes}

    # Integer features.
    schema.update({key: tf.VarLenFeature(dtype=tf.int64)
                   for key in ('label', 'time', 'sats')})

    # Extract features from serialized data
    return tf.io.parse_single_example(exampleProto, schema)



The procedure is checked by reading a CSV data file and using it to write a TFRecord file. The written TFRecord file is then parsed again and compared to the original data.

In [7]:
# Directory (below rootPath) and name of the CSV file used for the round-trip check.
# filePathCheck keeps its trailing '/' because it is joined with fileName by
# plain string concatenation further down.
filePathCheck = rootPath + 'DataRaw/Detection/Training/'
fileName = "DataOpenFault4N_66_404.csv"

In [8]:
# Read Data from Test file
# `data` is kept because the read-back cell compares against it.
data = process(filePathCheck + fileName)
# Create TFRecord file
# filePathWrite ends with '/' because create_tfrecord concatenates path and name as-is.
filePathWrite = rootPath + "Colab Notebooks/Verification/"
fileNameCheck = 'Test_0.tfrecord'
create_tfrecord(filePathWrite, fileNameCheck,data)

In [42]:
# Read Created TFRecord File
readSet = tf.data.TFRecordDataset(filePathWrite + fileNameCheck)

# Extract features from serialized data.
# The file holds a single Example; it is parsed with decode_TFRecord so the
# feature description lives in one place instead of being repeated here.
feature = None
for s in readSet.take(1):
    feature = decode_TFRecord(s)
if feature is None:
    raise ValueError('No record found in ' + filePathWrite + fileNameCheck)

# Compare every column that was written, not only x1.
# Float columns are stored as float32, so they are compared with a tolerance;
# the integer columns must match exactly.
floatNames = [axis + str(block)
              for block in range(1, 5)
              for axis in ('x', 'y', 'z', 'vx', 'vy', 'vz')]
for col, name in enumerate(floatNames):
    colRead = tf.sparse.to_dense(feature[name]).numpy()
    np.testing.assert_allclose(colRead, data[:, col], rtol=1e-6, atol=1e-12,
                               err_msg=name)
for col, name in enumerate(('label', 'time', 'sats'), start=len(floatNames)):
    colRead = tf.sparse.to_dense(feature[name]).numpy()
    np.testing.assert_array_equal(colRead, data[:, col].astype(int),
                                  err_msg=name)

# Print features
# Mean absolute difference between the read-back and the written x1 column.
x1read = tf.sparse.to_dense(feature['x1']).numpy()
x1write = data[:,0]
print(np.sum(np.abs(x1read-x1write)/len(x1read)))


1.738364150070746e-08
