# Make TFRecords

In [None]:
import pandas as pd
import tensorflow as tf

## Load data

In [None]:
# Read the pre-split feature and label tables from CSV files in the working directory.
X_train, X_test = pd.read_csv("X_train.csv"), pd.read_csv("X_test.csv")
y_train, y_test = pd.read_csv("y_train.csv"), pd.read_csv("y_test.csv")

# Column names of the training feature table, kept as a plain Python list.
input_feat_names = X_train.columns.tolist()

In [None]:
# Print the (rows, columns) shape of the training features, then display the
# first rows as the cell output.
print(X_train.shape)
X_train.head()

In [None]:
# Print the (rows, columns) shape of the training labels, then display the
# first rows as the cell output.
print(y_train.shape)
y_train.head()

In [None]:
# Print the (rows, columns) shape of the test features, then display the first
# rows as the cell output. The preview is taken from X_test so that it matches
# the shape printed on the line above.
print(X_test.shape)
X_test.head()

In [None]:
# Print the (rows, columns) shape of the test labels, then display the first
# rows as the cell output.
print(y_test.shape)
y_test.head()

## Make train TFRecords

In [None]:
def _float_feature(value):
    """Wrap a single float / double in a tf.train.Feature holding a one-element FloatList."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
    """Wrap a single bool / enum / int / uint in a tf.train.Feature holding a one-element Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def serialize_example(target, data, feat_names):
    """
    Creates a serialized tf.train.Example message ready to be written to a file.

    Args:
        target: Label value; stored as an int64 feature under the key "target".
        data: Iterable of feature values; each is stored as a float feature.
        feat_names: Iterable of feature names, paired positionally with `data`.
            Pairing stops at the shorter of `feat_names` and `data`.

    Returns:
        The serialized tf.train.Example as a bytes string.
    """
    # Map each feature name to its tf.train.Example-compatible value. The
    # names come from the `feat_names` argument, not from a notebook global,
    # so the function works for any feature table passed in.
    feature = {"target": _int64_feature(target)}
    feature.update(
        (name, _float_feature(value)) for name, value in zip(feat_names, data)
    )

    # Create a Features message using tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()

In [None]:
filename = "tfRecord/train.tfrecord"

# Convert the tables to NumPy once, outside the loop, instead of doing a
# pandas .iloc lookup for every row.
feature_matrix = X_train.to_numpy()
label_matrix = y_train.to_numpy()

# Write one tf.train.Example per training row: the whole feature row goes into
# a float list under "features", the label into an int64 list under "label".
with tf.io.TFRecordWriter(filename) as writer:
    for i, feature_row in enumerate(feature_matrix):
        features = feature_row.tolist()
        # .item() extracts the single label of row i as a Python scalar and
        # raises if the row holds more than one value; calling int() directly
        # on a 1-D array is deprecated in recent NumPy releases.
        label = int(label_matrix[i].item())
        example = tf.train.Example()
        example.features.feature["features"].float_list.value.extend(features)
        example.features.feature["label"].int64_list.value.append(label)
        writer.write(example.SerializeToString())

## Make test TFRecords

In [None]:
filename = "tfRecord/test.tfrecord"

# Convert the tables to NumPy once, outside the loop, instead of doing a
# pandas .iloc lookup for every row.
feature_matrix = X_test.to_numpy()
label_matrix = y_test.to_numpy()

# Write one tf.train.Example per test row: the whole feature row goes into a
# float list under "features", the label into an int64 list under "label".
with tf.io.TFRecordWriter(filename) as writer:
    for i, feature_row in enumerate(feature_matrix):
        features = feature_row.tolist()
        # .item() extracts the single label of row i as a Python scalar and
        # raises if the row holds more than one value; calling int() directly
        # on a 1-D array is deprecated in recent NumPy releases.
        label = int(label_matrix[i].item())
        example = tf.train.Example()
        example.features.feature["features"].float_list.value.extend(features)
        example.features.feature["label"].int64_list.value.append(label)
        writer.write(example.SerializeToString())

## Test train TFRecords

In [None]:
# Open the training TFRecord file written earlier as a dataset of raw
# (still serialized) records.
filenames = ["tfRecord/train.tfrecord"]
raw_dataset = tf.data.TFRecordDataset(filenames)

In [None]:
# Deserialize the first record back into a tf.train.Example and print it, to
# eyeball the stored feature keys and values.
for serialized in raw_dataset.take(1):
    example = tf.train.Example.FromString(serialized.numpy())
    print(example)

In [None]:
# Decoding function
def parse_record(record, num_features=95):
    """Parse one serialized tf.train.Example into a dict of tensors.

    Args:
        record: A serialized tf.train.Example, as yielded by a TFRecordDataset.
        num_features: Length of the fixed-size float vector stored under
            "features". Defaults to 95, the value previously hard-coded here.

    Returns:
        A dict with keys 'features' (float32, length `num_features`) and
        'label' (scalar int64).
    """
    name_to_features = {
        'features': tf.io.FixedLenFeature([num_features], tf.float32),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    return tf.io.parse_single_example(record, name_to_features)

def decode_record(record):
    """Split a parsed record mapping into a (features, label) tuple.

    Args:
        record: Mapping with the keys 'features' and 'label'.

    Returns:
        A 2-tuple ``(record['features'], record['label'])``.
    """
    return record['features'], record['label']

In [None]:
# Parse and decode the first record of the dataset, then print its feature
# tensor and label as a sanity check.
for serialized in raw_dataset.take(1):
    X, y = decode_record(parse_record(serialized))
    print(X, y)

## Test test TFRecords

In [None]:
# Open the test TFRecord file written earlier as a dataset of raw (still
# serialized) records; this rebinds raw_dataset to the test file.
filenames = ["tfRecord/test.tfrecord"]
raw_dataset = tf.data.TFRecordDataset(filenames)

In [None]:
# Parse and decode the first record of the dataset, then print its feature
# tensor and label as a sanity check.
for serialized in raw_dataset.take(1):
    X, y = decode_record(parse_record(serialized))
    print(X, y)