In [1]:
import csv
import tensorflow as tf
from tqdm import tqdm

In [5]:
# define some helper functions
from tensorflow.train import BytesList, Int64List, FloatList

def _bytes_feature(value):
    """Wrap one string or bytes value in a bytes_list ``tf.train.Feature``.

    Args:
        value: A ``str`` (encoded with ``str.encode()``, i.e. UTF-8) or a
            ``bytes`` object (stored unchanged).

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` holds exactly one element.
    """
    # bytes objects have no .encode(); pass them through so that
    # already-encoded payloads can be stored as well as text.
    encoded = value if isinstance(value, bytes) else value.encode()
    return tf.train.Feature(bytes_list=BytesList(value=[encoded]))

def _int64_feature(value):
    """Wrap one integer value in an int64_list ``tf.train.Feature``.

    Args:
        value: An ``int`` or anything ``int()`` accepts, such as a digit
            string read from a CSV cell (e.g. ``"204909200"``).

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds exactly one element.

    Raises:
        ValueError: If ``value`` is a string that ``int()`` cannot parse
            (for example ``"30.5"``).
    """
    # Int64List only accepts integers, so convert with int() rather than
    # encoding the value to bytes.
    return tf.train.Feature(int64_list=Int64List(value=[int(value)]))


def _float_feature(value):
    """Wrap one floating-point value in a float_list ``tf.train.Feature``.

    Args:
        value: A ``float``/``int`` or anything ``float()`` accepts, such as a
            decimal string read from a CSV cell (e.g. ``"29.674999"``).

    Returns:
        A ``tf.train.Feature`` whose ``float_list`` holds exactly one element.

    Raises:
        ValueError: If ``value`` is a string that ``float()`` cannot parse.
    """
    # FloatList only accepts numbers, so convert with float() rather than
    # encoding the value to bytes.
    return tf.train.Feature(float_list=FloatList(value=[float(value)]))


In [3]:
# Peek at the first lines of the raw CSV (header plus a few rows) via the shell.
! head {'/content/AAPL.csv'}

Date,Open,High,Low,Close,Adj Close,Volume
2015-10-29,29.674999,30.172501,29.567499,30.132500,27.667913,204909200
2015-10-30,30.247499,30.305000,29.862499,29.875000,27.431473,197461200
2015-11-02,30.200001,30.340000,29.902500,30.295000,27.817125,128813200
2015-11-03,30.197500,30.872499,30.174999,30.642500,28.136196,182076000
2015-11-04,30.782499,30.955000,30.405001,30.500000,28.005352,179544400
2015-11-05,30.462500,30.672501,30.045000,30.230000,27.876257,158210800
2015-11-06,30.277500,30.452499,30.155001,30.264999,27.908531,132169200
2015-11-09,30.240000,30.452499,30.012501,30.142500,27.795572,135485600
2015-11-10,29.225000,29.517500,29.014999,29.192499,26.919538,236511600


In [6]:
from tensorflow.train import Example, Features

original_filename = '/content/AAPL.csv'
tfrecords_filename = '/content/apple_stocks.tfrecords'

# Convert every CSV row into a tf.train.Example and append it to the TFRecord
# file. Both files are managed by the `with` statement, so the writer is
# closed even if a row fails part-way through.
# newline='' is what the csv module documents for files handed to a reader.
with open(original_filename, newline='') as csv_file, \
        tf.io.TFRecordWriter(tfrecords_filename) as tf_record_writer:
    reader = csv.DictReader(csv_file, delimiter=',', quotechar='"')
    for row in tqdm(reader):
        # Every column is stored as the raw CSV text (encoded bytes), so the
        # numeric fields must be parsed from strings when reading back.
        example = Example(features=Features(feature={
            'date': _bytes_feature(row['Date']),
            'open': _bytes_feature(row['Open']),
            'high': _bytes_feature(row['High']),
            'low': _bytes_feature(row['Low']),
            'close': _bytes_feature(row['Close']),
            'adj close': _bytes_feature(row['Adj Close']),
            'volume': _bytes_feature(row['Volume']),
        }))
        tf_record_writer.write(example.SerializeToString())

1510it [00:00, 12759.79it/s]


In [7]:
# Open the TFRecord file as a dataset; each element is one serialized record.
paths = ['/content/apple_stocks.tfrecords']
dataset = tf.data.TFRecordDataset(filenames=paths)

In [8]:
# Show only a handful of records: printing the whole dataset floods the
# notebook output with raw serialized bytes.
PREVIEW_COUNT = 5
for row in dataset.take(PREVIEW_COUNT):
    # .numpy() yields the record's serialized bytes, not parsed features.
    print(row.numpy())

b'\n\xa9\x01\n\x16\n\x04date\x12\x0e\n\x0c\n\n2015-10-29\n\x16\n\x05close\x12\r\n\x0b\n\t30.132500\n\x15\n\x04high\x12\r\n\x0b\n\t30.172501\n\x15\n\x04open\x12\r\n\x0b\n\t29.674999\n\x17\n\x06volume\x12\r\n\x0b\n\t204909200\n\x14\n\x03low\x12\r\n\x0b\n\t29.567499\n\x1a\n\tadj close\x12\r\n\x0b\n\t27.667913'
b'\n\xa9\x01\n\x15\n\x04open\x12\r\n\x0b\n\t30.247499\n\x1a\n\tadj close\x12\r\n\x0b\n\t27.431473\n\x16\n\x04date\x12\x0e\n\x0c\n\n2015-10-30\n\x15\n\x04high\x12\r\n\x0b\n\t30.305000\n\x16\n\x05close\x12\r\n\x0b\n\t29.875000\n\x14\n\x03low\x12\r\n\x0b\n\t29.862499\n\x17\n\x06volume\x12\r\n\x0b\n\t197461200'
b'\n\xa9\x01\n\x16\n\x04date\x12\x0e\n\x0c\n\n2015-11-02\n\x15\n\x04open\x12\r\n\x0b\n\t30.200001\n\x16\n\x05close\x12\r\n\x0b\n\t30.295000\n\x17\n\x06volume\x12\r\n\x0b\n\t128813200\n\x1a\n\tadj close\x12\r\n\x0b\n\t27.817125\n\x15\n\x04high\x12\r\n\x0b\n\t30.340000\n\x14\n\x03low\x12\r\n\x0b\n\t29.902500'
b'\n\xa9\x01\n\x14\n\x03low\x12\r\n\x0b\n\t30.174999\n\x15\n\x04high\x12\