# TensorFlow `tf.data`: Build TensorFlow input pipelines

In [1]:
import tensorflow as tf

import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Print NumPy floating-point values with 4 digits of precision to keep output compact.
np.set_printoptions(precision=4)

In [None]:
# Slice an in-memory list of six integers into a dataset, one element per
# list entry.
dataset = tf.data.Dataset.from_tensor_slices(
    tensors=[8, 3, 0, 8, 2, 1],
)
# Display the dataset object itself.
dataset

In [None]:
# Walk the dataset and print every element as a NumPy value.
for value in dataset.as_numpy_iterator():
  print(value)

In [None]:
# Slice a length-4 vector of uniform random floats into a dataset of scalars.
# The operation-level seed makes the generated values repeat when the
# notebook is run top to bottom from a fresh kernel, so the printed output
# is reproducible. (Re-running this cell in the same kernel still advances
# the random sequence.)
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4], seed=42))

# Show the type specification of a single element of the dataset.
dataset1.element_spec

In [None]:
# Print each random element of dataset1 as a NumPy value.
for value in dataset1.as_numpy_iterator():
  print(value)

In [None]:
# Base URL and names of the three text files to fetch.
directory_url = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
file_names = ['cowper.txt', 'derby.txt', 'butler.txt']

# Fetch every file with get_file and keep the local path it returns,
# in the same order as file_names.
file_paths = [
    tf.keras.utils.get_file(fname=name, origin=directory_url + name)
    for name in file_names
]

In [None]:
# Read the downloaded text files line by line as one dataset.
# Note: this rebinds `dataset`, which previously held the integer dataset.
dataset = tf.data.TextLineDataset(filenames=file_paths)

In [None]:
# Peek at the first five lines of the text dataset.
for raw_line in dataset.take(5).as_numpy_iterator():
  print(raw_line)

In [None]:
# Display the local file paths collected from get_file.
file_paths

In [2]:
# Fetch the Titanic training CSV and remember the local path get_file returns.
titanic_file = tf.keras.utils.get_file(
    fname="train.csv",
    origin="https://storage.googleapis.com/tf-datasets/titanic/train.csv",
)

Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv


In [3]:
# Load the downloaded CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=titanic_file)
df.head(n=5)

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [7]:
# Convert the DataFrame into a dict of columns so that every dataset element
# is a dict mapping column name -> scalar value for a single row.
titanic_slices = tf.data.Dataset.from_tensor_slices(dict(df))

# Print the first two rows. `!r:20s` renders the quoted column name padded
# to a width of 20 characters so the values line up.
for feature_batch in titanic_slices.take(2):
  for key, value in feature_batch.items():
    print(f"  {key!r:20s}: {value}")

  'survived'          : 0
  'sex'               : b'male'
  'age'               : 22.0
  'n_siblings_spouses': 1
  'parch'             : 0
  'fare'              : 7.25
  'class'             : b'Third'
  'deck'              : b'unknown'
  'embark_town'       : b'Southampton'
  'alone'             : b'n'
  'survived'          : 1
  'sex'               : b'female'
  'age'               : 38.0
  'n_siblings_spouses': 1
  'parch'             : 0
  'fare'              : 71.2833
  'class'             : b'First'
  'deck'              : b'C'
  'embark_town'       : b'Cherbourg'
  'alone'             : b'n'


In [6]:
# Fetch the first row explicitly instead of relying on whatever value the
# loop variable `feature_batch` still holds from another cell. A fresh
# top-to-bottom run then always displays the first record as a dict of
# scalar tensors, independent of cell execution order.
feature_batch = next(iter(titanic_slices.take(1)))
feature_batch

{'survived': <tf.Tensor: shape=(), dtype=int64, numpy=0>,
 'sex': <tf.Tensor: shape=(), dtype=string, numpy=b'male'>,
 'age': <tf.Tensor: shape=(), dtype=float64, numpy=22.0>,
 'n_siblings_spouses': <tf.Tensor: shape=(), dtype=int64, numpy=1>,
 'parch': <tf.Tensor: shape=(), dtype=int64, numpy=0>,
 'fare': <tf.Tensor: shape=(), dtype=float64, numpy=7.25>,
 'class': <tf.Tensor: shape=(), dtype=string, numpy=b'Third'>,
 'deck': <tf.Tensor: shape=(), dtype=string, numpy=b'unknown'>,
 'embark_town': <tf.Tensor: shape=(), dtype=string, numpy=b'Southampton'>,
 'alone': <tf.Tensor: shape=(), dtype=string, numpy=b'n'>}

In [8]:
# Format-spec demo: `!r` applies repr() (which adds the quotes) and `:20`
# pads the result to a width of 20 characters, left-aligned for strings.
"{!r:20}".format("Hello")

"'Hello'             "

In [10]:
# Stream the CSV in batches of 4 rows. Only the three listed columns are
# read, and `survived` is split off as the label, so every element is a
# (features, label) pair.
titanic_batches = tf.data.experimental.make_csv_dataset(
    file_pattern=titanic_file,
    batch_size=4,
    label_name="survived",
    select_columns=['class', 'fare', 'survived'],
)

In [16]:
# Take a single batch: print its labels first, then every feature column
# with the column name padded to a common width.
for feature_batch, label_batch in titanic_batches.take(1):
  print(f"'survived': {label_batch}")
  for key, value in feature_batch.items():
    print(f"  {key!r:20s}: {value}")

'survived': [1 0 0 0]
  'fare'              : [ 7.925  20.2125  7.8958 31.275 ]
  'class'             : [b'Third' b'Third' b'Third' b'Third']


## Sources

https://www.tensorflow.org/guide/data

https://www.tensorflow.org/tutorials/keras/text_classification

https://www.tensorflow.org/api_docs/python/tf/data/Dataset

https://www.tensorflow.org/tutorials/load_data/text

https://www.tensorflow.org/api_docs/python/tf/keras/utils/get_file

https://www.tensorflow.org/tutorials/load_data/csv

https://www.tensorflow.org/tutorials/load_data/pandas_dataframe