<a href="https://colab.research.google.com/github/KeerthiVasan-ai/deep-learning-tools-lab/blob/main/1_Data_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

import os
import pathlib

## Dataset from List

In [2]:
dataset = tf.data.Dataset.from_tensor_slices(
    [1,2,3]
)
for element in dataset:
  print(element)

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)


In [4]:
dataset = tf.data.Dataset.from_tensors(
    [1,2,3]
)
for element in dataset:
  print(element)

tf.Tensor([1 2 3], shape=(3,), dtype=int32)


### Dataset from TextFile

In [6]:
directory_url = "https://storage.googleapis.com/download.tensorflow.org/data/illiad/"
file_names = ['cowper.txt','derby.txt','butler.txt']

file_paths = [
    tf.keras.utils.get_file(file_name,directory_url+file_name)
    for file_name in file_names
]

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt


In [7]:
text_line_dataset = tf.data.TextLineDataset(file_paths)

for line in text_line_dataset.take(5):
  print(line.numpy())

b"\xef\xbb\xbfAchilles sing, O Goddess! Peleus' son;"
b'His wrath pernicious, who ten thousand woes'
b"Caused to Achaia's host, sent many a soul"
b'Illustrious into Ades premature,'
b'And Heroes gave (so stood the will of Jove)'


### Dataset from set of Files

In [8]:
flower_root = (
    tf.keras.utils.get_file(
        "flower_photos",
        "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz",
        untar = True
    )
)
flower_root = pathlib.Path(flower_root)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [9]:
file_path_ds = tf.data.Dataset.list_files(str(flower_root/'*/*'))

def process_path(file_path):
  label = tf.strings.split(file_path,os.sep)[-2]
  return tf.io.read_file(file_path),label

In [10]:
labeled_data = file_path_ds.map(process_path)

In [12]:
for image,label in labeled_data.take(1):
  print(image,label,sep="\n")


tf.Tensor(b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xe2\x0cXICC_PROFILE\x00\x01\x01\x00\x00\x0cHLino\x02\x10\x00\x00mntrRGB XYZ \x07\xce\x00\x02\x00\t\x00\x06\x001\x00\x00acspMSFT\x00\x00\x00\x00IEC sRGB\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-HP  \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x11cprt\x00\x00\x01P\x00\x00\x003desc\x00\x00\x01\x84\x00\x00\x00lwtpt\x00\x00\x01\xf0\x00\x00\x00\x14bkpt\x00\x00\x02\x04\x00\x00\x00\x14rXYZ\x00\x00\x02\x18\x00\x00\x00\x14gXYZ\x00\x00\x02,\x00\x00\x00\x14bXYZ\x00\x00\x02@\x00\x00\x00\x14dmnd\x00\x00\x02T\x00\x00\x00pdmdd\x00\x00\x02\xc4\x00\x00\x00\x88vued\x00\x00\x03L\x00\x00\x00\x86view\x00\x00\x03\xd4\x00\x00\x00$lumi\x00\x00\x03\xf8\x00\x00\x00\x14meas\x00\x00\x04\x0c\x00\x00\x00$tech\x00\x00\x040\x00\x00\x00\x0crTRC\x00\x

### Batching Dataset

In [13]:
dataset = tf.data.Dataset.range(100)
batched_dataset = dataset.batch(15)

for element in batched_dataset:
  print(element)

tf.Tensor([ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14], shape=(15,), dtype=int64)
tf.Tensor([15 16 17 18 19 20 21 22 23 24 25 26 27 28 29], shape=(15,), dtype=int64)
tf.Tensor([30 31 32 33 34 35 36 37 38 39 40 41 42 43 44], shape=(15,), dtype=int64)
tf.Tensor([45 46 47 48 49 50 51 52 53 54 55 56 57 58 59], shape=(15,), dtype=int64)
tf.Tensor([60 61 62 63 64 65 66 67 68 69 70 71 72 73 74], shape=(15,), dtype=int64)
tf.Tensor([75 76 77 78 79 80 81 82 83 84 85 86 87 88 89], shape=(15,), dtype=int64)
tf.Tensor([90 91 92 93 94 95 96 97 98 99], shape=(10,), dtype=int64)


### Shuffling Dataset

In [21]:
dataset = tf.data.Dataset.range(20)
shuffled_dataset = dataset.shuffle(3,reshuffle_each_iteration=False)

for element in shuffled_dataset:
  print(element)

tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)
tf.Tensor(9, shape=(), dtype=int64)
tf.Tensor(10, shape=(), dtype=int64)
tf.Tensor(12, shape=(), dtype=int64)
tf.Tensor(5, shape=(), dtype=int64)
tf.Tensor(13, shape=(), dtype=int64)
tf.Tensor(15, shape=(), dtype=int64)
tf.Tensor(16, shape=(), dtype=int64)
tf.Tensor(11, shape=(), dtype=int64)
tf.Tensor(14, shape=(), dtype=int64)
tf.Tensor(19, shape=(), dtype=int64)
tf.Tensor(18, shape=(), dtype=int64)
tf.Tensor(17, shape=(), dtype=int64)


### Building a Pipeline

#### Data Source

In [22]:
train_ds,test_ds= tf.keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [23]:
def normalize_image(img,label):
  return (tf.cast(img,tf.float32) / 255.0, label)

#### Train Data Pipeline

In [26]:
train_dataset = tf.data.Dataset.from_tensor_slices(train_ds)

train_dataset = train_dataset.map(
    normalize_image,num_parallel_calls=tf.data.AUTOTUNE
)

train_dataset = train_dataset.cache()
train_dataset = train_dataset.shuffle(len(train_dataset))

train_dataset = train_dataset.batch(64)
train_dataset = train_dataset.prefetch(tf.data.AUTOTUNE)

train_dataset


<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.uint8, name=None))>

#### Test Data Pipeline

In [27]:
test_dataset = tf.data.Dataset.from_tensor_slices(test_ds)

test_dataset = test_dataset.map(
    normalize_image,num_parallel_calls=tf.data.AUTOTUNE
)

test_dataset = test_dataset.cache()
test_dataset = test_dataset.shuffle(len(train_dataset))

test_dataset = test_dataset.batch(64)
test_dataset = test_dataset.prefetch(tf.data.AUTOTUNE)

test_dataset

<_PrefetchDataset element_spec=(TensorSpec(shape=(None, 28, 28), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.uint8, name=None))>

In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

model = Sequential([
    #Rest of the model goes here
])