##### Streming Approach: Loading dataset into batches
* tf.data.Dataset: Helps to build tensorflow input pipeline
* tf_dataset.filter(filter_func): To detect if dataset is blurry or not
  ##### How to apply all filters at once
* tf_dataset = tf.data.Dataset.list_files('images/*').map(process_img).filter(filter_func).map(lambda x:x/255)

In [3]:
import tensorflow as tf

In [4]:
daily_sales_numbers = [21,22, -100, 31, -1, 32, 34, 31]

In [5]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [6]:
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
-100
31
-1
32
34
31


In [7]:
for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-100


In [8]:
#Filtering datapoints : 
tf_dataset = tf_dataset.filter(lambda x:x>0)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

21
22
31
32
34
31


In [9]:
tf_dataset = tf_dataset.map(lambda x: x*83)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1743
1826
2573
2656
2822
2573


In [10]:
tf_dataset = tf_dataset.shuffle(2)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

1743
2573
1826
2656
2573
2822


In [11]:
#Creating batches
for sales_batch in tf_dataset.batch(2):
    print(sales_batch.numpy())

[1743 2573]
[1826 2822]
[2656 2573]


In [12]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
#Filtering, Maping, Shuffling, Batching
tf_dataset = tf_dataset.filter(lambda x: x>0).map(lambda y: y*80).shuffle(2).batch(2)
for sales_batch in tf_dataset.batch(2):
    print(sales_batch.numpy())

[[1760 2480]
 [2560 2720]]
[[1680 2480]]


In [16]:
images_ds = tf.data.Dataset.list_files('images/*/*', shuffle=False)
#Stored the image path
for files in images_ds.take(3):
    print(files.numpy())

b'images/cat/00tb-cats1-videoSixteenByNineJumbo1600.jpg'
b'images/cat/07CAT-STRIPES-mediumSquareAt3X-v2.jpg'
b'images/cat/1200px-RedCat_8727.jpg'


In [35]:
#Shuffling image
#200 : Buffer size
images_ds = images_ds.shuffle(200)
#Stored the image path
for files in images_ds.take(3):
    print(files.numpy())

b'images/dog/Cavalier-King-Charles-Spaniel-laying-down-indoors.jpg'
b'images/cat/Cat-andriyko-podilnyk-RCfi7vgJjUY-unsplash_1659328989095_1659328998370_1659328998370.jpg'
b'images/dog/best-dog-breeds-for-seniors-4138298-hero-a02732418cd343eb89164c4230e0b574.jpg'


In [19]:
class_names = ['cat','dog']

In [23]:
image_count = len(images_ds)
image_count

176

In [25]:
#Train Test Split
train_size = int(image_count*0.8)

train_ds = images_ds.take(train_size) #Will take 80% of dataset
test_ds = images_ds.skip(train_size)

In [27]:
len(train_ds)

140

In [28]:
len(test_ds)

36

In [38]:
#Retrieving label from the string
s= "b'images/dog/best-dog-breeds-for-seniors-4138298-hero-a02732418cd343eb89164c4230e0b574.jpg"
s.split('/')[-2]

'dog'

In [44]:
#Since file path is a tensor object we use spatial function
def get_label(file_path):
    import os
    return tf.strings.split(file_path, os.path.sep)[-2]   #os seperator

In [48]:
#Getting label as well as the image
def process_image(file_path):
    label = get_label(file_path)
    #Reading the file
    img = tf.io.read_file(file_path)
    #Decoding jpeg image
    img = tf.image.decode_jpeg(img)
    #Resizing the image
    img = tf.image.resize(img, [128,128])

    return img, label

In [49]:
for t in train_ds.take(4):
     print(t.numpy())

b'images/cat/red-white-cat-i-white-studio_155003-13189.jpg'
b'images/cat/255883.jpg'
b'images/cat/sick_cat_1660402138551_1660402151976_1660402151976.jpg'
b'images/cat/maxresdefault (1).jpg'


In [53]:

for img, label in train_ds.map(process_image).take(3):
    print("Image:",img)
    print(label)

Image: tf.Tensor(
[[[163.       188.       122.      ]
  [164.       189.       123.      ]
  [164.       189.       123.      ]
  ...
  [162.       185.       115.      ]
  [162.       185.       117.      ]
  [161.       184.       116.      ]]

 [[164.       189.       123.      ]
  [164.       189.       123.      ]
  [165.       190.       124.      ]
  ...
  [162.       185.       115.      ]
  [162.       185.       117.      ]
  [162.       185.       117.      ]]

 [[165.       190.       124.      ]
  [165.       190.       124.      ]
  [166.       191.       125.      ]
  ...
  [160.       186.       113.      ]
  [162.       185.       115.      ]
  [162.       185.       115.      ]]

 ...

 [[114.50391  132.625     74.86719 ]
  [ 97.19141  119.43359   55.67578 ]
  [111.687744 123.92993   60.17212 ]
  ...
  [122.11743  120.05859   63.242188]
  [141.95312  109.25      56.246094]
  [145.98901   95.30859   48.429688]]

 [[105.72388  126.72388   70.635254]
  [100.91016  126.9

In [54]:
def scale(image, label):
    return image/255, label

In [63]:
train_ds = train_ds.map(scale)
for img, label in train_ds.take(5):
    print("****Image:",img.numpy()[0][0])
    print("****label:",label.numpy())

****Image: [1.6381409e-10 1.6659650e-10 1.7123383e-10]
****label: b'cat'
****Image: [0.000000e+00 7.692916e-11 9.552924e-11]
****label: b'cat'
****Image: [1.3850436e-10 1.5334385e-10 9.4145282e-11]
****label: b'dog'
****Image: [8.2544686e-11 1.1964343e-10 1.6508937e-10]
****label: b'dog'
****Image: [0.000000e+00 0.000000e+00 1.854937e-12]
****label: b'dog'
