In [None]:
import tensorflow as tf

daily_sales_number = [21,22,-108, 31, -1, 32, 34, 31]

tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_number)
tf_dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [None]:
for sales in tf_dataset:
    print(sales.numpy())

21
22
-108
31
-1
32
34
31


In [None]:
for sales in tf_dataset.take(3):
    print(sales.numpy())

21
22
-108


In [None]:
#filter func
tf_dataset = tf_dataset.filter(lambda x:x>0)
for sales in tf_dataset:
    print(sales.numpy())

21
22
31
32
34
31


In [None]:
#map func
tf_dataset = tf_dataset.map(lambda x: x*72)
for sales in tf_dataset:
    print(sales.numpy())

1512
1584
2232
2304
2448
2232


In [None]:
#shuffle
tf_dataset = tf_dataset.shuffle(buffer_size=2)
for sales in tf_dataset:
    print(sales.numpy())

1512
2232
1584
2448
2232
2304


In [None]:
#batching
tf_dataset = tf_dataset.batch(2)
for i in tf_dataset:
    print(i.numpy())

[1584 1512]
[2232 2304]
[2232 2448]


In [None]:
#in single pipeline
daily_sales_number = [21,22,-108, 31, -1, 32, 34, 31]
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_number)

tf_dataset = tf_dataset.filter(lambda x: x>0).map(lambda y: y*72).shuffle(buffer_size=3).batch(2)

for i in tf_dataset:
    print(i.numpy())

[1512 2304]
[2448 2232]
[2232 1584]


## Using tf.Data to create X_train and Y_train using image files

In [None]:
!unzip "/content/codebasics deep-learning-keras-tf-tutorial master 44_tf_data_pipeline-images.zip"

Archive:  /content/codebasics deep-learning-keras-tf-tutorial master 44_tf_data_pipeline-images.zip
   creating: cat/
 extracting: cat/The Joys of Owning a Cat - HelpGuide.org.jpg  
   creating: dog/
 extracting: dog/Canine Mind....jpg  
 extracting: dog/9 Reasons to Own a Dog.jpg  
 extracting: dog/356 Free Dog Stock Photos - CC0 Images.jpg  
 extracting: cat/What to do if your cat is marking....jpg  
 extracting: dog/Aggression in dogs _ Animal Humane Society.jpg  
 extracting: dog/How dogs contribute to your health and....jpg  
 extracting: dog/The 25 Cutest Dog Breeds - Most....jpg  
 extracting: dog/How to make your dog feel comfortable....jpg  
 extracting: dog/The US Army is testing augmented....jpg  
 extracting: dog/The 20 Best Dog Breeds for Runners....jpg  
 extracting: dog/45 Best Large Dog Breeds - Top Big Dogs_yyth....jpg  
 extracting: cat/All About Your Cat_s Tongue.jpg  
 extracting: dog/Dog Breed Chart....jpg  
 extracting: dog/How Many Dog Breeds Are There_ _ Hill_s 

In [None]:
!mkdir images
!mv cat/ images/
!mv dog/ images/

In [None]:
images_ds = tf.data.Dataset.list_files('/content/images/*/*',shuffle=False)
image_count = len(images_ds)
print(image_count)

130


In [None]:
images_ds = images_ds.shuffle(200)
for file in images_ds.take(3):
    print(file.numpy())

b'/content/images/dog/10 Teacup Dog Breeds for Tiny Canine Lovers.jpg'
b'/content/images/dog/Great Dane - Wikipedia.jpg'
b'/content/images/dog/How To Read Your Dog_s Body Language....png'


In [None]:
train_size = int(image_count*0.8)
train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)

print(len(train_ds))
print(len(test_ds))

104
26


In [None]:
import os
for file in train_ds.take(3):
    print(tf.strings.split(file, os.path.sep)[-2])

tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)


In [None]:
import os
def get_label(file_path):
    parts = tf.strings.split(file_path, os.path.sep)
    return parts[-2]

In [None]:
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img, [128,128])
    return img, label

In [None]:
img, label = process_image('/content/images/cat/20 Reasons Why Cats Make the Best Pets....jpg')
print(img.numpy()[:2])

[[[155.       186.       215.      ]
  [156.       187.       216.      ]
  [158.       189.       218.      ]
  [160.0039   189.0039   219.0039  ]
  [161.0039   190.0039   220.0039  ]
  [162.       191.       221.      ]
  [166.       193.       222.      ]
  [167.       194.       223.      ]
  [168.       195.       224.      ]
  [169.       196.       225.      ]
  [170.       197.       224.      ]
  [170.0039   197.0039   224.0039  ]
  [172.       199.       226.      ]
  [173.0039   199.0039   224.0039  ]
  [174.0039   200.0039   225.0039  ]
  [175.0039   201.0039   226.0039  ]
  [176.       202.       227.      ]
  [177.0039   203.0039   228.0039  ]
  [177.0039   203.0039   228.0039  ]
  [179.0039   203.0039   227.0039  ]
  [180.0039   204.0039   228.0039  ]
  [180.0039   204.0039   228.0039  ]
  [181.0039   206.0039   228.0039  ]
  [182.0039   207.0039   229.0039  ]
  [184.       209.       231.      ]
  [184.       209.       231.      ]
  [184.       209.       231.      ]
 

In [None]:
train_ds = train_ds.map(process_image)
test_ds = test_ds.map(process_image)

In [None]:
for image, label in train_ds.take(1):
    print("Label:", label)
    print('Image:', image)

Label: tf.Tensor(b'dog', shape=(), dtype=string)
Image: tf.Tensor(
[[[3.73476562e+01 3.43476562e+01 1.34765625e+00]
  [4.10000000e+01 3.90000000e+01 0.00000000e+00]
  [4.43984375e+01 4.32941589e+01 2.08557129e-01]
  ...
  [1.72941589e+01 3.02941589e+01 2.94158936e-01]
  [1.49570312e+01 2.50859375e+01 0.00000000e+00]
  [1.31385193e+01 2.27460938e+01 3.92425537e-01]]

 [[3.98725891e+01 3.68725891e+01 5.87258911e+00]
  [4.20083923e+01 4.00083923e+01 2.00839233e+00]
  [4.89335938e+01 4.69335938e+01 5.93359375e+00]
  ...
  [2.11289062e+01 3.51289062e+01 1.73828125e+00]
  [1.80083923e+01 2.83326111e+01 8.04687500e-01]
  [1.60679016e+01 2.60679016e+01 1.45703125e+00]]

 [[3.83425598e+01 3.53425598e+01 1.65267944e+00]
  [4.99843750e+01 4.79843750e+01 8.90658569e+00]
  [5.19843750e+01 5.09843750e+01 5.00000000e+00]
  ...
  [2.67362366e+01 4.07362366e+01 4.73623657e+00]
  [2.28710938e+01 3.40000000e+01 1.91406250e+00]
  [1.83503723e+01 2.93503723e+01 2.71606445e-03]]

 ...

 [[3.94331055e+01 5.0

# Image Classification using tf.Data

In [3]:
!unzip "/content/codebasics deep-learning-keras-tf-tutorial master 44_tf_data_pipeline-images.zip"

Archive:  /content/codebasics deep-learning-keras-tf-tutorial master 44_tf_data_pipeline-images.zip
   creating: cat/
 extracting: cat/The Joys of Owning a Cat - HelpGuide.org.jpg  
   creating: dog/
 extracting: dog/Canine Mind....jpg  
 extracting: dog/9 Reasons to Own a Dog.jpg  
 extracting: dog/356 Free Dog Stock Photos - CC0 Images.jpg  
 extracting: cat/What to do if your cat is marking....jpg  
 extracting: dog/Aggression in dogs _ Animal Humane Society.jpg  
 extracting: dog/How dogs contribute to your health and....jpg  
 extracting: dog/The 25 Cutest Dog Breeds - Most....jpg  
 extracting: dog/How to make your dog feel comfortable....jpg  
 extracting: dog/The US Army is testing augmented....jpg  
 extracting: dog/The 20 Best Dog Breeds for Runners....jpg  
 extracting: dog/45 Best Large Dog Breeds - Top Big Dogs_yyth....jpg  
 extracting: cat/All About Your Cat_s Tongue.jpg  
 extracting: dog/Dog Breed Chart....jpg  
 extracting: dog/How Many Dog Breeds Are There_ _ Hill_s 

In [4]:
!mkdir images
!mv cat/ images/
!mv dog/ images/

In [16]:
#to check proper images available in the folder https://stackoverflow.com/questions/65438156/tensorflow-keras-error-unknown-image-file-format-one-of-jpeg-png-gif-bmp-re
import os
import cv2
import imghdr

def check_images( s_dir, ext_list):
    bad_images=[]
    bad_ext=[]
    s_list= os.listdir(s_dir)
    for klass in s_list:
        klass_path=os.path.join (s_dir, klass)
        print ('processing class directory ', klass)
        if os.path.isdir(klass_path):
            file_list=os.listdir(klass_path)
            for f in file_list:
                if not f.endswith('.ipynb_checkpoints'):         
                    f_path=os.path.join (klass_path,f)
                    tip = imghdr.what(f_path)
                    if ext_list.count(tip) == 0:
                      bad_images.append(f_path)
                    if os.path.isfile(f_path):
                        try:
                            img=cv2.imread(f_path)
                            shape=img.shape
                        except:
                            print('file ', f_path, ' is not a valid image file')
                            bad_images.append(f_path)
                    else:
                        print('*** fatal error, you a sub directory ', f, ' in class directory ', klass)
        else:
            print ('*** WARNING*** you have files in ', s_dir, ' it should only contain sub directories')
    return bad_images, bad_ext

source_dir = "/content/images"
good_exts=['jpg', 'png', 'jpeg', 'gif', 'bmp' ] # list of acceptable extensions
bad_file_list, bad_ext_list=check_images(source_dir, good_exts)
if len(bad_file_list) !=0:
    print('improper image files are listed below')
    for i in range (len(bad_file_list)):
        print (bad_file_list[i])
else:
    print(' no improper image files were found')

processing class directory  cat
processing class directory  dog
 no improper image files were found


In [1]:
#import libraries
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#setting the constants
BATCH_SIZE = 4
IMAGE_SIZE = 256
CHANNEL = 3
EPOCHS = 10

In [3]:
dataset = tf.keras.preprocessing.image_dataset_from_directory('images',
                                                              seed=123,
                                                              shuffle=True,
                                                              image_size = (IMAGE_SIZE,IMAGE_SIZE),
                                                              batch_size=BATCH_SIZE)

Found 124 files belonging to 2 classes.


In [4]:
CLASS_NAMES = dataset.class_names
print(CLASS_NAMES)

['cat', 'dog']


In [5]:
for image_batch, label_batch in dataset.take(1):
    print(image_batch.shape)
    print(label_batch.numpy())

(4, 256, 256, 3)
[1 1 1 0]


In [48]:
#split the dataset
len(dataset)

31

In [46]:
# 80% - train, 10% - val, 10% - test
print("train size:",int(0.8*31))
print("val size:", int(31*0.1)) 
print("test size:", int(31*0.1))

train size: 24
val size: 3
test size: 3


In [6]:
def get_dataset_partitions_tf(ds, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10):
    assert (train_split + test_split + val_split)==1

    ds_size = len(ds)

    if shuffle:
        ds = ds.shuffle(shuffle_size, seed=0)
    
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    return train_ds, val_ds, test_ds

train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)

In [7]:
print(len(train_ds))
print(len(val_ds))
print(len(test_ds))

24
3
4


In [8]:
#cache, shuffle and prefetch the dataset
train_ds = train_ds.cache().shuffle(10).prefetch(buffer_size=tf.data.AUTOTUNE)
val_ds = val_ds.cache().shuffle(10).prefetch(buffer_size=tf.data.AUTOTUNE)
test_ds = test_ds.cache().shuffle(10).prefetch(buffer_size=tf.data.AUTOTUNE)

In [10]:
NUM_CLASSES = 2
CHANNELS = 3
resize_and_rescale = tf.keras.Sequential([
                            tf.keras.layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
                            tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
                      ])

In [11]:
#data augmentation
data_augmentation = tf.keras.Sequential([
                              tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal_and_vertical'),
                              tf.keras.layers.experimental.preprocessing.RandomRotation(0.2)
])

In [13]:
# train_ds = train_ds.map(lambda x,y : (data_augmentation(x, training=True), y)
# ).prefetch(buffer_size=tf.data.AUTOTUNE)

input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)

model = tf.keras.Sequential([
  resize_and_rescale,
  data_augmentation,
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(1, activation='sigmoid')
])

model.build(input_shape= input_shape)

model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (256, 256, 3)             0         
                                                                 
 sequential_1 (Sequential)   (256, 256, 3)             0         
                                                                 
 conv2d_3 (Conv2D)           (4, 256, 256, 16)         448       
                                                                 
 max_pooling2d_3 (MaxPooling  (4, 128, 128, 16)        0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (4, 128, 128, 32)         4640      
                                                                 
 max_pooling2d_4 (MaxPooling  (4, 64, 64, 32)          0         
 2D)                                                  

In [14]:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [15]:
model.fit(train_ds, epochs=EPOCHS,batch_size=BATCH_SIZE, verbose=1, validation_data = val_ds)

Epoch 1/10


  return dispatch_target(*args, **kwargs)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0a700533d0>