In [7]:
import tensorflow as tf

In [20]:
simple_list = [21,21,23,-100,32,-5,32,33,31]

In [21]:
# create a dataset from tensor slices
# turn the list or array of data into a dataset that TensorFlow can use.
tf_dataset = tf.data.Dataset.from_tensor_slices(simple_list)
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [22]:
for ls in tf_dataset:
    print(ls)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(23, shape=(), dtype=int32)
tf.Tensor(-100, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(-5, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(33, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)


In [23]:
for ls in tf_dataset:
    print(ls.numpy()) # convert it into numpy

21
21
23
-100
32
-5
32
33
31


In [24]:
for ls in tf_dataset.take(3): # take 3 elements
    print(ls.numpy())

21
21
23


In [26]:
# Remove negative number
tf_dataset = tf_dataset.filter(lambda x : x > 0)
for ls in tf_dataset:
    print(ls.numpy())

21
21
23
32
32
33
31


In [31]:
# I want to multiply all the element by 10
tf_dataset = tf_dataset.map(lambda x : x * 10)
for ls in tf_dataset:
    print(ls.numpy())

210
210
230
320
320
330
310


In [32]:
# Shuffle dataset
tf_dataset = tf_dataset.shuffle(3)
for ls in tf_dataset:
    print(ls.numpy())

210
320
230
320
310
330
210


In [33]:
# using batch
for ls in tf_dataset.batch(4):
    print(ls.numpy())

[210 210 230 320]
[330 320 310]


In [36]:
# All in one
tf_dataset = tf.data.Dataset.from_tensor_slices(simple_list)
tf_dataset = tf_dataset.filter(lambda x : x > 0).map(lambda y : y * 10).shuffle(2).batch(2)

for ls in tf_dataset:
    print(ls.numpy())

[210 230]
[320 320]
[210 330]
[310]


### Image Data

In [38]:
images_ds = tf.data.Dataset.list_files("dataset/images/*/*",shuffle=False)

In [40]:
for file in images_ds.take(3):
    print(file) # Image path

tf.Tensor(b'dataset\\images\\cat\\download (1).jfif', shape=(), dtype=string)
tf.Tensor(b'dataset\\images\\cat\\download (10).jfif', shape=(), dtype=string)
tf.Tensor(b'dataset\\images\\cat\\download (11).jfif', shape=(), dtype=string)


In [41]:
images_ds = images_ds.shuffle(200)

for file in images_ds.take(3):
    print(file.numpy())

b'dataset\\images\\dog\\download (15).jfif'
b'dataset\\images\\dog\\download (3).jfif'
b'dataset\\images\\cat\\download (13).jfif'


In [42]:
class_names = ["cat","dog"]

In [44]:
image_count = len(images_ds)
image_count

41

In [45]:
# Split train and test dataset
train_size =  int(image_count*0.8)

train_ds = images_ds.take(train_size)
test_ds = images_ds.skip(train_size)


In [47]:
len(train_ds), len(test_ds)

(32, 9)

In [48]:
# label split
s = 'dataset\\images\\cat\\download (1).jfif'
s.split("\\")

['dataset', 'images', 'cat', 'download (1).jfif']

In [50]:
s.split("\\")[2]

'cat'

In [57]:
# Function for get label
import os
def get_label(file_path):
    return tf.strings.split(file_path,os.path.sep)[2]

In [67]:
# function for both label and image
def process_image(file_path):
    label = get_label(file_path)
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img,[128,123])
    return img, label

In [65]:
for t in train_ds.take(2):
    print(t.numpy())

b'dataset\\images\\cat\\download (5).jfif'
b'dataset\\images\\dog\\download (17).jfif'


In [63]:
train_ps = train_ds.map(get_label)

In [66]:
for l in train_ps.take(2):
    print(l)

tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)


In [69]:
train_ds = train_ds.map(process_image)

for label,img in train_ds:
    print(label, img)

tf.Tensor(
[[[ 21.719513   21.719513   10.505336 ]
  [ 25.317074   26.317074   10.580031 ]
  [ 30.101753   32.03887    11.227515 ]
  ...
  [ 30.15625    43.15625    14.351379 ]
  [ 32.444736   45.444736   17.127659 ]
  [ 25.609379   37.60938    13.609378 ]]

 [[ 21.250763   21.250763   11.51372  ]
  [ 24.848324   25.453888   11.925686 ]
  [ 29.408918   31.00648    11.213796 ]
  ...
  [ 31.65739    44.65739    15.852521 ]
  [ 30.5183     43.5183     15.201221 ]
  [ 23.221039   35.22104    11.221039 ]]

 [[ 19.938263   19.719131   12.061738 ]
  [ 23.535824   23.69436    12.622713 ]
  [ 28.413872   29.544588   11.78125  ]
  ...
  [ 34.27934    47.27934    18.474468 ]
  [ 28.725628   41.725628   13.40855  ]
  [ 20.596039   32.59604     8.596039 ]]

 ...

 [[ 87.5625    109.34375     2.1227133]
  [ 87.5625    109.5625      0.78125  ]
  [ 87.5625    110.02935     0.       ]
  ...
  [102.        117.         23.19513  ]
  [ 97.42455   113.266014   15.741632 ]
  [ 96.34337   112.34337    14.34

In [71]:
test_ds = test_ds.map(process_image)

for label, img in test_ds:
    print(label,img)

tf.Tensor(
[[[179.       151.       114.      ]
  [179.       151.       114.      ]
  [179.       151.       114.      ]
  ...
  [179.       151.       112.      ]
  [178.       150.       111.      ]
  [177.12389  149.12389  110.123886]]

 [[179.       151.       114.      ]
  [179.       151.       114.      ]
  [179.       151.       114.      ]
  ...
  [179.206    151.206    112.205986]
  [178.01344  150.01344  111.01344 ]
  [177.85391  149.85391  110.85391 ]]

 [[179.       151.       114.      ]
  [179.       151.       114.      ]
  [179.       151.       114.      ]
  ...
  [179.94865  151.94865  112.94865 ]
  [178.91997  150.91997  111.91997 ]
  [178.       150.       111.      ]]

 ...

 [[183.       155.       118.      ]
  [184.       156.       119.      ]
  [185.       157.       120.      ]
  ...
  [185.374    157.374    120.37399 ]
  [185.       157.       120.      ]
  [184.       156.       119.      ]]

 [[183.       155.       118.      ]
  [184.       156.       1

In [73]:
# Normalization
def scale(image,lable):
    img = image/255.
    return img,label

In [75]:
train_ds = train_ds.map(scale)
test_ds = test_ds.map(scale)

for label, img in train_ds:
    print(label,img)

tf.Tensor(
[[[9.5002743e-04 8.6294208e-04 6.4591126e-04]
  [9.1254769e-04 8.6195162e-04 6.4465118e-04]
  [9.3189412e-04 8.7037933e-04 6.8583497e-04]
  ...
  [1.0209304e-03 9.7479427e-04 6.8259897e-04]
  [1.0192138e-03 9.9895883e-04 7.1970414e-04]
  [9.8108966e-04 9.6571096e-04 6.8889430e-04]]

 [[1.1030643e-03 1.0532183e-03 8.4126886e-04]
  [1.8698716e-03 1.8227744e-03 1.6382301e-03]
  [1.0460212e-03 9.9892379e-04 8.1007188e-04]
  ...
  [9.0177573e-04 8.5563964e-04 6.5997156e-04]
  [9.4929052e-04 9.2296960e-04 7.1272551e-04]
  [8.0797367e-04 8.0701255e-04 6.0228363e-04]]

 [[2.2773839e-04 2.1912305e-04 4.0802788e-05]
  [1.9813674e-03 1.9746392e-03 1.7900948e-03]
  [2.1850179e-04 1.9741469e-04 3.5035770e-05]
  ...
  [2.1343301e-04 1.6729691e-04 7.1795066e-06]
  [4.3777991e-04 4.0839397e-04 1.9357842e-04]
  [2.5856611e-04 2.7394481e-04 7.8827543e-05]]

 ...

 [[2.4905701e-03 2.4444340e-03 2.3367831e-03]
  [2.3761850e-03 2.3300489e-03 2.2223981e-03]
  [1.3274739e-03 1.2813378e-03 1.173686