In [1]:
import tensorflow as tf

In [3]:
# Raw daily sales figures. The negative entries are treated as invalid and
# are filtered out further down in the notebook.
daily_sales_numbers = [21, 22, -108, 31, -1, 32, 34, 31]

In [6]:
# Wrap the Python list in a tf.data.Dataset; from_tensor_slices produces one
# scalar element per list entry.
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
tf_dataset  # bare last expression so the notebook displays the dataset repr

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [7]:
# Iterating the dataset directly yields tf.Tensor objects, so print() shows
# the full tensor repr (value, shape and dtype).
for sale_tensor in tf_dataset:
    print(sale_tensor)

tf.Tensor(21, shape=(), dtype=int32)
tf.Tensor(22, shape=(), dtype=int32)
tf.Tensor(-108, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(-1, shape=(), dtype=int32)
tf.Tensor(32, shape=(), dtype=int32)
tf.Tensor(34, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)


In [8]:
# To view just the values, we can convert each tensor to NumPy.

In [9]:
# Convert each sales tensor to its NumPy value with .numpy() so that only
# the plain number is printed.
for sale_tensor in tf_dataset:
    sale_value = sale_tensor.numpy()
    print(sale_value)

21
22
-108
31
-1
32
34
31


In [10]:
# We can also use as_numpy_iterator() directly to
# get the sales values in NumPy format.

In [12]:
# as_numpy_iterator() hands back NumPy values directly, so no per-element
# .numpy() call is needed.
numpy_sales = tf_dataset.as_numpy_iterator()
for sale_value in numpy_sales:
    print(sale_value)

21
22
-108
31
-1
32
34
31


### Filtering the data

In [15]:
# Sales cannot be negative, so the invalid entries are removed from the
# dataset with the filter() function.
# Note: the predicate keeps only strictly positive values, so a zero entry
# would be dropped as well.
tf_dataset = tf_dataset.filter(lambda sale: sale > 0)
for sale_value in tf_dataset.as_numpy_iterator():
    print(sale_value)

21
22
31
32
34
31


In [16]:
# Treat the figures above as US dollars ($). To express them in Indian
# rupees, every value is multiplied by 72. The map() function applies the
# given function to each data point in the dataset.
# NOTE: tf_dataset is reassigned from itself here, so re-running this cell
# multiplies the values by 72 again.
tf_dataset = tf_dataset.map(lambda usd: usd * 72)
for rupees in tf_dataset.as_numpy_iterator():
    print(rupees)

1512
1584
2232
2304
2448
2232


In [17]:
# The shuffle() method randomises the order of the data points. Its
# buffer_size argument sets the size of the buffer that elements are randomly
# drawn from; it is explained in detail at the link below:
# https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset
# A fixed seed is passed so the shuffled order is reproducible when the
# notebook is re-run from a fresh kernel. By default the dataset is still
# reshuffled on every iteration, so each loop over it prints a different order.
tf_dataset = tf_dataset.shuffle(buffer_size=3, seed=42)
for sales in tf_dataset.as_numpy_iterator():
    print(sales)

2232
1584
2304
2232
1512
2448


In [19]:
# batch() groups the elements into batches of the given size.
# Batching can be used in a multi-GPU environment, where different batches
# are distributed to different environments.
# It is also useful for bringing a huge dataset into memory one batch at a
# time.
for batch_tensor in tf_dataset.batch(3):
    batch_values = batch_tensor.numpy()
    print(batch_values)

[2232 1512 1584]
[2232 2448 2304]


In [20]:
# Same loop with a batch size of 2.
for batch_tensor in tf_dataset.batch(batch_size=2):
    print(batch_tensor.numpy())

[1584 2304]
[2448 2232]
[2232 1512]


In [21]:
# With a batch size of 4 the element count is not evenly divisible, so the
# final batch holds the remaining elements and is smaller.
for batch_values in tf_dataset.batch(4).as_numpy_iterator():
    print(batch_values)

[1512 1584 2232 2232]
[2304 2448]


### The above things can be done in one single line as shown below

In [27]:
# Rebuild the pipeline from the raw list as one chained expression:
# keep the positive values, convert to rupees (x72), then batch in pairs.
# The shuffle step is not part of this chain.
tf_dataset2 = (
    tf.data.Dataset.from_tensor_slices(daily_sales_numbers)
    .filter(lambda sale: sale > 0)
    .map(lambda usd: usd * 72)
    .batch(2)
)
for batch_values in tf_dataset2.as_numpy_iterator():
    print(batch_values)

[1512 1584]
[2232 2304]
[2448 2232]
