In [17]:
import tensorflow as tf

In [18]:
daily_sales_numbers = [21,22,-108,-1,32,34,31]

In [19]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers) # reads the data
tf_dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [20]:
for sales in tf_dataset:
    print(sales.numpy())

21
22
-108
-1
32
34
31


In [21]:
for sales in tf_dataset.take(3): # like head()
    print(sales.numpy())

21
22
-108


In [22]:
tf_dataset = tf_dataset.filter(lambda x: x>0 )

In [23]:
for sales in tf_dataset:
    print(sales.numpy())

21
22
32
34
31


In [24]:
tf_dataset = tf_dataset.map(lambda x:x*72)
for sales in tf_dataset:
    print(sales.numpy())

1512
1584
2304
2448
2232


In [26]:
tf_dataset = tf_dataset.shuffle(buffer_size=3)
for sales in tf_dataset:
    print(sales.numpy()) 

2304
1512
1584
2448
2232


In [28]:
tf_dataset = tf_dataset.batch(4);
for sales in tf_dataset:
    print(sales.numpy())

[2448 1584 2232 2304]
[1512]


In [30]:
tf_dataset = tf.data.Dataset.from_tensor_slices(daily_sales_numbers)

tf_dataset = tf_dataset.filter(lambda x:x>0).map(lambda y:y*72).shuffle(3).batch(2) # can be done in 1 shot

In [31]:
for sales in tf_dataset:
    print(sales.numpy())

[2304 1584]
[2448 2232]
[1512]


In [34]:
images_ds = tf.data.Dataset.list_files('images/*/*',shuffle=True)

for file in images_ds.take(5):
    print(file.numpy())

b'images\\cat\\International Cat Care _ The ultimate....jpg'
b'images\\dog\\Puppy Dog Pictures _ Download Free....jpg'
b'images\\cat\\Want your cat to stay in purrrfect....jpg'
b'images\\cat\\Reality check_ Can cat poop cause....jpg'
b'images\\cat\\The Cat Health Checklist_ Everything....jpg'


In [37]:
class_names = ["cat","dog"]

In [39]:
image_count = len(images_ds)
image_count

130

In [40]:
# Train/Test Splitting

train_size = int(image_count*0.8)

train_ds = images_ds.take(train_size) # takes 80%
test_ds = images_ds.skip(train_size) # skips 80% 

In [41]:
len(train_ds),len(test_ds)

(104, 26)

In [43]:
s = 'images\\dog\\why dogs understand our....jpg'

s.split("\\")[-2]

'dog'

In [52]:
import os
def get_label(file_path):
    return tf.strings.split(file_path,os.path.sep)[-2]


In [51]:
for t in train_ds.take(4):
    print(t.numpy())

b'images\\dog\\List of Dog Breeds _ Petfinder.jpg'
b'images\\dog\\Most Popular Breeds \xe2\x80\x93 American Kennel Club.jpg'
b'images\\dog\\Carolina Dog Dog Breed Information....jpg'
b'images\\dog\\15 Amazing Facts About Dogs That Will....jpg'


In [54]:
for label in train_ds.map(get_label):
    print(label)
    
# this is our y Labels

tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'cat', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=string)
tf.Tensor(b'dog', shape=(), dtype=

In [56]:
def process_image(file_path):
    label = get_label(file_path)
    
    img = tf.io.read_file(file)
    img = tf.image.decode_jpeg(img)
    img = tf.image.resize(img,[128,128])
    
    return img,label

# this is X  

In [62]:
for img,label in train_ds.map(process_image).take(3):
    print(img)
    print(label)
    
train_ds = train_ds.map(process_image)

tf.Tensor(
[[[230.875   230.875   232.875  ]
  [232.375   232.375   231.75   ]
  [234.      234.      234.     ]
  ...
  [214.6875  207.6875  201.6875 ]
  [214.      205.      200.     ]
  [213.      206.      200.     ]]

 [[230.125   230.125   232.125  ]
  [232.375   232.375   234.375  ]
  [234.03906 234.03906 234.03906]
  ...
  [214.      207.      201.     ]
  [214.01562 207.0625  201.0625 ]
  [213.      206.      200.     ]]

 [[230.125   231.      233.     ]
  [231.      231.      231.     ]
  [234.      234.      234.     ]
  ...
  [214.16406 207.16406 201.16406]
  [215.      208.      202.     ]
  [213.      206.      200.     ]]

 ...

 [[189.4375  231.4375  247.4375 ]
  [187.3125  232.      247.4375 ]
  [193.33594 234.21094 246.21094]
  ...
  [175.5625  226.5625  247.5625 ]
  [176.5625  227.5625  248.5625 ]
  [177.49219 229.50781 248.     ]]

 [[193.42969 234.8125  248.85938]
  [187.29688 234.75    248.3125 ]
  [182.91406 234.10156 247.35156]
  ...
  [172.5625  227.5625  247.

In [63]:
def scale(img,label):
    return img / 255 , label

In [64]:
train_ds = train_ds.map(scale)
for image,label in train_ds.take(5):
    print(image)
    print(label)

tf.Tensor(
[[[0.90539217 0.90539217 0.9132353 ]
  [0.9112745  0.9112745  0.90882355]
  [0.91764706 0.91764706 0.91764706]
  ...
  [0.8419118  0.8144608  0.79093134]
  [0.8392157  0.8039216  0.78431374]
  [0.8352941  0.80784315 0.78431374]]

 [[0.902451   0.902451   0.9102941 ]
  [0.9112745  0.9112745  0.9191176 ]
  [0.91780025 0.91780025 0.91780025]
  ...
  [0.8392157  0.8117647  0.7882353 ]
  [0.83927697 0.8120098  0.7884804 ]
  [0.8352941  0.80784315 0.78431374]]

 [[0.902451   0.90588236 0.9137255 ]
  [0.90588236 0.90588236 0.90588236]
  [0.91764706 0.91764706 0.91764706]
  ...
  [0.83985907 0.8124081  0.7888787 ]
  [0.84313726 0.8156863  0.7921569 ]
  [0.8352941  0.80784315 0.78431374]]

 ...

 [[0.74289215 0.907598   0.9703431 ]
  [0.7345588  0.9098039  0.9703431 ]
  [0.75818014 0.91847426 0.9655331 ]
  ...
  [0.6884804  0.88848037 0.97083336]
  [0.69240195 0.89240193 0.9747549 ]
  [0.6960478  0.9000306  0.972549  ]]

 [[0.7585478  0.92083335 0.9759191 ]
  [0.73449755 0.92058825 0

In [65]:
# Movie reviews are present as individual text file (one file per review) in review folder.
# Folder structure looks like this
# 
# reviews
#     |__ positive
#         |__pos_1.txt
#         |__pos_2.txt
#         |__pos_3.txt
#     |__ negative
#         |__neg_1.txt
#         |__neg_2.txt
#         |__neg_3.txt
#         
# You need to read these reviews using tf.data.Dataset and perform following transformations,
# 
# Read text review and generate a label from folder name. your dataset should have review text and label as a tuple
# Filter blank text review. Two files are blank in this dataset
# Do all of the above transformations in single line of code. Also shuffle all the reviews

In [1]:
import tensorflow as tf

In [2]:
review_ds = tf.data.Dataset.list_files('reviews/*/*',shuffle=False) # Reads the file
review_ds

In [5]:
for file in review_ds:
    print(file.numpy())

b'reviews\\negative\\neg_1.txt'
b'reviews\\negative\\neg_2.txt'
b'reviews\\negative\\neg_3.txt'
b'reviews\\positive\\pos_1.txt'
b'reviews\\positive\\pos_2.txt'
b'reviews\\positive\\pos_3.txt'


# Extract review text from these files. Extract label from folder name

In [6]:
import os
def extract_review_and_label(file_path):
    return tf.io.read_file(file_path) , tf.strings.split(file_path,os.path.sep)[-2]

In [11]:
review_ds_1 = review_ds.map(extract_review_and_label)
for review , label in review_ds_1:
    print("Review: ",review.numpy()[:50])
    print("Label: ",label.numpy())

Review:  b"Basically there's a family where a little boy (Jak"
Label:  b'negative'
Review:  b'This show was an amazing, fresh & innovative idea '
Label:  b'negative'
Review:  b''
Label:  b'negative'
Review:  b'One of the other reviewers has mentioned that afte'
Label:  b'positive'
Review:  b'A wonderful little production. <br /><br />The fil'
Label:  b'positive'
Review:  b''
Label:  b'positive'


# Filter blank reviews


In [17]:
review_ds_2 = review_ds_1.filter(lambda review , label : review!="")
for review,label in review_ds_2.as_numpy_iterator():
    print("Review: ",review[:50])
    print("Label: ",label)

Review:  b"Basically there's a family where a little boy (Jak"
Label:  b'negative'
Review:  b'This show was an amazing, fresh & innovative idea '
Label:  b'negative'
Review:  b'One of the other reviewers has mentioned that afte'
Label:  b'positive'
Review:  b'A wonderful little production. <br /><br />The fil'
Label:  b'positive'


# Perform map, filter and shuffle all in single line of code

In [19]:
final_ds = review_ds.map(extract_review_and_label).filter(lambda review, label: review!="").shuffle(3)
for review, label in final_ds.as_numpy_iterator():
    print("Review:",review[:50])
    print("Label:",label)

Review: b"Basically there's a family where a little boy (Jak"
Label: b'negative'
Review: b'One of the other reviewers has mentioned that afte'
Label: b'positive'
Review: b'A wonderful little production. <br /><br />The fil'
Label: b'positive'
Review: b'This show was an amazing, fresh & innovative idea '
Label: b'negative'
