### Using Data Pipeline

In [1]:
import tensorflow as tf
mnist = tf.keras.datasets.mnist
(train_x,train_y), (test_x, test_y) = mnist.load_data()
train_x, test_x = train_x/255.0, test_x/255.0
epochs=10

In [2]:
batch_size = 32
buffer_size = 10000
training_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(32).shuffle(10000)
training_dataset = training_dataset.map(lambda x, y: (tf.image.random_flip_left_right(x), y))
training_dataset = training_dataset.repeat()

In [3]:
testing_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y)).batch(batch_size).shuffle(10000)
testing_dataset = training_dataset.repeat()

### Building The Model Architecture

In [4]:
#Now in the fit() function, we can pass the dataset directly in, as follows:
model5 = tf.keras.models.Sequential([
 tf.keras.layers.Flatten(),
 tf.keras.layers.Dense(512,activation=tf.nn.relu),
 tf.keras.layers.Dropout(0.2),
 tf.keras.layers.Dense(10,activation=tf.nn.softmax)
])

### Compiling The Model

In [5]:
steps_per_epoch = len(train_x)//batch_size #required becuase of the repeat() on the dataset
optimiser = tf.keras.optimizers.Adam()
model5.compile (optimizer= optimiser, loss='sparse_categorical_crossentropy', metrics = ['accuracy'])

### Fitting The Model 

In [6]:
model5.fit(training_dataset, epochs=epochs, steps_per_epoch = steps_per_epoch)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Train for 1875 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1af4689e9b0>

### Evaluating The Model 

In [7]:
model5.evaluate(testing_dataset,steps=10)



[0.040664650511462244, 0.984375]

In [8]:
import datetime as dt
callbacks = [
  # Write TensorBoard logs to `./logs` directory
  tf.keras.callbacks.TensorBoard(log_dir='log/{}/'.format(dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")))
]

In [9]:
model5.fit(training_dataset, epochs=epochs, steps_per_epoch=steps_per_epoch,
          validation_data=testing_dataset,
          validation_steps=3)

Train for 1875 steps, validate for 3 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1af205cd630>

### Evaluating

In [10]:
model5.evaluate(testing_dataset,steps=10)



[0.0054867578073753975, 1.0]

### Saving And Loading Keras Models

### Keras Datasets

The Following Datasets are available from within keras:
boston_housing, cifar10, cifar100, fashion_mnist, imdb, mnist, and reuters.

They are all accessed with the function
load_data()

For example, to load the fashion_mnist dataset, use the following:

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()

### Using Numpy arrays With Datasets

In [12]:
import numpy as np
number_items = 11
number_list1 = np.arange(number_items)
number_list1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [13]:
number_list2 = np.arange(number_items,number_items*2)
number_list2

array([11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21])

In [14]:
import tensorflow as tf
import numpy as np
number_items = 11
number_list1 = np.arange(number_items)
number_list2 = np.arange(number_items,number_items*2)

### Create datasets, using the from_tensor_slices() method.

In [15]:
number_list1_dataset = tf.data.Dataset.from_tensor_slices(number_list1)

### Create An Iterator on it using the make_one_shot_iterator() method:

In [16]:
iterator = tf.compat.v1.data.make_one_shot_iterator(number_list1_dataset)

### Using Them Together, with the get_next method:

In [17]:
for item in number_list1_dataset:
    number = iterator.get_next().numpy()
    print(number)

0
1
2
3
4
5
6
7
8
9
10


### Note that executing this code twice in the same program run will raise an error because we are using a one-shot iterator. 

### It's also possible to access the data in batches() with the batch method. Note that the first arguments is the number of elements to put in each batch and the second is the self-explanatory drop_remainder arguments.

In [18]:
number_list1_dataset = tf.data.Dataset.from_tensor_slices(number_list1).batch(3, drop_remainder = False)
iterator = tf.compat.v1.data.make_one_shot_iterator(number_list1_dataset)
for item in number_list1_dataset:
    number = iterator.get_next().numpy()
    print(number)

[0 1 2]
[3 4 5]
[6 7 8]
[ 9 10]


### There is also a Zip Method, which is useful for presenting features and labels together.

In [19]:
data_set1 = [1,2,3,4,5]
data_set2 = ['a', 'e', 'i', 'o', 'u']
data_set1 = tf.data.Dataset.from_tensor_slices(data_set1)
data_set2 = tf.data.Dataset.from_tensor_slices(data_set2)
zipped_datasets = tf.data.Dataset.zip((data_set1, data_set2))
iterator = tf.compat.v1.data.make_one_shot_iterator(zipped_datasets)
for item in zipped_datasets:
    number = iterator.get_next()
    print(number)

(<tf.Tensor: id=113707, shape=(), dtype=int32, numpy=1>, <tf.Tensor: id=113708, shape=(), dtype=string, numpy=b'a'>)
(<tf.Tensor: id=113711, shape=(), dtype=int32, numpy=2>, <tf.Tensor: id=113712, shape=(), dtype=string, numpy=b'e'>)
(<tf.Tensor: id=113715, shape=(), dtype=int32, numpy=3>, <tf.Tensor: id=113716, shape=(), dtype=string, numpy=b'i'>)
(<tf.Tensor: id=113719, shape=(), dtype=int32, numpy=4>, <tf.Tensor: id=113720, shape=(), dtype=string, numpy=b'o'>)
(<tf.Tensor: id=113723, shape=(), dtype=int32, numpy=5>, <tf.Tensor: id=113724, shape=(), dtype=string, numpy=b'u'>)


### We can concatenate two datasets as follows, using the concatenate method:

In [21]:
datas1 = tf.data.Dataset.from_tensor_slices([1,2,3,5,7,11,13,17])
datas2 = tf.data.Dataset.from_tensor_slices([19,23,29,31,37,41])
datas3 = datas1.concatenate(datas2)
print(datas3)
iterator = tf.compat.v1.data.make_one_shot_iterator(datas3)
for i in datas3:
    number = iterator.get_next()
    print(number)

<ConcatenateDataset shapes: (), types: tf.int32>
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(11, shape=(), dtype=int32)
tf.Tensor(13, shape=(), dtype=int32)
tf.Tensor(17, shape=(), dtype=int32)
tf.Tensor(19, shape=(), dtype=int32)
tf.Tensor(23, shape=(), dtype=int32)
tf.Tensor(29, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(37, shape=(), dtype=int32)
tf.Tensor(41, shape=(), dtype=int32)


### We can also do away with iterators altogether as shown here:

In [22]:
epochs = 2
for e in range(epochs):
    for item in datas3:
        print(item)

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(11, shape=(), dtype=int32)
tf.Tensor(13, shape=(), dtype=int32)
tf.Tensor(17, shape=(), dtype=int32)
tf.Tensor(19, shape=(), dtype=int32)
tf.Tensor(23, shape=(), dtype=int32)
tf.Tensor(29, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(37, shape=(), dtype=int32)
tf.Tensor(41, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(11, shape=(), dtype=int32)
tf.Tensor(13, shape=(), dtype=int32)
tf.Tensor(17, shape=(), dtype=int32)
tf.Tensor(19, shape=(), dtype=int32)
tf.Tensor(23, shape=(), dtype=int32)
tf.Tensor(29, shape=(), dtype=int32)
tf.Tensor(31, shape=(), dtype=int32)
tf.Tensor(37, shape=(), dtype=int32)
tf.Tensor(4

#### CSV Example1 

With the following arguments our dataset will consist of two items taken from each row of the filename file, both of the float type, with the first line of the file ignored and columns 1 and 2 used (column numbering is, of course, 0-based)

In [23]:
record_defaults = [tf.float32]*2
record_defaults

[tf.float32, tf.float32]

In [24]:
filename = ["./size_1000.csv"]
record_defaults = [tf.float32]*2 # two required float columns
data_set = tf.data.experimental.CsvDataset(filename, record_defaults, header=True, select_cols=[1,2])
for item in data_set:
    print(item)

NotFoundError: NewRandomAccessFile failed to Create/Open: ./size_1000.csv : The system cannot find the file specified.
; No such file or directory [Op:IteratorGetNextSync]

### CSV Example 2

In this following example, and with the following arguments, our dataset will consist of one required float,
one optional float with a default value of 0.0 and an int, where there is no header in the csv  file and only columns 1,2 and 3 are imported:


In [25]:
filename = "mycsvfile.txt"
record_defaults = [tf.float32, tf.constant([0.0], dtype = tf.float32), tf.int32]
data_set = tf.data.experimental.CsvDataset(filename, record_defaults, header = False, select_cols=[1,2,3])
for item in data_set:
    print(item)

NotFoundError: NewRandomAccessFile failed to Create/Open: mycsvfile.txt : The system cannot find the file specified.
; No such file or directory [Op:IteratorGetNextSync]

### CSV Example 3

In [26]:
# For Our FInal Example, our dataset will consist of two required folat and a required sgtring
# Where the csv file has a header variable:

filename = "file1.txt"
record_defaults = [tf.float32, tf.float32, tf.string]
dataset = tf.data.experimental.CsvDataset(filename, record_defaults, header = False)
for item in dataset:
    print(item[0].numpy(), item[1].numpy(), item[2].numpy().decode())

NotFoundError: NewRandomAccessFile failed to Create/Open: file1.txt : The system cannot find the file specified.
; No such file or directory [Op:IteratorGetNextSync]

### TF Records

In [29]:
import tensorflow as tf
import numpy as np
data = np.array([10.,11.,12.,13.,14.,15.])
def npy_to_tfrecords(fname,data):
    writer = tf.io.TFRecordWriter(fname)
    feature = {}
    feature['data'] = tf.train.Feature(float_list = tf.train.FloatList(value = data))
    example = tf.train.Example(features = tf.train.Features(feature = feature))
    serialized = example.SerializeToString()
    writer.write(serialized)
    writer.close()
npy_to_tfrecords("./myfile.tfrecords", data)    

The code to read the record back is as follows

In [31]:
data_set = tf.data.TFRecordDataset("./myfile.tfrecords")
def parse_function(example_proto):
    keys_to_features = {'data' : tf.io.FixedLenSequenceFeature([], dtype = tf.float32, allow_missing = True)}
    parsed_features = tf.io.parse_single_example(serialized=example_proto, features = Keys_to_features)
    return parsed_features['data']
data_set = data_set.map(parse_function)
iterator = tf.compat.v1.data.make_one_shot_iterator(data_set)
# Array Is Retrived as one item
item = iterator.get_next()
print(item)
print(item.numpy())
print(item[2].numpy())

NameError: in converted code:

    <ipython-input-30-7053574fa91f>:4 parse_function  *
        parsed_features = tf.io.parse_single_example(serialized=example_proto, features = Keys_to_features)

    NameError: name 'Keys_to_features' is not defined


### TFRecord Example2

In [32]:
filename = './students.tfrecords'
dataset = {
    'ID': 61553,
    'Name': ['Jones', 'Felicity'],
    'Scores': [45.6,97.2]
}

#### Using This, We can construct a tf.train.Example class, again using the Feature() method.  Note We have to encode our string.

In [33]:
ID = tf.train.Feature(int64_list = tf.train.Int64List(value = [dataset['ID']]))
ID

int64_list {
  value: 61553
}

In [34]:
Name = tf.train.Feature(bytes_list = tf.train.BytesList(value = [n.encode('utf-8') for n in dataset['Name']]))
Name

bytes_list {
  value: "Jones"
  value: "Felicity"
}

In [35]:
Scores = tf.train.Feature(float_list = tf.train.FloatList(value = dataset['Scores']))
Scores

float_list {
  value: 45.599998474121094
  value: 97.19999694824219
}

In [38]:
example = tf.train.Example(features = tf.train.Features(feature = {'ID': ID, 'Name' : Name, 'Scores' : Scores}))
example

features {
  feature {
    key: "ID"
    value {
      int64_list {
        value: 61553
      }
    }
  }
  feature {
    key: "Name"
    value {
      bytes_list {
        value: "Jones"
        value: "Felicity"
      }
    }
  }
  feature {
    key: "Scores"
    value {
      float_list {
        value: 45.599998474121094
        value: 97.19999694824219
      }
    }
  }
}

#### Serializing And Writing This Record To Disc Is The Same As TFRecord Example1:

In [39]:
writer_rec = tf.io.TFRecordWriter(filename)
writer_rec.write(example.SerializeToString())
writer_rec.close()

#### To read this back, we just need to construct our parse_function function to reflect the structure of the record.

In [40]:
data_set = tf.data.TFRecordDataset("./students.tfrecords")
def parse_function(example_proto):
    keys_to_features = {'ID' : tf.io.FixedLenFeature([], dtype = tf.int64),
    'Name' : tf.io.VarLenFeature(dtype = tf.string),
    'Scores' : tf.io.VarLenFeature(dtype = tf.float32)}
    parsed_features = tf.io.parse_single_example(serialized=example_proto, features=keys_to_features)
    return parsed_features["ID"], parsed_features["Name"], parsed_features["Scores"]

### Parsing The Data

In [41]:
dataset = data_set.map(parse_function)
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
items = iterator.get_next()

### Records Is Retrived as One Item

In [42]:
print(items)

(<tf.Tensor: id=113942, shape=(), dtype=int64, numpy=61553>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x000001AF7D6ECE80>, <tensorflow.python.framework.sparse_tensor.SparseTensor object at 0x000001AF7D68C550>)


#### Now we can extract our data from item (note that the string must be decoded (from bytes) where the default for our python 3 is utf(8).

#### Note also that the string and the array if floats are returned as sparse arrays, and to extract them from the record, we use the sparse array value method.

In [46]:
item

<tf.Tensor: id=113829, shape=(), dtype=int32, numpy=41>

In [48]:
print("ID :", items[0].numpy())
name = items[1].values.numpy()
name1 = name[0].decode()
name2 = name[1].decode('utf8')
print("Name : ", name1, ",", name2)
print("Scores : ", items[2].values.numpy())

ID : 61553
Name :  Jones , Felicity
Scores :  [45.6 97.2]


#### One-Hot Encoding Example 1

In this example, we are converting a decimal value of 7 to a one-hot encoded value of 0000000100 using:
    
    the tf.one_hot() method:

In [49]:
z = 7
z_train_ohe = tf.one_hot(z, depth = 10).numpy()
print(z, "is ", z_train_ohe, "when one-hot encoded with a depth of 10")

7 is  [0. 0. 0. 0. 0. 0. 0. 1. 0. 0.] when one-hot encoded with a depth of 10


#### One-Hot Encoding Example 2 

Ussing The Fashion MNIST Dataset.

The original labels are integers from 0 to 9, so for example a label of 5 becomes 0000010000 when onehot encoded, but note the difference between the index and the label stored at that index

In [51]:
import tensorflow as tf
from tensorflow.python.keras.datasets import fashion_mnist

width, height = 28,28
# Total Classes
n_classes = 10

#### Loading The Dataset

In [52]:
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


#### Split Features training set into training and validation sets.

In [53]:
split = 50000
(y_train, y_valid) = y_train[:split], y_train[split:]

#### One-Hot encode the labels using Tensorflow then convert back to numpy for display

In [54]:
y_train_ohe = tf.one_hot(y_train, depth = n_classes).numpy()
y_valid_ohe = tf.one_hot(y_valid, depth = n_classes).numpy()
y_test_ohe = tf.one_hot(y_test, depth = n_classes).numpy()

# Show Difference between the original label and a one - hot- encoded label
i = 8
print(y_train[i])# "ordinary" number value of label at index i = 8 is 5
# Noth The Differecne between the index of 8 anmd the label at that index which is 5
print(y_train_ohe[i])

5
[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
