<a href="https://colab.research.google.com/github/GuptaNavdeep1983/DeepLearningRepo/blob/main/TensorFlow3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import functools

import numpy as np
import tensorflow as tf

print("TensorFlow version: ",tf.version.VERSION)

TensorFlow version:  2.3.0


In [30]:
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
test_file_path = tf.keras.utils.get_file("test.csv", TEST_DATA_URL)

In [31]:
np.set_printoptions(precision=3, suppress=True)

## Load data

This section provides an example of how to load CSV data from a file into a `tf.data.Dataset`.  The data used in this tutorial are taken from the Titanic passenger list. The model will predict the likelihood a passenger survived based on characteristics like age, gender, ticket class, and whether the person was traveling alone.

To start, let's look at the top of the CSV file to see how it is formatted.

In [32]:
!head {train_file_path}

survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
1,female,35.0,1,0,53.1,First,C,Southampton,n
0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y
0,male,2.0,3,1,21.075,Third,unknown,Southampton,n
1,female,27.0,0,2,11.1333,Third,unknown,Southampton,n
1,female,14.0,1,0,30.0708,Second,unknown,Cherbourg,n
1,female,4.0,1,1,16.7,Third,G,Southampton,n


In [33]:
LABEL_COLUMN = 'survived'
LABELS = [0, 1]

In [34]:
def get_dataset(file_path, **kwargs):
 # TODO 2 
 # Use `tf.data.experimental.make_csv_dataset()` to read CSV files into a dataset.
  dataset = tf.data.experimental.make_csv_dataset(
      file_path,
      batch_size=5, # Artificially small to make examples easier to show.
      label_name=LABEL_COLUMN,
      na_value="?",
      num_epochs=1,
      ignore_errors=True,
      **kwargs)
  return dataset

raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)

In [35]:
def show_batch(dataset):
  for batch, label in dataset.take(1):
    for key, value in batch.items():
      print("{:20s}: {}".format(key,value.numpy()))

In [36]:
show_batch(raw_train_data)

sex                 : [b'male' b'male' b'female' b'male' b'male']
age                 : [51. 11. 27. 36. 28.]
n_siblings_spouses  : [0 0 0 1 0]
parch               : [0 0 0 2 0]
fare                : [ 12.525  18.788  10.5   120.     26.55 ]
class               : [b'Second' b'Third' b'Second' b'First' b'First']
deck                : [b'unknown' b'unknown' b'E' b'B' b'C']
embark_town         : [b'Southampton' b'Cherbourg' b'Southampton' b'Southampton' b'Southampton']
alone               : [b'y' b'y' b'y' b'n' b'y']


In [37]:
SELECT_COLUMNS = ['survived', 'age', 'n_siblings_spouses', 'parch', 'fare']
DEFAULTS = [0, 0.0, 0.0, 0.0, 0.0]
temp_dataset = get_dataset(train_file_path, 
                           select_columns=SELECT_COLUMNS,
                           column_defaults = DEFAULTS)

show_batch(temp_dataset)

age                 : [28. 45. 49. 52. 29.]
n_siblings_spouses  : [1. 0. 1. 1. 1.]
parch               : [0. 0. 0. 0. 0.]
fare                : [82.171 35.5   76.729 78.267 26.   ]


In [38]:
example_batch, labels_batch = next(iter(temp_dataset)) 
labels_batch

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 0, 0, 1, 1], dtype=int32)>

In [39]:
# `pack()` function will pack together all the columns
def pack(features, label):
# `tf.stack()` stacks a list of rank-R tensors into one rank-(R+1) tensor.
  return tf.stack(list(features.values()), axis=-1), label

In [40]:
packed_dataset = temp_dataset.map(pack)
packed_dataset

<MapDataset shapes: ((None, 4), (None,)), types: (tf.float32, tf.int32)>

In [41]:
for features, labels in packed_dataset.take(1):
  print(features.numpy())
  print()
  print(labels.numpy())

[[30.     0.     0.    13.   ]
 [53.     2.     0.    51.479]
 [28.     0.     0.     8.05 ]
 [31.     1.     0.    57.   ]
 [20.     0.     0.     7.05 ]]

[0 1 0 1 0]


In [42]:
class PackNumericFeatures(object):
  def __init__(self, names):
    self.names = names

  def __call__(self, features, labels):
    numeric_features = [features.pop(name) for name in self.names]
    numeric_features = [tf.cast(feat, tf.float32) for feat in numeric_features]
    numeric_features = tf.stack(numeric_features, axis=-1)
    features['numeric'] = numeric_features

    return features, labels

In [43]:
NUMERIC_FEATURES = ['age','n_siblings_spouses','parch', 'fare']

packed_train_data = raw_train_data.map(
    PackNumericFeatures(NUMERIC_FEATURES))

packed_test_data = raw_test_data.map(
    PackNumericFeatures(NUMERIC_FEATURES))

In [44]:
show_batch(packed_train_data)

sex                 : [b'male' b'female' b'female' b'female' b'male']
class               : [b'Second' b'Third' b'First' b'First' b'Third']
deck                : [b'unknown' b'unknown' b'D' b'unknown' b'unknown']
embark_town         : [b'Southampton' b'Queenstown' b'Cherbourg' b'Cherbourg' b'Southampton']
alone               : [b'y' b'n' b'n' b'n' b'y']
numeric             : [[ 66.      0.      0.     10.5  ]
 [ 28.      1.      0.     15.5  ]
 [ 31.      1.      0.    113.275]
 [ 54.      1.      0.     59.4  ]
 [ 28.      0.      0.      7.896]]


In [52]:
example_batch, labels_batch = next(iter(packed_train_data)) 

In [45]:
# pandas is used for data manipulation and analysis.
import pandas as pd
# pandas module read_csv() function reads the CSV file into a DataFrame object.
desc = pd.read_csv(train_file_path)[NUMERIC_FEATURES].describe()
desc

Unnamed: 0,age,n_siblings_spouses,parch,fare
count,627.0,627.0,627.0,627.0
mean,29.631308,0.545455,0.379585,34.385399
std,12.511818,1.15109,0.792999,54.59773
min,0.75,0.0,0.0,0.0
25%,23.0,0.0,0.0,7.8958
50%,28.0,0.0,0.0,15.0458
75%,35.0,1.0,0.0,31.3875
max,80.0,8.0,5.0,512.3292


In [47]:
MEAN = np.array(desc.T['mean'])
STD = np.array(desc.T['std'])
MEAN, STD

(array([29.631,  0.545,  0.38 , 34.385]),
 array([12.512,  1.151,  0.793, 54.598]))

In [48]:
def normalize_numeric_data(data, mean, std):
# TODO 2
  # Center the data
  return (data-mean)/std

In [49]:
print(MEAN, STD)

[29.631  0.545  0.38  34.385] [12.512  1.151  0.793 54.598]


In [50]:
# See what you just created.
# Bind the MEAN and STD to the normalizer fn using `functools.partial`
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

# `tf.feature_column.numeric_column()` represents real valued or numerical features.
numeric_column = tf.feature_column.numeric_column('numeric', normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)])
numeric_columns = [numeric_column]
numeric_column

NumericColumn(key='numeric', shape=(4,), default_value=None, dtype=tf.float32, normalizer_fn=functools.partial(<function normalize_numeric_data at 0x7f3af1c8f378>, mean=array([29.631,  0.545,  0.38 , 34.385]), std=array([12.512,  1.151,  0.793, 54.598])))

In [53]:
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()

array([[ 1.388, -0.474, -0.479, -0.497],
       [ 2.427,  0.395, -0.479,  0.748],
       [-0.13 , -0.474, -0.479, -0.487],
       [-0.13 ,  2.132,  0.782, -0.163],
       [-1.489,  0.395,  2.043,  1.568]], dtype=float32)

In [54]:
CATEGORIES = {
    'sex': ['male', 'female'],
    'class' : ['First', 'Second', 'Third'],
    'deck' : ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
    'embark_town' : ['Cherbourg', 'Southhampton', 'Queenstown'],
    'alone' : ['y', 'n']
}


In [56]:
categorical_columns = []
for feature, vocab in CATEGORIES.items():
# Use the `tf.feature_column` API to create a collection with a `tf.feature_column.indicator_column` for each categorical column.
  cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
        key=feature, vocabulary_list=vocab)
  categorical_columns.append(tf.feature_column.indicator_column(cat_col))

In [57]:
categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)
print(categorical_layer(example_batch).numpy()[0])

[1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]


In [58]:
# Add the two feature column collections
# Pass them to a `tf.keras.layers.DenseFeatures()` to create an input layer.
# TODO 1
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numeric_columns)

In [59]:
print(preprocessing_layer(example_batch).numpy()[0])

[ 1.     0.     0.     0.     1.     0.     0.     0.     0.     0.
  0.     0.     0.     0.     0.     0.     0.     0.     1.388 -0.474
 -0.479 -0.497  1.     0.   ]


In [60]:
DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'

# `tf.keras.utils.get_file()` downloads a file from a URL if it not already in the cache.
path = tf.keras.utils.get_file('mnist.npz', DATA_URL)
with np.load(path) as data:
# TODO 1
  train_examples = data['x_train']
  train_labels = data['y_train']
  test_examples = data['x_test']
  test_labels = data['y_test']

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [61]:
# With the help of `tf.data.Dataset.from_tensor_slices()` method, we can get the slices of an array in the form of objects.
# by using `tf.data.Dataset.from_tensor_slices()` method.
# TODO 2
train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_labels))