<a href="https://colab.research.google.com/github/LinCheungS/TensorFlow2_DL/blob/master/tensorflow1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Report the TensorFlow build, the interpreter version, and the version of
# each library imported above.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    lib_name, lib_version = lib.__name__, lib.__version__
    print(lib_name, lib_version)

1.13.1
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 1.13.1
tensorflow._api.v1.keras 2.2.4-tf


## tf1_customized_estimator

In [0]:
# Titanic train/eval CSVs, read from local copies. Presumably downloaded from:
#   https://storage.googleapis.com/tf-datasets/titanic/train.csv
#   https://storage.googleapis.com/tf-datasets/titanic/eval.csv
train_file = "./data/titanic/train.csv"
eval_file = "./data/titanic/eval.csv"

train_df, eval_df = (pd.read_csv(csv_path)
                     for csv_path in (train_file, eval_file))

# Preview the first rows of each split.
for split_df in (train_df, eval_df):
    print(split_df.head())

   survived     sex   age  n_siblings_spouses  parch     fare  class     deck  \
0         0    male  22.0                   1      0   7.2500  Third  unknown   
1         1  female  38.0                   1      0  71.2833  First        C   
2         1  female  26.0                   0      0   7.9250  Third  unknown   
3         1  female  35.0                   1      0  53.1000  First        C   
4         0    male  28.0                   0      0   8.4583  Third  unknown   

   embark_town alone  
0  Southampton     n  
1    Cherbourg     n  
2  Southampton     y  
3  Southampton     n  
4   Queenstown     y  
   survived     sex   age  n_siblings_spouses  parch     fare   class  \
0         0    male  35.0                   0      0   8.0500   Third   
1         0    male  54.0                   0      0  51.8625   First   
2         1  female  58.0                   0      0  26.5500   First   
3         1  female  55.0                   0      0  16.0000  Second   
4         

In [0]:
# Detach the 'survived' column from each frame to use as the label.
# pop() mutates the frame in place, so train_df / eval_df keep only the
# feature columns afterwards (re-running this cell will therefore fail).
y_train, y_eval = (frame.pop('survived') for frame in (train_df, eval_df))

for preview in (train_df, eval_df, y_train, y_eval):
    print(preview.head())

      sex   age  n_siblings_spouses  parch     fare  class     deck  \
0    male  22.0                   1      0   7.2500  Third  unknown   
1  female  38.0                   1      0  71.2833  First        C   
2  female  26.0                   0      0   7.9250  Third  unknown   
3  female  35.0                   1      0  53.1000  First        C   
4    male  28.0                   0      0   8.4583  Third  unknown   

   embark_town alone  
0  Southampton     n  
1    Cherbourg     n  
2  Southampton     y  
3  Southampton     n  
4   Queenstown     y  
      sex   age  n_siblings_spouses  parch     fare   class     deck  \
0    male  35.0                   0      0   8.0500   Third  unknown   
1    male  54.0                   0      0  51.8625   First        E   
2  female  58.0                   0      0  26.5500   First        C   
3  female  55.0                   0      0  16.0000  Second  unknown   
4    male  34.0                   0      0  13.0000  Second        D   

  

In [0]:
# Summary statistics (count, mean, std, quartiles) of the training features.
train_df.describe()

Unnamed: 0,age,n_siblings_spouses,parch,fare
count,627.0,627.0,627.0,627.0
mean,29.631308,0.545455,0.379585,34.385399
std,12.511818,1.15109,0.792999,54.59773
min,0.75,0.0,0.0,0.0
25%,23.0,0.0,0.0,7.8958
50%,28.0,0.0,0.0,15.0458
75%,35.0,1.0,0.0,31.3875
max,80.0,8.0,5.0,512.3292


In [0]:
categorical_columns = ['sex', 'n_siblings_spouses', 'parch', 'class',
                       'deck', 'embark_town', 'alone']
numeric_columns = ['age', 'fare']

feature_columns = []

# Categorical features: indicator (one-hot) encode each column over the
# vocabulary observed in the training split.
for categorical_column in categorical_columns:
    vocab = train_df[categorical_column].unique()
    print(categorical_column, vocab)
    feature_columns.append(
        tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                categorical_column, vocab)))

# Numeric features are used as float32 values. The loop variable is named
# numeric_column (it used to reuse the name categorical_column, which was
# misleading).
for numeric_column in numeric_columns:
    feature_columns.append(
        tf.feature_column.numeric_column(
            numeric_column, dtype=tf.float32))

sex ['male' 'female']
n_siblings_spouses [1 0 3 4 2 5 8]
parch [0 1 2 5 3 4]
class ['Third' 'First' 'Second']
deck ['unknown' 'C' 'G' 'A' 'B' 'D' 'F' 'E']
embark_town ['Southampton' 'Cherbourg' 'Queenstown' 'unknown']
alone ['n' 'y']


In [0]:
def make_dataset(data_df, label_df, epochs = 10, shuffle = True,
                 batch_size = 32):
    """Build the input pipeline and return its next-batch tensors.

    Slices (dict(data_df), label_df) into individual examples, optionally
    shuffles them with a 10000-element buffer, repeats the stream `epochs`
    times and groups it into batches of `batch_size`.

    Returns:
        The result of get_next() on a one-shot iterator over that dataset.
    """
    features = dict(data_df)
    pipeline = tf.data.Dataset.from_tensor_slices((features, label_df))
    if shuffle:
        pipeline = pipeline.shuffle(10000)
    pipeline = pipeline.repeat(epochs)
    pipeline = pipeline.batch(batch_size)
    batch_iterator = pipeline.make_one_shot_iterator()
    return batch_iterator.get_next()

In [0]:
output_dir = "customized_estimator"
# makedirs(exist_ok=True) is a no-op when the directory already exists and
# avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(output_dir, exist_ok=True)

def model_fn(features, labels, mode, params):
    """Estimator model function: a ReLU MLP followed by a linear logits layer.

    Args:
        features: input features, consumed via tf.feature_column.input_layer.
        labels: class-id labels for the sparse softmax cross-entropy loss
            (not used in PREDICT mode).
        mode: a tf.estimator.ModeKeys value (TRAIN, EVAL or PREDICT).
        params: dict with 'feature_columns', 'hidden_units' (iterable of
            layer widths) and 'n_classes'.

    Returns:
        A tf.estimator.EstimatorSpec populated for the requested mode.
    """
    mode_keys = tf.estimator.ModeKeys

    # Feature columns -> dense input -> stack of hidden layers -> logits.
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    for width in params['hidden_units']:
        net = tf.layers.dense(net, units = width, activation = tf.nn.relu)
    logits = tf.layers.dense(net, params['n_classes'], activation = None)
    predicted_classes = tf.argmax(logits, 1)

    # PREDICT: return before the loss is built, since it is not needed.
    if mode == mode_keys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions = {
                "class_ids": predicted_classes[:, tf.newaxis],
                "probabilities": tf.nn.softmax(logits),
                "logits": logits,
            })

    loss = tf.losses.sparse_softmax_cross_entropy(labels = labels,
                                                  logits = logits)
    accuracy = tf.metrics.accuracy(labels = labels,
                                   predictions = predicted_classes,
                                   name = "acc_op")

    # EVAL: report the loss together with the streaming accuracy metric.
    if mode == mode_keys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss = loss, eval_metric_ops = {"accuracy": accuracy})

    # TRAIN: Adam with its default settings, advancing the global step.
    train_op = tf.train.AdamOptimizer().minimize(
        loss, global_step = tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss = loss, train_op = train_op)

# Wrap model_fn in an Estimator that checkpoints to output_dir, then train
# it on the training split (100 passes over the data).
model_params = {
    "feature_columns": feature_columns,
    "hidden_units": [100, 100],
    "n_classes": 2,
}
estimator = tf.estimator.Estimator(model_fn = model_fn,
                                   model_dir = output_dir,
                                   params = model_params)
estimator.train(
    input_fn = lambda : make_dataset(train_df, y_train, epochs = 100))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'customized_estimator', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12daf3080>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Colocations handled automatically by placer.
INFO:tensorflow:Calling model_fn.
Instructions for updating:
The old

<tensorflow_estimator.python.estimator.estimator.Estimator at 0x12da60ef0>

In [0]:
# Score the trained estimator with a single pass over the evaluation split.
estimator.evaluate(
    input_fn = lambda : make_dataset(eval_df, y_eval, epochs = 1))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-06-12T13:50:23Z
INFO:tensorflow:Graph was finalized.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from customized_estimator/model.ckpt-1960
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-06-12-13:50:23
INFO:tensorflow:Saving dict for global step 1960: accuracy = 0.7916667, global_step = 1960, loss = 0.5161865
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1960: customized_estimator/model.ckpt-1960


{'accuracy': 0.7916667, 'loss': 0.5161865, 'global_step': 1960}

## tf1_dataset

In [0]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Report the TensorFlow build, the interpreter version, and the version of
# each library imported above.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    lib_name, lib_version = lib.__name__, lib.__version__
    print(lib_name, lib_version)


1.13.1
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 1.13.1
tensorflow._api.v1.keras 2.2.4-tf


In [0]:
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training examples as the validation split.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

for split_x, split_y in ((x_valid, y_valid),
                         (x_train, y_train),
                         (x_test, y_test)):
    print(split_x.shape, split_y.shape)

(5000, 28, 28) (5000,)
(55000, 28, 28) (55000,)
(10000, 28, 28) (10000,)


In [0]:
# Value range of the raw pixels before scaling.
print(x_train.max(), x_train.min())

255 0


In [0]:
# x = (x - u) / std

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

def _to_pixel_column(images):
    """Cast to float32 and flatten everything into a single column."""
    return images.astype(np.float32).reshape(-1, 1)

# x_train: [None, 28, 28] -> [None, 784]
# The scaler is fitted on the training split only and reused for the others.
x_train_scaled = scaler.fit_transform(
    _to_pixel_column(x_train)).reshape(-1, 28 * 28)
x_valid_scaled = scaler.transform(
    _to_pixel_column(x_valid)).reshape(-1, 28 * 28)
x_test_scaled = scaler.transform(
    _to_pixel_column(x_test)).reshape(-1, 28 * 28)

# Labels as int64 arrays.
y_train, y_valid, y_test = (np.asarray(split_y, dtype = np.int64)
                            for split_y in (y_train, y_valid, y_test))


In [0]:
# Value range of the pixels after standardisation.
print(x_train_scaled.max(), x_train_scaled.min())

2.023144 -0.8105139


In [0]:
def make_dataset(images, labels, epochs, batch_size, shuffle = True):
    """Return a tf.data.Dataset of (images, labels) batches.

    Examples are sliced from the two inputs, optionally shuffled with a
    10000-element buffer, repeated `epochs` times and grouped into batches
    of `batch_size`.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        pipeline = pipeline.shuffle(10000)
    return pipeline.repeat(epochs).batch(batch_size)

In [0]:
batch_size = 20
epochs = 10
dataset = make_dataset(x_train_scaled, y_train,
                       epochs = epochs,
                       batch_size = batch_size)
# Demonstration: without eager execution a Dataset cannot be iterated with a
# Python for-loop. The expected RuntimeError is caught and printed so that
# "Restart & Run All" can continue past this cell.
try:
    for data, label in dataset.take(1):
        print(data)
        print(label)
except RuntimeError as err:
    print("%s: %s" % (type(err).__name__, err))

RuntimeError: dataset.__iter__() is only supported when eager execution is enabled.

In [0]:
batch_size = 20
epochs = 10
dataset = make_dataset(x_train_scaled, y_train,
                       epochs = epochs, batch_size = batch_size)

# A one-shot iterator
#   1. is initialized automatically, but
#   2. can't be re-initialized (use make_initializable_iterator for that).
dataset_iter = dataset.make_one_shot_iterator()
x, y = dataset_iter.get_next()

# Pull a single batch to check the shapes it produces.
with tf.Session() as sess:
    x_val, y_val = sess.run([x, y])
    for batch_array in (x_val, y_val):
        print(batch_array.shape)

(20, 784)
(20,)


In [0]:
hidden_units = [100, 100]
class_num = 10

# Fully connected ReLU stack on top of the iterator output x.
hidden = x
for n_units in hidden_units:
    hidden = tf.layers.dense(hidden, n_units, activation=tf.nn.relu)
# Linear output layer: one logit per class.
logits = tf.layers.dense(hidden, class_num)

# last_hidden_output * W(logits) -> softmax -> prob
# The loss takes integer class ids; conceptually it
#   1. turns logits into probabilities via softmax,
#   2. one-hot encodes the labels,
#   3. computes the cross entropy between the two.
loss = tf.losses.sparse_softmax_cross_entropy(labels = y, logits = logits)

# Accuracy: fraction of the batch whose arg-max class equals the label.
prediction = tf.argmax(logits, 1)
correct_prediction = tf.equal(prediction, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

# Adam with learning rate 1e-3.
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [0]:
# Inspect the symbolic input and output tensors of the graph.
for tensor in (x, logits):
    print(tensor)

Tensor("IteratorGetNext:0", shape=(?, 784), dtype=float32)
Tensor("dense_2/BiasAdd:0", shape=(?, 10), dtype=float32)


In [0]:
# session

init = tf.global_variables_initializer()
train_steps_per_epoch = x_train.shape[0] // batch_size
progress_fmt = '\r[Train] epoch: %d, step: %d, loss: %3.5f, accuracy: %2.2f'

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        for step in range(train_steps_per_epoch):
            # One optimisation step; x and y come from the dataset iterator,
            # so no feed_dict is needed.
            loss_val, accuracy_val, _ = sess.run([loss, accuracy, train_op])
            # '\r' rewrites the same output line on every step.
            print(progress_fmt % (epoch, step, loss_val, accuracy_val),
                  end="")

[Train] epoch: 9, step: 2749, loss: 0.21062, accuracy: 0.90

## dense_network

In [0]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Report the TensorFlow build, the interpreter version, and the version of
# each library imported above.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    lib_name, lib_version = lib.__name__, lib.__version__
    print(lib_name, lib_version)


1.13.1
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 1.13.1
tensorflow._api.v1.keras 2.2.4-tf


In [0]:
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training examples as the validation split.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

for split_x, split_y in ((x_valid, y_valid),
                         (x_train, y_train),
                         (x_test, y_test)):
    print(split_x.shape, split_y.shape)

(5000, 28, 28) (5000,)
(55000, 28, 28) (55000,)
(10000, 28, 28) (10000,)


In [0]:
# Value range of the raw pixels before scaling.
print(x_train.max(), x_train.min())

255 0


In [0]:
# x = (x - u) / std

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

def _to_pixel_column(images):
    """Cast to float32 and flatten everything into a single column."""
    return images.astype(np.float32).reshape(-1, 1)

# x_train: [None, 28, 28] -> [None, 784]
# The scaler is fitted on the training split only and reused for the others.
x_train_scaled = scaler.fit_transform(
    _to_pixel_column(x_train)).reshape(-1, 28 * 28)
x_valid_scaled = scaler.transform(
    _to_pixel_column(x_valid)).reshape(-1, 28 * 28)
x_test_scaled = scaler.transform(
    _to_pixel_column(x_test)).reshape(-1, 28 * 28)


In [0]:
# Value range of the pixels after standardisation.
print(x_train_scaled.max(), x_train_scaled.min())

2.023144 -0.8105139


In [0]:
hidden_units = [100, 100]
class_num = 10

# Inputs are fed at run time: flattened 28*28 images and int64 class ids.
x = tf.placeholder(tf.float32, [None, 28 * 28])
y = tf.placeholder(tf.int64, [None])

# Fully connected ReLU stack on top of the image placeholder.
hidden = x
for n_units in hidden_units:
    hidden = tf.layers.dense(hidden, n_units, activation=tf.nn.relu)
# Linear output layer: one logit per class.
logits = tf.layers.dense(hidden, class_num)

# last_hidden_output * W(logits) -> softmax -> prob
# The loss takes integer class ids; conceptually it
#   1. turns logits into probabilities via softmax,
#   2. one-hot encodes the labels,
#   3. computes the cross entropy between the two.
loss = tf.losses.sparse_softmax_cross_entropy(labels = y, logits = logits)

# Accuracy: fraction of the batch whose arg-max class equals the label.
prediction = tf.argmax(logits, 1)
correct_prediction = tf.equal(prediction, y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))

# Adam with learning rate 1e-3.
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [0]:
# Inspect the symbolic input and output tensors of the graph.
for tensor in (x, logits):
    print(tensor)

Tensor("Placeholder:0", shape=(?, 784), dtype=float32)
Tensor("dense_2/BiasAdd:0", shape=(?, 10), dtype=float32)


In [0]:
# session

batch_size = 20
epochs = 10
init = tf.global_variables_initializer()

# Number of whole batches in one pass over each split.
train_steps_per_epoch, valid_steps = (
    split.shape[0] // batch_size for split in (x_train, x_valid))

def eval_with_sess(sess, x, y, accuracy, images, labels, batch_size):
    """Return the accuracy over all of `images` / `labels`, evaluated in batches.

    Every sample is scored: a trailing batch smaller than `batch_size` is
    evaluated too, and each batch's accuracy is weighted by its size so the
    result is the overall fraction of correct predictions. When the number of
    samples is a multiple of `batch_size` this equals the plain mean of the
    per-batch accuracies.

    Args:
        sess: object whose run(fetch, feed_dict=...) returns the batch accuracy.
        x: feed_dict key that receives the batch of images.
        y: feed_dict key that receives the batch of labels.
        accuracy: fetch passed to sess.run.
        images: array of samples; its first axis indexes the samples.
        labels: array of labels, sliced with the same indices as `images`.
        batch_size: maximum number of samples per sess.run call.

    Returns:
        The size-weighted mean accuracy, or NaN when `images` is empty.

    Raises:
        ValueError: if `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be positive, got %r" % (batch_size,))
    n_samples = images.shape[0]
    if n_samples == 0:
        # Nothing to evaluate; NaN marks the result as undefined.
        return np.nan
    weighted_accuracy = 0.0
    for start in range(0, n_samples, batch_size):
        stop = start + batch_size  # slicing clamps the final, shorter batch
        batch_data = images[start:stop]
        batch_label = labels[start:stop]
        accuracy_val = sess.run(accuracy,
                                feed_dict = {
                                    x: batch_data,
                                    y: batch_label
                                })
        weighted_accuracy += accuracy_val * len(batch_data)
    return weighted_accuracy / n_samples

# Train with mini-batches sliced from the arrays in their stored order, and
# report the validation accuracy after every epoch.
progress_fmt = '\r[Train] epoch: %d, step: %d, loss: %3.5f, accuracy: %2.2f'

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        for step in range(train_steps_per_epoch):
            batch = slice(step * batch_size, (step + 1) * batch_size)
            loss_val, accuracy_val, _ = sess.run(
                [loss, accuracy, train_op],
                feed_dict = {
                    x: x_train_scaled[batch],
                    y: y_train[batch],
                })
            # '\r' rewrites the same output line on every step.
            print(progress_fmt % (epoch, step, loss_val, accuracy_val),
                  end="")
        valid_accuracy = eval_with_sess(sess, x, y, accuracy,
                                        x_valid_scaled, y_valid,
                                        batch_size)
        print("\t[Valid] acc: %2.2f" % (valid_accuracy))

[Train] epoch: 0, step: 2749, loss: 0.29409, accuracy: 0.85	[Valid] acc: 0.86
[Train] epoch: 1, step: 2749, loss: 0.23783, accuracy: 0.90	[Valid] acc: 0.87
[Train] epoch: 2, step: 2749, loss: 0.16557, accuracy: 0.90	[Valid] acc: 0.87
[Train] epoch: 3, step: 2749, loss: 0.16518, accuracy: 0.85	[Valid] acc: 0.88
[Train] epoch: 4, step: 2749, loss: 0.18367, accuracy: 0.90	[Valid] acc: 0.88
[Train] epoch: 5, step: 2749, loss: 0.18992, accuracy: 0.95	[Valid] acc: 0.88
[Train] epoch: 6, step: 2749, loss: 0.13003, accuracy: 0.95	[Valid] acc: 0.88
[Train] epoch: 7, step: 2749, loss: 0.19065, accuracy: 0.95	[Valid] acc: 0.89
[Train] epoch: 8, step: 2749, loss: 0.12379, accuracy: 0.90	[Valid] acc: 0.88
[Train] epoch: 9, step: 2749, loss: 0.11657, accuracy: 0.95	[Valid] acc: 0.89


## initialized_dataset

In [0]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Report the TensorFlow build, the interpreter version, and the version of
# each library imported above.
print(tf.__version__)
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    lib_name, lib_version = lib.__name__, lib.__version__
    print(lib_name, lib_version)


1.13.1
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.0.3
numpy 1.16.2
pandas 0.24.2
sklearn 0.20.3
tensorflow 1.13.1
tensorflow._api.v1.keras 2.2.4-tf


In [0]:
fashion_mnist = keras.datasets.fashion_mnist
(x_train_all, y_train_all), (x_test, y_test) = fashion_mnist.load_data()

# Hold out the first 5000 training examples as the validation split.
x_valid, y_valid = x_train_all[:5000], y_train_all[:5000]
x_train, y_train = x_train_all[5000:], y_train_all[5000:]

for split_x, split_y in ((x_valid, y_valid),
                         (x_train, y_train),
                         (x_test, y_test)):
    print(split_x.shape, split_y.shape)

(5000, 28, 28) (5000,)
(55000, 28, 28) (55000,)
(10000, 28, 28) (10000,)


In [0]:
# Value range of the raw pixels before scaling.
print(x_train.max(), x_train.min())

255 0


In [0]:
# x = (x - u) / std

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

def _to_pixel_column(images):
    """Cast to float32 and flatten everything into a single column."""
    return images.astype(np.float32).reshape(-1, 1)

# x_train: [None, 28, 28] -> [None, 784]
# The scaler is fitted on the training split only and reused for the others.
x_train_scaled = scaler.fit_transform(
    _to_pixel_column(x_train)).reshape(-1, 28 * 28)
x_valid_scaled = scaler.transform(
    _to_pixel_column(x_valid)).reshape(-1, 28 * 28)
x_test_scaled = scaler.transform(
    _to_pixel_column(x_test)).reshape(-1, 28 * 28)

# Labels as int64 arrays.
y_train, y_valid, y_test = (np.asarray(split_y, dtype = np.int64)
                            for split_y in (y_train, y_valid, y_test))


In [0]:
# Value range of the pixels after standardisation.
print(x_train_scaled.max(), x_train_scaled.min())

2.023144 -0.8105139


In [0]:
def make_dataset(images, labels, epochs, batch_size, shuffle = True):
    """Return a tf.data.Dataset of (images, labels) batches.

    Examples are sliced from the two inputs, optionally shuffled with a
    10000-element buffer, repeated `epochs` times and grouped into batches
    of `batch_size`.
    """
    pipeline = tf.data.Dataset.from_tensor_slices((images, labels))
    if shuffle:
        pipeline = pipeline.shuffle(10000)
    return pipeline.repeat(epochs).batch(batch_size)

In [0]:
batch_size = 20
epochs = 10

# The dataset is built on placeholders, so the arrays it reads are supplied
# when the iterator is initialized rather than when the graph is built.
images_placeholder = tf.placeholder(tf.float32, [None, 28 * 28])
labels_placeholder = tf.placeholder(tf.int64, (None,))

dataset = make_dataset(images_placeholder, labels_placeholder,
                       epochs = epochs,
                       batch_size = batch_size)

dataset_iter = dataset.make_initializable_iterator()
x, y = dataset_iter.get_next()

with tf.Session() as sess:
    # Initialize the same iterator first on the training split, then on the
    # validation split, fetching one batch after each initialization.
    for split_images, split_labels in ((x_train_scaled, y_train),
                                       (x_valid_scaled, y_valid)):
        sess.run(dataset_iter.initializer,
                 feed_dict = {
                     images_placeholder: split_images,
                     labels_placeholder: split_labels,
                 })
        x_val, y_val = sess.run([x, y])
        print(x_val.shape)
        print(y_val.shape)

Instructions for updating:
Colocations handled automatically by placer.
(20, 784)
(20,)
(20, 784)
(20,)
