# 4주차, 5일차 : 1-3주차 튜토리얼 작성하기
- ### Contents 
    1. Basic classification: Classify images of clothing : https://www.tensorflow.org/tutorials/keras/basic_classification
    2. Text classification with TensorFlow Hub: Movie reviews : https://www.tensorflow.org/tutorials/keras/basic_text_classification
    3. Basic regression: Predict fuel efficiency : https://www.tensorflow.org/tutorials/keras/basic_regression
    4. Build a linear model with Estimators : https://www.tensorflow.org/tutorials/estimator/linear
    5. Overfit And Underfit : https://www.tensorflow.org/tutorials/keras/overfit_and_underfit
    6. Load CSV Data : https://www.tensorflow.org/tutorials/load_data/csv
    7. TFRecord and tf.Example : https://www.tensorflow.org/tutorials/load_data/tfrecord
    8. Save and load models : https://www.tensorflow.org/tutorials/keras/save_and_restore_models
    9. Better performance with the tf.data API: https://www.tensorflow.org/guide/data_performance
    10. Time Series Forecasting: https://www.tensorflow.org/tutorials/structured_data/time_series
    11. Text Classification with an RNN: https://www.tensorflow.org/tutorials/text/text_classification_rnn
    12. Distributed training with Keras: https://www.tensorflow.org/tutorials/distribute/keras



## 1. Basic classification: Classify images of clothing

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras

import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

In [None]:
fashion_mnist = keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

In [None]:
# Human-readable names for the Fashion-MNIST classes. The list is indexed by
# the integer label (see `class_names[train_labels[i]]` below), so the order
# of the entries matters.
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker',
              'Bag', 'Ankle boot']

In [None]:
# Explore the data
train_images.shape

In [None]:
len(train_labels)

In [None]:
train_labels

In [None]:
test_images.shape

In [None]:
len(test_labels)

In [None]:
# preprocess the data
plt.figure()
plt.imshow(train_images[0])
plt.colorbar()
plt.grid(False)
plt.show()

In [None]:
train_images = train_images / 255.0
test_images  = test_images  / 255.0

In [None]:
plt.figure(figsize=(10, 10))
for i in range(25):
    plt.subplot(5, 5, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    plt.xlabel(class_names[train_labels[i]])
plt.show()

In [None]:
# Build the model
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(10, activation='softmax')
])

In [None]:
# Compile the model
model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

In [None]:
# Train the model
model.fit(train_images, train_labels, epochs=10)

In [None]:
# Evaluate accuracy
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)

print('\nTest accuracy:', test_acc)

In [None]:
# Make predictions
predictions = model.predict(test_images)

In [None]:
predictions[0]

In [None]:
np.argmax(predictions[0])

In [None]:
test_labels[0]

In [None]:
def plot_image(i, predictions_array, true_label, img):
    """Draw image ``i`` on the current axes with a prediction caption.

    Args:
        i: Index used to look up the true label and the image.
        predictions_array: Per-class scores for this one image.
        true_label: Indexable collection of true labels; ``true_label[i]`` is used.
        img: Indexable collection of images; ``img[i]`` is drawn.

    The x-label reads "<predicted class> <top score as %> (<true class>)"
    and is blue when the prediction matches the true label, red otherwise.
    """
    actual = true_label[i]
    picture = img[i]

    # Plain image display: no grid and no tick marks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    plt.imshow(picture, cmap=plt.cm.binary)

    predicted_label = np.argmax(predictions_array)
    color = 'blue' if predicted_label == actual else 'red'

    caption = "{} {:2.0f}% ({})".format(class_names[predicted_label],
                                        100*np.max(predictions_array),
                                        class_names[actual])
    plt.xlabel(caption, color=color)
    
def plot_value_array(i, predictions_array, true_label):
    """Draw a bar chart of the ten class scores for one prediction.

    Args:
        i: Index used to look up the true label.
        predictions_array: Per-class scores (ten values) for this one image.
        true_label: Indexable collection of true labels; ``true_label[i]`` is used.

    All bars start grey. The predicted class is painted red first and the true
    class blue second, so a correct prediction ends up as a single blue bar.
    """
    actual = true_label[i]

    plt.grid(False)
    plt.xticks(range(10))
    plt.yticks([])
    bars = plt.bar(range(10), predictions_array, color='#777777')
    plt.ylim([0, 1])

    winner = np.argmax(predictions_array)
    bars[winner].set_color('red')
    bars[actual].set_color('blue')

In [None]:
i = 0
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions[i], test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions[i], test_labels)
plt.show()

In [None]:
i = 12
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions[i], test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions[i], test_labels)
plt.show()

In [None]:
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, predictions[i], test_labels, test_images)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_value_array(i, predictions[i], test_labels)
plt.tight_layout()
plt.show()

In [None]:
# Grab one image from the test set. Use a fixed index instead of the loop
# variable `i` left over from the grid plot: the bar chart drawn for this
# prediction looks up the true label at index 1, so the image must be
# test_images[1] for the two to agree.
img = test_images[1]

print(img.shape)

In [None]:
img = (np.expand_dims(img,0))
img.shape

In [None]:
predictions_single = model.predict(img)

predictions_single

In [None]:
plot_value_array(1, predictions_single[0], test_labels)
_ = plt.xticks(range(10), class_names, rotation=45)

In [None]:
np.argmax(predictions_single[0])

## 2. Text classification with TensorFlow Hub: Movie reviews

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import tensorflow as tf

import tensorflow_hub as hub
import tensorflow_datasets as tfds

# Report the runtime environment: library versions, eager mode, and whether
# TensorFlow can see at least one GPU.
print('Version: ', tf.__version__)
print('Eager mode: ', tf.executing_eagerly())
print('Hub version: ', hub.__version__)
print('GPU is', 'available' if tf.config.experimental.list_physical_devices('GPU') else 'NOT AVAILABLE')

In [None]:
# Download the IMDB dataset
# Split the original training set 60/40 into training and validation subsets
# with the string slicing API. `tfds.Split.TRAIN.subsplit(...)` was removed
# from newer tensorflow_datasets releases, so it is no longer used here.
train_data, validation_data, test_data = tfds.load(
    name='imdb_reviews',
    split=('train[:60%]', 'train[60%:]', 'test'),
    as_supervised=True  # yield (text, label) pairs
)

In [None]:
# Explore the data
train_examples_batch, train_labels_batch = next(iter(train_data.batch(10)))
train_examples_batch

In [None]:
train_labels_batch

In [None]:
# Build the model
embedding = "https://tfhub.dev/google/tf2-preview/gnews-swivel-20dim/1"
hub_layer = hub.KerasLayer(embedding, input_shape=[],
                          dtype=tf.string, trainable=True)
hub_layer(train_examples_batch[:3])

In [None]:
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

In [None]:
# Loss function and optimizer
model.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy'])

In [None]:
# Train the model
history = model.fit(train_data.shuffle(10000).batch(512),
                   epochs=20,
                   validation_data=validation_data.batch(512),
                   verbose=1)

In [None]:
# Evaluate the model
results = model.evaluate(test_data.batch(512), verbose=2)

for name, value in zip(model.metrics_names, results):
    print('%s: %.3f' % (name, value))

## 3. Basic regression: Predict fuel efficiency

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)

In [None]:
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [None]:
# The Auto MPG dataset
# Get the data
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path

In [None]:
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
               'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                         na_values='?', comment='\t', sep=' ', skipinitialspace=True)
dataset = raw_dataset.copy()
dataset.tail()

In [None]:
# Clean the data
dataset.isna().sum()

In [None]:
dataset = dataset.dropna()

In [None]:
dataset['Origin'] = dataset['Origin'].map(lambda x : {1:'USA', 2:'Europe', 3:'Japan'}.get(x))

In [None]:
dataset = pd.get_dummies(dataset, prefix='', prefix_sep='')
dataset.tail()

In [None]:
# Split the data into train and test
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset  = dataset.drop(train_dataset.index)

In [None]:
# Inspect the data
sns.pairplot(train_dataset[['MPG', 'Cylinders', 'Displacement', 'Weight']], diag_kind='kde')

In [None]:
train_stats = train_dataset.describe()
train_stats.pop('MPG')
train_stats = train_stats.transpose()
train_stats

In [None]:
# Split features from labels

In [None]:
train_labels = train_dataset.pop('MPG')
test_labels  = test_dataset.pop('MPG')

In [None]:
# Normalize the data
def norm(x):
    """Standardize ``x`` with the training-set statistics in ``train_stats``.

    Subtracts ``train_stats['mean']`` and divides by ``train_stats['std']``.
    The same training statistics are applied to both the train and test
    frames below.
    """
    centered = x - train_stats['mean']
    return centered / train_stats['std']

normed_train_data = norm(train_dataset)
normed_test_data  = norm(test_dataset)

In [None]:
# The model 
# Build the model
def build_model():
    """Create and compile the regression network.

    Two 64-unit ReLU hidden layers feed a single linear output unit. The input
    width is the number of columns in the module-level ``train_dataset``.

    Returns:
        The compiled ``keras.Sequential`` model (MSE loss, RMSprop with
        learning rate 0.001, tracking MAE and MSE).
    """
    n_features = len(train_dataset.keys())

    network = keras.Sequential()
    network.add(layers.Dense(64, activation='relu', input_shape=[n_features]))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dense(1))

    network.compile(loss='mse',
                    optimizer=tf.keras.optimizers.RMSprop(0.001),
                    metrics=['mae', 'mse'])
    return network

In [None]:
model = build_model()

In [None]:
# Inspect the model
model.summary()

In [None]:
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result

In [None]:
# Train the model
EPOCHS = 1000

history = model.fit(
    normed_train_data, train_labels,
    epochs=EPOCHS, validation_split=0.2, verbose=0,
    callbacks=[tfdocs.modeling.EpochDots()]
)

In [None]:
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()

In [None]:
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)

In [None]:
plotter.plot({'Basic': history}, metric='mae')
plt.ylim([0, 10])
plt.ylabel('MAE [MPG]')

In [None]:
plotter.plot({'Basic': history}, metric='mse')
plt.ylim([0, 20])
plt.ylabel('MSE [MPG^2]')

In [None]:
model = build_model()

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

early_history = model.fit(normed_train_data, train_labels,
                         epochs=EPOCHS, validation_split=0.2, verbose=0,
                         callbacks=[early_stop, tfdocs.modeling.EpochDots()])

In [None]:
plotter.plot({'Early Stopping': early_history}, metric='mae')
plt.ylim([0, 10])
plt.ylabel('MAE [MPG]')

In [None]:
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=2)

print('Testing set Mean Abs Error: {:5.2f} MPG'.format(mae))

In [None]:
# Make predictions

In [None]:
test_predictions = model.predict(normed_test_data).flatten()

a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
lims = [0, 50]
plt.xlim(lims)
plt.ylim(lims)
_ = plt.plot(lims, lims)

In [None]:
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel('Prediction Error [MPG]')
_ = plt.ylabel('Count')

## 4. Build a linear model with Estimators

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

In [None]:
# Load the titanic dataset
import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf

In [None]:
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

In [None]:
# Explore the data
dftrain.head()

In [None]:
dftrain.describe()

In [None]:
dftrain.shape[0], dfeval.shape[0]

In [None]:
dftrain.age.hist(bins=20)

In [None]:
dftrain.sex.value_counts().plot(kind='barh')

In [None]:
dftrain['class'].value_counts().plot(kind='barh')

In [None]:
pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive')

In [None]:
# Feature Engineering for the Model
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck',
                      'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, 
                                                                                    vocabulary))
for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

In [None]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Build a zero-argument input function for an Estimator.

    Args:
        data_df: Feature table; converted with ``dict(data_df)`` into a
            mapping of column name to values.
        label_df: Labels aligned with ``data_df``.
        num_epochs: How many times the batched data is repeated.
        shuffle: Whether to shuffle (buffer of 1000) before batching.
        batch_size: Number of examples per batch.

    Returns:
        A callable that creates the ``tf.data.Dataset`` each time it is called.
    """
    def input_function():
        dataset = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
        pipeline = dataset.shuffle(1000) if shuffle else dataset
        return pipeline.batch(batch_size).repeat(num_epochs)
    return input_function

train_input_fn = make_input_fn(dftrain, y_train)
eval_input_fn  = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)

In [None]:
ds = make_input_fn(dftrain, y_train, batch_size=10)()
for feature_batch, label_batch in ds.take(1):
    print('Some feature keys:', list(feature_batch.keys()))
    print()
    print('A batch of class:', feature_batch['class'].numpy())
    print()
    print('A batch of Labels:', label_batch.numpy())

In [None]:
age_column = feature_columns[7]
tf.keras.layers.DenseFeatures([age_column])(feature_batch).numpy()

In [None]:
gender_column = feature_columns[0]
tf.keras.layers.DenseFeatures([tf.feature_column.indicator_column(gender_column)])(feature_batch).numpy()

In [None]:
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(train_input_fn)
result = linear_est.evaluate(eval_input_fn)

clear_output()
print(result)

In [None]:
age_x_gender = tf.feature_column.crossed_column(['age', 'sex'], hash_bucket_size=100)

In [None]:
derived_feature_columns = [age_x_gender]
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns+derived_feature_columns)
linear_est.train(train_input_fn)
result = linear_est.evaluate(eval_input_fn)

clear_output()
print(result)

In [None]:
pred_dicts = list(linear_est.predict(eval_input_fn))
probs = pd.Series([pred['probabilities'][1] for pred in pred_dicts])

probs.plot(kind='hist', bins=20, title='predicted probabilities')

In [None]:
from sklearn.metrics import roc_curve
from matplotlib import pyplot as plt

fpr, tpr, _ = roc_curve(y_eval, probs)
plt.plot(fpr, tpr)
plt.title('ROC curve')
plt.xlabel('false positive rate')
plt.ylabel('true positive rate')
plt.xlim(0,)
plt.ylim(0,)

## 5. Overfit and underfit

In [None]:
# Setup
from __future__ import absolute_import, division, print_function, unicode_literals

%tensorflow_version 2.x

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import regularizers

print(tf.__version__)

In [None]:
!pip install -q git+https://github.com/tensorflow/docs

import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots

In [None]:
from IPython import display
from matplotlib import pyplot as plt

import numpy as np

import pathlib
import shutil
import tempfile

In [None]:
logdir = pathlib.Path(tempfile.mkdtemp())/'tensorboard_logs'
shutil.rmtree(logdir, ignore_errors=True)

In [None]:
# The Higgs Dataset
gz = tf.keras.utils.get_file('HIGGS.csv.gz',
                             'https://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz')

In [None]:
FEATURES = 28

In [None]:
ds = tf.data.experimental.CsvDataset(gz,[float(),]*(FEATURES+1), compression_type='GZIP')

In [None]:
def pack_row(*row):
  """Split one CSV row (or batch of rows) into ``(features, label)``.

  The first column is the label; the remaining columns are stacked along
  axis 1 into a single feature tensor.
  """
  label, *columns = row
  features = tf.stack(columns, 1)
  return features, label

In [None]:
packed_ds = ds.batch(10000).map(pack_row).unbatch()

In [None]:
for features, label in packed_ds.batch(1000).take(1):
  print(features[0])
  plt.hist(features.numpy().flatten(), bins=101)

In [None]:
N_VALIDATION = int(1e3)
N_TRAIN = int(1e4)
BUFFER_SIZE = int(1e4)
BATCH_SIZE = 500
STEPS_PER_EPOCH = N_TRAIN//BATCH_SIZE

In [None]:
validate_ds = packed_ds.take(N_VALIDATION).cache()
train_ds = packed_ds.skip(N_VALIDATION).take(N_TRAIN).cache()

In [None]:
train_ds

In [None]:
validate_ds = validate_ds.batch(BATCH_SIZE)
train_ds = train_ds.shuffle(BUFFER_SIZE).repeat().batch(BATCH_SIZE)

In [None]:
# Demonstrate overfitting
# Training procedure
# Learning-rate schedule shared by every model below: start at 0.001 and
# shrink it gradually with inverse-time decay. `decay_steps` is expressed in
# optimizer steps, here 1000 epochs' worth of steps; `staircase=False` makes
# the decay continuous rather than stepwise.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    0.001,
    decay_steps=STEPS_PER_EPOCH*1000,
    decay_rate=1,
    staircase=False
)

def get_optimizer():
  """Return a new Adam optimizer that follows the module-level ``lr_schedule``."""
  optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
  return optimizer

In [None]:
step = np.linspace(0, 100000)
lr = lr_schedule(step)
plt.figure(figsize=(8,6))
plt.plot(step/STEPS_PER_EPOCH, lr)
plt.ylim([0, max(plt.ylim())])
plt.xlabel('Epoch')
_ = plt.ylabel('Learning Rate')

In [None]:
def get_callbacks(name):
  """Return the training callbacks for the run called ``name``.

  Args:
    name: Run name, used as the sub-directory of ``logdir`` for TensorBoard logs.

  Returns:
    A list holding the progress printer, early stopping on
    ``val_binary_crossentropy`` (patience 200), and the TensorBoard logger.
  """
  progress = tfdocs.modeling.EpochDots()
  stopper = tf.keras.callbacks.EarlyStopping(monitor='val_binary_crossentropy', patience=200)
  board = tf.keras.callbacks.TensorBoard(logdir/name)
  return [progress, stopper, board]

In [None]:
def compile_and_fit(model, name, optimizer=None, max_epochs=10000):
  """Compile ``model`` for binary classification and train it.

  Args:
    model: Keras model to compile and fit.
    name: Run name passed to ``get_callbacks`` (TensorBoard sub-directory).
    optimizer: Optimizer to use; when ``None`` one is created by ``get_optimizer()``.
    max_epochs: Upper bound on epochs; early stopping may end training sooner.

  Returns:
    The value returned by ``model.fit`` (the training history).
  """
  chosen_optimizer = get_optimizer() if optimizer is None else optimizer

  model.compile(optimizer=chosen_optimizer,
                loss='binary_crossentropy',
                metrics=['accuracy', 'binary_crossentropy'])
  model.summary()

  # train_ds repeats indefinitely, so the epoch length must be given explicitly.
  return model.fit(
      train_ds,
      steps_per_epoch=STEPS_PER_EPOCH,
      epochs=max_epochs,
      validation_data=validate_ds,
      callbacks=get_callbacks(name),
      verbose=0,
  )

In [None]:
# Tiny model
tiny_model = tf.keras.Sequential([
                                  layers.Dense(16, activation='elu', input_shape=(FEATURES,)),
                                  layers.Dense(1, activation='sigmoid')
])

In [None]:
size_histories = {}

In [None]:
size_histories['Tiny'] = compile_and_fit(tiny_model, 'sizes/Tiny')

In [None]:
plotter = tfdocs.plots.HistoryPlotter(metric = 'binary_crossentropy', smoothing_std=10)
plotter.plot(size_histories)
plt.ylim([0.5, 0.7])

In [None]:
# Small model
small_model = tf.keras.Sequential([
  # `input_shape` must be specified here for `.summary()` to work.
  layers.Dense(16, activation='elu', input_shape=(FEATURES,)),
  layers.Dense(16, activation='elu'),
  layers.Dense(1, activation='sigmoid')
])

In [None]:
size_histories['Small'] = compile_and_fit(small_model, 'sizes/Small')

In [None]:
# Medium model
medium_model = tf.keras.Sequential([
  layers.Dense(64, activation='elu', input_shape=(FEATURES, )),
  layers.Dense(64, activation='elu'),
  layers.Dense(64, activation='elu'),
  layers.Dense(1,  activation='sigmoid')
])

In [None]:
size_histories['Medium'] = compile_and_fit(medium_model, 'sizes/Medium')

In [None]:
# Large model
large_model = tf.keras.Sequential([
  layers.Dense(512, activation='elu', input_shape=(FEATURES,)),
  layers.Dense(512, activation='elu'),
  layers.Dense(512, activation='elu'),
  layers.Dense(512, activation='elu'),
  layers.Dense(1, activation='sigmoid')
])

In [None]:
size_histories['large'] = compile_and_fit(large_model, 'sizes/large')

In [None]:
# Plot the training and validation losses
plotter.plot(size_histories)
a = plt.xscale('log')
plt.xlim([5, max(plt.xlim())])
plt.ylim([0.5, 0.7])
plt.xlabel('Epochs [Log Scale]')

In [None]:
# View in Tensorboard
 
%tensorboard --logdir {logdir}/sizes

In [None]:
display.IFrame(
    src="https://tensorboard.dev/experiment/vW7jmmF9TmKmy3rbheMQpw/#scalars&_smoothingWeight=0.97",
    width="100%", height="800px"
)

In [None]:
!tensorbard dev upload --logdir {logdir}/sizes

In [None]:
#Strategies to prevent overfitting
shutil.rmtree(logdir/'regularizers/Tiny', ignore_errors=True)
shutil.copytree(logdir/'sizes/Tiny', logdir/'regularizers/Tiny')

In [None]:
regularizer_histories={}
regularizer_histories['Tiny'] = size_histories['Tiny']

In [None]:
# Add weight regularization
l2_model = tf.keras.Sequential([
  layers.Dense(512, activation='elu',
               kernel_regularizer=regularizers.l2(0.001),
               input_shape=(FEATURES, )),
  layers.Dense(512, activation='elu',
               kernel_regularizer=regularizers.l2(0.001)),
  layers.Dense(512, activation='elu',
               kernel_regularizer=regularizers.l2(0.001)),
  layers.Dense(512, activation='elu',
               kernel_regularizer=regularizers.l2(0.001)),
  layers.Dense(1, activation='sigmoid')
])

regularizer_histories['l2'] = compile_and_fit(l2_model, 'regularizers/l2')

In [None]:
plotter.plot(regularizer_histories)
plt.ylim([0.5, 0.7])

In [None]:
result = l2_model(features)
regularization_loss = tf.add_n(l2_model.losses)

In [None]:
# Add dropout
dropout_model = tf.keras.Sequential([
  layers.Dense(512, activation='elu', input_shape=(FEATURES,)),
  layers.Dropout(0.5),
  layers.Dense(512, activation='elu'),
  layers.Dropout(0.5),
  layers.Dense(512, activation='elu'),
  layers.Dropout(0.5),
  layers.Dense(512, activation='elu'),
  layers.Dropout(0.5),
  layers.Dense(1, activation='sigmoid')
])

regularizer_histories['dropout'] = compile_and_fit(dropout_model, 'regularizers/dropout')

In [None]:
plotter.plot(regularizer_histories)
plt.ylim([0.5, 0.7])

In [None]:
# Combined L2 + dropout
combined_model = tf.keras.Sequential([
  layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001),
               activation='elu', input_shape=(FEATURES, )),
  layers.Dropout(0.5),
  layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001),
               activation='elu'),
  layers.Dropout(0.5),
  layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001),
               activation='elu'),
  layers.Dropout(0.5),
  layers.Dense(512, kernel_regularizer=regularizers.l2(0.0001),
               activation='elu'),
  layers.Dropout(0.5),
  layers.Dense(1, activation='sigmoid')
])

regularizer_histories['combined'] = compile_and_fit(combined_model, 'regularizers/combined')

In [None]:
plotter.plot(regularizer_histories)
plt.ylim([0.5, 0.7])

In [None]:
%tensorboard --logdir {logdir}/regularizers

In [None]:
display.IFrame(
    src="https://tensorboard.dev/experiment/fGInKDo8TXes1z7HQku9mw/#scalars&_smoothingWeight=0.97",
    width = "100%",
    height="800px")

## 6. Load CSV data

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import functools

import numpy as np
import tensorflow as tf

In [None]:
TRAIN_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
TEST_DATA_URL = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"

train_file_path = tf.keras.utils.get_file("train.csv", TRAIN_DATA_URL)
test_file_path = tf.keras.utils.get_file("eval.csv", TEST_DATA_URL)

In [None]:
np.set_printoptions(precision=3, suppress=True)

In [None]:
# Load data
!head {train_file_path}

In [None]:
LABEL_COLUMN = 'survived'
LABELS = [0, 1]

In [None]:
def get_dataset(file_path, **kwargs):
  """Load a CSV file as a batched ``tf.data`` dataset of ``(features, label)``.

  Args:
    file_path: Path of the CSV file to read.
    **kwargs: Extra options forwarded to ``make_csv_dataset`` (for example
      ``column_names``, ``select_columns`` or ``column_defaults``).

  Returns:
    The dataset produced by ``tf.data.experimental.make_csv_dataset``.
  """
  fixed_options = dict(
      batch_size=5,
      label_name=LABEL_COLUMN,
      na_value="?",
      num_epochs=1,
      ignore_errors=True,
  )
  # Two separate ** expansions: a caller repeating a fixed option still gets
  # a TypeError instead of silently overriding it.
  return tf.data.experimental.make_csv_dataset(file_path, **fixed_options, **kwargs)

raw_train_data = get_dataset(train_file_path)
raw_test_data = get_dataset(test_file_path)

In [None]:
def show_batch(dataset):
  """Print each feature column of the first batch of ``dataset``.

  The label part of the batch is ignored; each line shows the column name
  padded to 20 characters followed by the values as a NumPy array.
  """
  for features, _ in dataset.take(1):
    for column, tensor in features.items():
      print('{:20s}: {}'.format(column, tensor.numpy()))

In [None]:
show_batch(raw_train_data)

In [None]:
CSV_COLUMNS = ['survived', 'sex', 'age', 'n_siblings_spouses', 'parch',
               'fare', 'class', 'deck', 'embark_town', 'alone']
temp_dataset = get_dataset(train_file_path, column_names=CSV_COLUMNS)

show_batch(temp_dataset)

In [None]:
SELECT_COLUMNS = ['survived', 'age', 'n_siblings_spouses', 'class', 'deck', 'alone']

temp_dataset = get_dataset(train_file_path, select_columns=SELECT_COLUMNS)

show_batch(temp_dataset)

In [None]:
# Data preprocessing
SELECT_COLUMNS = ['survived', 'age', 'n_siblings_spouses', 'parch', 'fare']
DEFAULTS = [0, 0.0, 0.0, 0.0, 0.0]
temp_dataset = get_dataset(train_file_path,
                           select_columns=SELECT_COLUMNS,
                           column_defaults=DEFAULTS)

show_batch(temp_dataset)

In [None]:
example_batch , labels_batch = next(iter(temp_dataset))

In [None]:
def pack(features, label):
  """Stack every feature column along the last axis into one tensor.

  Args:
    features: Mapping of column name to tensor.
    label: Passed through unchanged.

  Returns:
    ``(stacked_features, label)``.
  """
  columns = list(features.values())
  stacked = tf.stack(columns, axis=-1)
  return stacked, label


In [None]:
packed_dataset = temp_dataset.map(pack)

for features, labels in packed_dataset.take(1):
  print(features.numpy())
  print()
  print(labels.numpy())

In [None]:
show_batch(raw_train_data)

In [None]:
example_batch, labels_batch = next(iter(temp_dataset))

In [None]:
class PackNumericFeatures(object):
  """Callable for ``Dataset.map`` that merges named numeric columns.

  The columns listed in ``names`` are removed from the feature dict, cast to
  ``tf.float32``, stacked along the last axis, and stored back under the
  single key ``'numeric'``. All other columns are left untouched.
  """

  def __init__(self, names):
    # Column names to pull out and pack, in stacking order.
    self.names = names

  def __call__(self, features, labels):
    columns = []
    for name in self.names:
      # pop() removes the original column so it does not appear twice.
      columns.append(tf.cast(features.pop(name), tf.float32))
    features['numeric'] = tf.stack(columns, axis=-1)

    return features, labels

In [None]:
NUMERIC_FEATURES = ['age', 'n_siblings_spouses', 'parch', 'fare']

packed_train_data = raw_train_data.map(
    PackNumericFeatures(NUMERIC_FEATURES)
)

packed_test_data = raw_test_data.map(
    PackNumericFeatures(NUMERIC_FEATURES)
)

In [None]:
show_batch(packed_train_data)

In [None]:
example_batch, labels_batch = next(iter(packed_train_data))

In [None]:
# Data Normalization
import pandas as pd
desc = pd.read_csv(train_file_path)[NUMERIC_FEATURES].describe()
desc

In [None]:
MEAN = np.array(desc.T['mean'])
STD  = np.array(desc.T['std'])

In [None]:
def normalize_numeric_data(data, mean, std):
  """Center ``data`` on ``mean`` and scale it by ``std``.

  Returns ``(data - mean) / std``; the arguments may be scalars or arrays
  that support these operators.
  """
  centered = data - mean
  return centered / std

In [None]:
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

numeric_column = tf.feature_column.numeric_column('numeric', 
                                                  normalizer_fn=normalizer, 
                                                  shape=[len(NUMERIC_FEATURES)])
numeric_columns = [numeric_column]
numeric_column

In [None]:
example_batch['numeric']

In [None]:
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
numeric_layer(example_batch).numpy()

In [None]:
# Categorical data
# Vocabulary for each categorical column. The strings must match the values
# stored in the CSV exactly; a value missing from its list matches no
# category. 'Southampton' was previously misspelled 'Southhampton'.
CATEGORIES = {
    'sex' : ['male', 'female'],
    'class' : ['First', 'Second', 'Third'],
    'deck' : ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
    'embark_town' : ['Cherbourg', 'Southampton', 'Queenstown'],
    'alone' : ['y', 'n']
}

In [None]:
categorical_columns = []
for feature, vocab in CATEGORIES.items():
  cat_col = tf.feature_column.categorical_column_with_vocabulary_list(
      key=feature, vocabulary_list=vocab
  )
  categorical_columns.append(tf.feature_column.indicator_column(cat_col))

In [None]:
categorical_columns

In [None]:
categorical_layer = tf.keras.layers.DenseFeatures(categorical_columns)
print(categorical_layer(example_batch).numpy()[0])

In [None]:
# Combine preprocessing layer
preprocessing_layer = tf.keras.layers.DenseFeatures(categorical_columns+numeric_columns)

In [None]:
print(preprocessing_layer(example_batch).numpy()[0])

In [None]:
# Build the model
model = tf.keras.Sequential([
  preprocessing_layer,
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [None]:
# Train, evaluate, and predict
train_data = packed_train_data.shuffle(500)
test_data  = packed_test_data

In [None]:
model.fit(train_data, epochs=20)

In [None]:
test_loss, test_accuracy = model.evaluate(test_data)

print('\n\nTest Loss: {}, Test Accuracy: {}'.format(test_loss, test_accuracy))

In [None]:
predictions = model.predict(test_data)

# Show some results. Only the labels of the first batch are needed, so fetch
# that one batch instead of materializing the whole dataset with list().
# zip() stops at the shorter input, so at most one batch of rows is printed.
_, first_batch_labels = next(iter(test_data))

for prediction, survived in zip(predictions[:10], first_batch_labels[:10]):
  print('Predicted survival: {:.2%}'.format(prediction[0]),
        '| Actual outcome: ',
        ('SURVIVED' if bool(survived) else 'DIED'))

## 7. TFRecord and tf.Example

In [None]:
# Setup

from __future__ import absolute_import, division, print_function, unicode_literals

%tensorflow_version 2.x

import tensorflow as tf

import numpy as np
import IPython.display as display

In [None]:
def _bytes_feature(value):
  """Wrap ``value`` in a ``tf.train.Feature`` carrying a one-element ``BytesList``."""
  eager_tensor_type = type(tf.constant(0))
  if isinstance(value, eager_tensor_type):
    # Tensors are converted to their underlying NumPy value before wrapping.
    value = value.numpy()
  payload = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=payload)

def _float_feature(value):
  """Wrap ``value`` in a ``tf.train.Feature`` carrying a one-element ``FloatList``."""
  payload = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=payload)

def _int64_feature(value):
  """Wrap ``value`` in a ``tf.train.Feature`` carrying a one-element ``Int64List``."""
  payload = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=payload)

In [None]:
print(_bytes_feature(b'test_string'))
print(_bytes_feature(u'test_bytes'.encode('utf-8')))

print(_float_feature(np.exp(1)))

print(_int64_feature(True))
print(_int64_feature(1))

In [None]:
feature = _float_feature(np.exp(1))

feature.SerializeToString()

In [None]:
# Creating a tf.Example message
n_observations = int(1e4)

feature0 = np.random.choice([False, True], n_observations)

feature1 = np.random.randint(0, 5, n_observations)

strings = np.array([b'cat', b'dog', b'chicken', b'horse', b'goat'])
feature2 = strings[feature1]

feature3 = np.random.randn(n_observations)

In [None]:
def serialize_example(feature0, feature1, feature2, feature3):
  """Serialize one observation as a ``tf.train.Example`` byte string.

  Args:
    feature0: Stored as an int64 feature under key ``'feature0'``.
    feature1: Stored as an int64 feature under key ``'feature1'``.
    feature2: Stored as a bytes feature under key ``'feature2'``.
    feature3: Stored as a float feature under key ``'feature3'``.

  Returns:
    The serialized ``tf.train.Example`` message.
  """
  # Map each feature name to a tf.train.Feature of the matching list type.
  fields = {
      'feature0' : _int64_feature(feature0),
      'feature1' : _int64_feature(feature1),
      'feature2' : _bytes_feature(feature2),
      'feature3' : _float_feature(feature3),
  }

  bundle = tf.train.Features(feature=fields)
  message = tf.train.Example(features=bundle)
  return message.SerializeToString()

In [None]:
example_obsevation = []

serialized_example = serialize_example(False, 4, b'goat', 0.9876)
serialized_example

In [None]:
example_proto = tf.train.Example.FromString(serialized_example)
example_proto

In [None]:
# TFRecord files using tf.data
tf.data.Dataset.from_tensor_slices(feature1)

In [None]:
features_dataset = tf.data.Dataset.from_tensor_slices((feature0, feature1,
                                                       feature2, feature3))
features_dataset

In [None]:
for f0,f1,f2,f3 in features_dataset.take(1):
  print(f0)
  print(f1)
  print(f2)
  print(f3)

In [None]:
# How this differs from `serialize_example`: that one is a plain Python
# function, while this wrapper runs it through `tf.py_function` so it can be
# passed to `Dataset.map` (as done below).
def tf_serialize_example(f0,f1,f2,f3):
  """Serialize one row of tensors by calling ``serialize_example`` via ``tf.py_function``.

  Returns:
    A scalar ``tf.string`` tensor holding the serialized example.
  """
  serialized = tf.py_function(
      func=serialize_example,
      inp=(f0, f1, f2, f3),
      Tout=tf.string,
  )
  # Reshape to shape (), i.e. a scalar.
  return tf.reshape(serialized, ())

In [None]:
tf_serialize_example(f0,f1,f2,f3)

In [None]:
serialized_features_dataset = features_dataset.map(tf_serialize_example)
serialized_features_dataset

In [None]:
def generator():
  """Yield one serialized example per element of ``features_dataset``."""
  for f0, f1, f2, f3 in features_dataset:
    yield serialize_example(f0, f1, f2, f3)

In [None]:
serialized_features_dataset = tf.data.Dataset.from_generator(
    generator, output_types=tf.string, output_shapes=()
)

In [None]:
serialized_features_dataset

In [None]:
filename = 'test.tfrecord'
writer = tf.data.experimental.TFRecordWriter(filename)
writer.write(serialized_features_dataset)

In [None]:
# Reading a TFRecord file
filenames = [filename]
raw_dataset = tf.data.TFRecordDataset(filenames)
raw_dataset

In [None]:
for raw_record in raw_dataset.take(10):
  print(repr(raw_record))

In [None]:
# Schema used to decode the serialized records: each feature is declared as a
# scalar (shape []) with an explicit dtype and default value.
feature_description = {
    'feature0' : tf.io.FixedLenFeature([], tf.int64, default_value=0),
    'feature1' : tf.io.FixedLenFeature([], tf.int64, default_value=0),
    'feature2' : tf.io.FixedLenFeature([], tf.string, default_value=''),
    'feature3' : tf.io.FixedLenFeature([], tf.float32, default_value=0.0)
}

def _parse_function(example_proto):
  """Parse one serialized example using `feature_description`."""
  return tf.io.parse_single_example(example_proto, feature_description)

In [None]:
parsed_dataset = raw_dataset.map(_parse_function)
parsed_dataset

In [None]:
for parsed_record in parsed_dataset.take(10):
  print(repr(parsed_record))

In [None]:
# TFRecord files in Python
# Writing a TFRecord file
# The context manager closes the writer once all records are written.
with tf.io.TFRecordWriter(filename) as writer:
  # One record per observation, built from the i-th entry of each feature.
  for i in range(n_observations):
    example = serialize_example(feature0[i], feature1[i], feature2[i], feature3[i])
    writer.write(example)

In [None]:
# Reading a TFRecord file
filenames = [filename]
# Pass the list that was just built, matching the earlier tf.data reading cell.
raw_dataset = tf.data.TFRecordDataset(filenames)
raw_dataset

In [None]:
for raw_record in raw_dataset.take(1):
  example = tf.train.Example()
  example.ParseFromString(raw_record.numpy())
  print(example)

In [None]:
# Walkthrough: Reading and writing image data
cat_in_snow  = tf.keras.utils.get_file('320px-Felis_catus-cat_on_snow.jpg', 'https://storage.googleapis.com/download.tensorflow.org/example_images/320px-Felis_catus-cat_on_snow.jpg')
williamsburg_bridge = tf.keras.utils.get_file('194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg','https://storage.googleapis.com/download.tensorflow.org/example_images/194px-New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg')

In [None]:
display.display(display.Image(filename=cat_in_snow))
# The attribution link needs the quotes around the URL, not around `href=...`.
display.display(display.HTML('Image cc-by: <a href="https://commons.wikimedia.org/wiki/File:Felis_catus-cat_on_snow.jpg">Von.grzanka</a>'))

In [None]:
display.display(display.Image(filename=williamsburg_bridge))
# The attribution link needs the quotes around the URL, not around `href=...`.
display.display(display.HTML('<a href="https://commons.wikimedia.org/wiki/File:New_East_River_Bridge_from_Brooklyn_det.4a09796u.jpg">From Wikimedia</a>'))

In [None]:
# Write the TFRecord file
image_labels = {
    cat_in_snow : 0,
    williamsburg_bridge : 1,
}

In [None]:
# Read the encoded image bytes; the context manager closes the file handle.
with open(cat_in_snow, 'rb') as image_file:
  image_string = image_file.read()

label = image_labels[cat_in_snow]

def image_example(image_string, label):
  """Build a tf.train.Example describing one encoded image and its label.

  Args:
    image_string: Encoded image bytes; decoded with tf.image.decode_jpeg only
      to read the image shape.
    label: Label value, stored through `_int64_feature`.

  Returns:
    A tf.train.Example with 'height', 'width', 'depth', 'label' and
    'image_raw' features; 'image_raw' holds the bytes as passed in.
  """
  image_shape = tf.image.decode_jpeg(image_string).shape

  feature = {
      'height': _int64_feature(image_shape[0]),
      'width': _int64_feature(image_shape[1]),
      'depth': _int64_feature(image_shape[2]),
      'label': _int64_feature(label),
      'image_raw': _bytes_feature(image_string)
  }

  return tf.train.Example(features=tf.train.Features(feature=feature))

# Print only the first five lines of the proto's text form.
for line in str(image_example(image_string, label)).split('\n')[:5]:
  print(line)
print('...')

In [None]:
record_file = 'images.tfrecords'
with tf.io.TFRecordWriter(record_file) as writer:
  # `image_path` (rather than `filename`) keeps the loop from overwriting the
  # notebook-level `filename` used by the earlier TFRecord cells.
  for image_path, label in image_labels.items():
    # Close each image file as soon as its bytes have been read.
    with open(image_path, 'rb') as image_file:
      image_string = image_file.read()
    tf_example = image_example(image_string, label)
    writer.write(tf_example.SerializeToString())

In [None]:
# Read the TFRecord file
raw_image_dataset = tf.data.TFRecordDataset('images.tfrecords')

# Parsing schema: every feature is declared as a scalar (shape []); the keys
# match those written by image_example.
image_feature_description = {
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'depth': tf.io.FixedLenFeature([], tf.int64),
    'label': tf.io.FixedLenFeature([], tf.int64),
    'image_raw': tf.io.FixedLenFeature([], tf.string),
}

def _parse_image_function(example_proto):
  """Parse one serialized example using `image_feature_description`."""
  return tf.io.parse_single_example(example_proto, image_feature_description)

parsed_image_dataset = raw_image_dataset.map(_parse_image_function)
parsed_image_dataset

In [None]:
for image_features in parsed_image_dataset:
  image_raw = image_features['image_raw'].numpy()
  display.display(display.Image(data=image_raw))

## 8. Save and load models

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os

import tensorflow as tf
from tensorflow import keras

print(tf.__version__)

In [None]:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# Keep only the first 1000 labels of each split.
train_labels = train_labels[:1000]
test_labels  = test_labels[:1000]

# Matching first 1000 images, flattened to 28*28 = 784 values per row and
# divided by 255.0.
train_images = train_images[:1000].reshape(-1, 28*28) / 255.0
test_images  = test_images[:1000].reshape(-1, 28*28) / 255.0


In [None]:
# Define a model
def create_model():
    """Build and compile a fresh dense classifier for 784-value inputs.

    Returns:
        A compiled Sequential model (Dense 512 relu, Dropout 0.2, Dense 10
        softmax) using the adam optimizer, sparse categorical cross-entropy
        loss and the accuracy metric.
    """
    layer_stack = [
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation='softmax'),
    ]
    network = tf.keras.models.Sequential(layer_stack)

    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

model = create_model()

model.summary()

In [None]:
# Save checkpoints during training

checkpoint_path = 'training_1/cp.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)

# Checkpoint callback configured to store only the weights, with verbose output.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, 
                                                 save_weights_only=True,
                                                 verbose=1)

# Train for 10 epochs with the checkpoint callback attached.
model.fit(train_images,
         train_labels,
         epochs=10,
         validation_data=(test_images, test_labels),
         callbacks=[cp_callback])


In [None]:
model = create_model()

loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print('Untrained model, accuracy: {:5.2f}%'.format(100*acc))

In [None]:
model.load_weights(checkpoint_path)

loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

In [None]:
# Checkpoint callback options
# The epoch number is formatted into the file name, so each saved checkpoint
# gets its own file.
checkpoint_path = 'training_2/cp-{epoch:04d}.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)

# NOTE(review): `period` looks deprecated in newer tf.keras releases in favour
# of `save_freq` - confirm against the installed TensorFlow version.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    period=5
)

model = create_model()

# Store the untrained weights as the epoch-0 checkpoint.
model.save_weights(checkpoint_path.format(epoch=0))

model.fit(train_images,
         train_labels,
         epochs=50,
         callbacks=[cp_callback],
         validation_data = (test_images, test_labels),
         verbose=0)

In [None]:
latest = tf.train.latest_checkpoint(checkpoint_dir)
latest

In [None]:
model = create_model()

model.load_weights(latest)

loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

In [None]:
# Manually save weights
model.save_weights('./checkpoints/my_checkpoint')

# Start from a fresh model so the evaluation below reflects the loaded weights.
model = create_model()

model.load_weights('./checkpoints/my_checkpoint')

loss, acc = model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

In [None]:
# Save the entire model
model = create_model()
model.fit(train_images, train_labels, epochs=5)

model.save('my_model.h5')

In [None]:
new_model = tf.keras.models.load_model('my_model.h5')

new_model.summary()

In [None]:
loss, acc = new_model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

In [None]:
# SavedModel format

model = create_model()
model.fit(train_images, train_labels, epochs=5)

!mkdir -p saved_model
model.save('saved_model/my_model')

In [None]:
new_model = tf.keras.models.load_model('saved_model/my_model')

new_model.summary()

In [None]:
loss, acc = new_model.evaluate(test_images, test_labels, verbose=2)
print('Restored model, accuracy: {:5.2f}%'.format(100*acc))

print(new_model.predict(test_images).shape)

## 9. Better performance with the tf.data API

In [None]:
# Setup
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import time

In [None]:
# the dataset
class ArtificialDataset(tf.data.Dataset):
    """Synthetic dataset with built-in delays, used to time input pipelines.

    Calling the class does not create an ArtificialDataset instance: __new__
    returns the dataset built by tf.data.Dataset.from_generator.
    """

    def _generator(num_samples):
        # No `self`: the function is fetched from the class (cls._generator)
        # and handed to from_generator as a plain callable.
        # One-off 0.03 s pause before the first sample.
        time.sleep(0.03)
        
        for sample_idx in range(num_samples):
            # 0.015 s pause before each sample is produced.
            time.sleep(0.015)
            
            yield (sample_idx, )
            
        
    def __new__(cls, num_samples=3):
        # Each yielded 1-tuple is declared as an int64 tensor of shape (1,).
        return tf.data.Dataset.from_generator(
            cls._generator,
            output_types=tf.dtypes.int64,
            output_shapes=(1,),
            args=(num_samples,)
        )

In [None]:
# The training loop
def benchmark(dataset, num_epochs=2):
    """Iterate over `dataset` `num_epochs` times and print the elapsed time.

    Sleeps 0.01 s for every element consumed, then prints the total
    wall-clock duration with tf.print.
    """
    began = time.perf_counter()
    for _ in range(num_epochs):
        for _ in dataset:
            # Fixed per-element delay.
            time.sleep(0.01)
    tf.print('Execution time:', time.perf_counter() - began)

In [None]:
# Optimize performance
benchmark(ArtificialDataset())

In [None]:
# Prefetching
benchmark(
    ArtificialDataset()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [None]:
# Parallelizing data extraction
# Sequential interleave
benchmark(
    tf.data.Dataset.range(2)
    .interleave(ArtificialDataset)
)

In [None]:
# Parallel interleave
benchmark(
    tf.data.Dataset.range(2)
    .interleave(
            ArtificialDataset,
            num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
)

In [None]:
# Parallelizing data transformation
def mapped_function(s):
    """Return `s` unchanged after adding a 0.03 s sleep op via tf.py_function."""
    # The py_function takes no inputs and declares no outputs; it only sleeps.
    tf.py_function(lambda: time.sleep(0.03), [], ())
    return s

In [None]:
# Sequential mapping
benchmark(
    ArtificialDataset()
    .map(mapped_function)
)

In [None]:
# Parallel mapping
benchmark(
    ArtificialDataset()
    .map(
        mapped_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE
    )
)

In [None]:
# Caching
benchmark(
    ArtificialDataset()
    .map(
        mapped_function
    ).cache(
    ), 5
)

In [None]:
# Vectorizing mapping
fast_dataset = tf.data.Dataset.range(10000)

def fast_benchmark(dataset, num_epochs=2):
    """Iterate over `dataset` `num_epochs` times with an empty loop body and
    print the elapsed wall-clock time."""
    start_time = time.perf_counter()
    # The epoch loop itself iterates over a tf.data range dataset.
    for _ in tf.data.Dataset.range(num_epochs):
        for _ in dataset:
            pass
    
    tf.print('Execution time:', time.perf_counter() - start_time)
    
def increment(x):
    """Return `x` plus one."""
    return x+1

In [None]:
# Scalar mapping
fast_benchmark(
    fast_dataset
    .map(increment)
    .batch(256)
)

In [None]:
# Vectorized mapping
fast_benchmark(
    fast_dataset
    .batch(256)
    .map(increment)
)

## 10. Time series forecasting

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        # Enable memory growth on every visible GPU, not just the first one:
        # the setting is expected to be the same across all GPUs.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set at program startup.
        print(e)


import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False

In [None]:
# the weather dataset
# Download the zipped CSV and ask Keras to extract it (extract=True).
zip_path = tf.keras.utils.get_file(
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    fname='jena_climate_2009_2016.csv.zip',
    extract=True)
# Drop the final extension of the archive path; the remainder is used as the
# path of the CSV file.
csv_path, _ = os.path.splitext(zip_path)

In [None]:
df = pd.read_csv(csv_path)

In [None]:
df.head()

In [None]:
def univariate_data(dataset, start_index, end_index, history_size, target_size):
    """Cut a 1-D series into (history window, target) training pairs.

    Args:
        dataset: Indexable 1-D series of values.
        start_index: First position considered; windows start here.
        end_index: Exclusive upper bound for the window end position, or None
            to use `len(dataset) - target_size`.
        history_size: Number of past values in each window.
        target_size: Offset from the window end to the value used as label.

    Returns:
        Tuple `(data, labels)` of numpy arrays; each entry of `data` has
        shape `(history_size, 1)`.
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index
    positions = range(first, last)

    windows = [
        np.reshape(dataset[range(pos - history_size, pos)], (history_size, 1))
        for pos in positions
    ]
    targets = [dataset[pos + target_size] for pos in positions]
    return np.array(windows), np.array(targets)

In [None]:
TRAIN_SPLIT = 30000

In [None]:
tf.random.set_seed(13)

In [None]:
# Part 1: Forecast a univariate time series
uni_data = df['T (degC)']
uni_data.index = df['Date Time']
uni_data.head()

In [None]:
uni_data.plot(subplots=True)

In [None]:
uni_data = uni_data.values

In [None]:
uni_train_mean = uni_data[:TRAIN_SPLIT].mean()
uni_train_std  = uni_data[:TRAIN_SPLIT].std()

In [None]:
uni_data = (uni_data - uni_train_mean)/uni_train_std

In [None]:
univariate_past_history = 20
univariate_future_target = 0

x_train_uni, y_train_uni = univariate_data(uni_data, 0, TRAIN_SPLIT,
                                          univariate_past_history,
                                          univariate_future_target)
x_val_uni, y_val_uni = univariate_data(uni_data, TRAIN_SPLIT, None,
                                      univariate_past_history,
                                      univariate_future_target)

In [None]:
# Show the first training window and the value it should predict.
print('Single window of past history')
print(x_train_uni[0])
print('\n Target temperature to predict')
print(y_train_uni[0])

In [None]:
def create_time_steps(length):
    """Return the list of negative time offsets `[-length, ..., -1]`.

    An empty list is returned when `length` is 0.
    """
    return list(range(-length, 0))

In [None]:
def show_plot(plot_data, delta, title):
    """Plot a history window plus up to two single-point series.

    Args:
        plot_data: Sequence whose first item is the history array; items 1
            and 2 are drawn as 'True Future' and 'Model Prediction' markers.
        delta: X position of the marker points; a falsy value means 0.
        title: Figure title.

    Returns:
        The `plt` module, so the caller can call `.show()` on the result.
    """
    legend_names = ['History', 'True Future', 'Model Prediction']
    line_styles = ['.-', 'rx', 'go']
    history_axis = create_time_steps(plot_data[0].shape[0])
    future = delta if delta else 0

    plt.title(title)
    for idx, series in enumerate(plot_data):
        if idx == 0:
            # The history is drawn as a line over the negative time steps.
            plt.plot(history_axis, series.flatten(), line_styles[idx],
                     label=legend_names[idx])
        else:
            # Every other item is a single marker at x = future.
            plt.plot(future, series, line_styles[idx], markersize=10,
                     label=legend_names[idx])
    plt.legend()
    plt.xlim([history_axis[0], (future + 5) * 2])
    plt.xlabel('Time-Step')
    return plt

In [None]:
show_plot([x_train_uni[0], y_train_uni[0]], 0, 'Sample Example')

In [None]:
# Baseline
def baseline(history):
    """Return the mean of all values in `history`."""
    average = np.mean(history)
    return average

In [None]:
show_plot([x_train_uni[0], y_train_uni[0], baseline(x_train_uni[0])], 0,
         'Baseline Prediction Example')

In [None]:
# Recurrent neural network
BATCH_SIZE = 256
BUFFER_SIZE = 10000

# Training pipeline: cache, shuffle with a BUFFER_SIZE buffer, batch, repeat.
train_univariate = tf.data.Dataset.from_tensor_slices((x_train_uni, y_train_uni))
train_univariate = train_univariate.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

# Validation pipeline: batched and repeated, without shuffling.
val_univariate = tf.data.Dataset.from_tensor_slices((x_val_uni, y_val_uni))
val_univariate = val_univariate.batch(BATCH_SIZE).repeat()

In [None]:
simple_lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(8, input_shape=x_train_uni.shape[-2:]),
    tf.keras.layers.Dense(1)
])

simple_lstm_model.compile(optimizer='adam', loss='mae')

In [None]:
for x, y in val_univariate.take(1):
    print(simple_lstm_model.predict(x).shape)

In [None]:
EVALUATION_INTERVAL = 200
EPOCHS = 10

simple_lstm_model.fit(train_univariate, epochs=EPOCHS,
                     steps_per_epoch=EVALUATION_INTERVAL,
                     validation_data=val_univariate, validation_steps=50)

In [None]:
# Predict using the simple LSTM model
for x, y in val_univariate.take(3):
    plot = show_plot([x[0].numpy(), y[0].numpy(),
                     simple_lstm_model.predict(x)[0]], 0, 'Simple LSTM model')
    plot.show()




In [None]:
# Part 2: forecast a multivariate time series
features_considered = ['p (mbar)', 'T (degC)', 'rho (g/m**3)']

In [None]:
features = df[features_considered]
features.index = df['Date Time']
features.head()

In [None]:
features.plot(subplots=True)

In [None]:
dataset = features.values
data_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
data_std  = dataset[:TRAIN_SPLIT].std(axis=0)

In [None]:
dataset = (dataset - data_mean) / data_std

In [None]:
# Single step model
def multivariate_data(dataset, target, start_index, end_index, history_size,
                     target_size, step, single_step=False):
    """Cut a multi-feature series into (history window, label) pairs.

    Args:
        dataset: Indexable series of feature rows.
        target: Indexable series from which the labels are taken.
        start_index: First position considered; windows start here.
        end_index: Exclusive upper bound for the window end position, or None
            to use `len(dataset) - target_size`.
        history_size: Span of past positions covered by each window.
        target_size: Number of future positions used for the label.
        step: Stride between the positions sampled inside a window.
        single_step: If True the label is the single value at
            `i + target_size`; otherwise it is the slice
            `target[i:i + target_size]`.

    Returns:
        Tuple `(data, labels)` of numpy arrays.
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index

    windows, targets = [], []
    for pos in range(first, last):
        windows.append(dataset[range(pos - history_size, pos, step)])
        label = (target[pos + target_size] if single_step
                 else target[pos:pos + target_size])
        targets.append(label)

    return np.array(windows), np.array(targets)

In [None]:
past_history = 720
future_target = 72
STEP = 6

x_train_single, y_train_single = multivariate_data(dataset, dataset[:, 1], 0,
                                                  TRAIN_SPLIT, past_history,
                                                  future_target, STEP,
                                                  single_step=True)
x_val_single, y_val_single = multivariate_data(dataset, dataset[:, 1], TRAIN_SPLIT,
                                              None, past_history,
                                              future_target, STEP,
                                              single_step=True)

In [None]:
print('Single window of past history : {}'.format(x_train_single[0].shape))

In [None]:
train_data_single = tf.data.Dataset.from_tensor_slices((x_train_single, y_train_single))
train_data_single = train_data_single.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_data_single = tf.data.Dataset.from_tensor_slices((x_val_single, y_val_single))
val_data_single = val_data_single.batch(BATCH_SIZE).repeat()

In [None]:
single_step_model = tf.keras.models.Sequential()
single_step_model.add(tf.keras.layers.LSTM(32,
                                           input_shape=x_train_single.shape[-2:]))
single_step_model.add(tf.keras.layers.Dense(1))

single_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')

In [None]:
for x, y in val_data_single.take(1):
    print(single_step_model.predict(x).shape)

In [None]:
single_step_history = single_step_model.fit(train_data_single, epochs=EPOCHS,
                                           steps_per_epoch=EVALUATION_INTERVAL,
                                           validation_data=val_data_single,
                                           validation_steps=50)

In [None]:
def plot_train_history(history, title):
    """Plot the 'loss' and 'val_loss' curves stored in `history.history`.

    Args:
        history: Object whose `.history` dict holds 'loss' and 'val_loss'.
        title: Figure title.
    """
    train_loss = history.history['loss']
    validation_loss = history.history['val_loss']
    epoch_axis = range(len(train_loss))

    plt.figure()
    curves = (
        (train_loss, 'b', 'Training Loss'),
        (validation_loss, 'r', 'Validation Loss'),
    )
    for values, colour, legend_name in curves:
        plt.plot(epoch_axis, values, colour, label=legend_name)
    plt.title(title)
    plt.legend()

    plt.show()

In [None]:
plot_train_history(single_step_history,
                  'Single Step Training and Validation Loss')

In [None]:
# Predict a single step future
for x, y in val_data_single.take(3):
    plot = show_plot([x[0][:, 1].numpy(), y[0].numpy(),
                     single_step_model.predict(x)[0]], 12,
                    'Single Step Prediction')
    plot.show()

In [None]:
# Multi Step Prediction
future_target = 72
x_train_multi, y_train_multi = multivariate_data(dataset, dataset[:, 1], 0,
                                                TRAIN_SPLIT, past_history,
                                                future_target, STEP)
x_val_multi, y_val_multi = multivariate_data(dataset, dataset[:, 1], TRAIN_SPLIT,
                                            None, past_history,
                                            future_target, STEP)

In [None]:
print('Single window of past history : {}'.format(x_train_multi[0].shape))
print('\n Target temperature to predict : {}'.format(y_train_multi[0].shape))

In [None]:
train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))
train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))
val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()

In [None]:
def multi_step_plot(history, true_future, prediction):
    """Plot a history window, the true future values and, optionally, a
    predicted future.

    Args:
        history: 2-D array-like; column 1 is plotted as the history line.
        true_future: Sequence of true future values.
        prediction: Array of predicted values; skipped when every entry is
            zero/falsy (`prediction.any()` is False).
    """
    plt.figure(figsize=(12, 6))
    num_in = create_time_steps(len(history))
    num_out = len(true_future)

    plt.plot(num_in, np.array(history[:, 1]), label='History')
    # Future points are spaced 1/STEP apart on the x axis.
    plt.plot(np.arange(num_out)/STEP, np.array(true_future), 'bo',
            label='True Future')
    if prediction.any():
        plt.plot(np.arange(num_out)/STEP, np.array(prediction), 'ro',
                label='Prediction Future')
    plt.legend(loc='upper left')
    plt.show()

In [None]:
for x, y in train_data_multi.take(1):
    multi_step_plot(x[0], y[0], np.array([0]))

In [None]:
multi_step_model = tf.keras.models.Sequential()
multi_step_model.add(tf.keras.layers.LSTM(32,
                                         return_sequences=True,
                                         input_shape=x_train_multi.shape[-2:]))
multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))
multi_step_model.add(tf.keras.layers.Dense(72))

multi_step_model.compile(optimizer=tf.keras.optimizers.RMSprop(clipvalue=1.0), loss='mae')

In [None]:
for x, y in val_data_multi.take(1):
    print (multi_step_model.predict(x).shape)

In [None]:
multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,
                                         steps_per_epoch=EVALUATION_INTERVAL,
                                         validation_data=val_data_multi,
                                         validation_steps=50)

In [None]:
plot_train_history(multi_step_history, 'Multi-Step Training and Validation Loss')

In [None]:
for x, y in val_data_multi.take(3):
    multi_step_plot(x[0], y[0], multi_step_model.predict(x)[0])

## 11. Text classification with an RNN

In [None]:
#Setup
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow_datasets as tfds
import tensorflow as tf

In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, string):
    """Plot metric `string` and its 'val_' counterpart from `history.history`.

    Args:
        history: Object whose `.history` dict holds the recorded metrics.
        string: Metric key, e.g. 'accuracy' or 'loss'.
    """
    validation_key = 'val_' + string
    for metric_key in (string, validation_key):
        plt.plot(history.history[metric_key])
    plt.xlabel('Epochs')
    plt.ylabel(string)
    plt.legend([string, validation_key])
    plt.show()

In [None]:
#Setup input pipeline
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True,
                         as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

In [None]:
encoder = info.features['text'].encoder

In [None]:
print('Vocabulary size: {}'.format(encoder.vocab_size))

In [None]:
sample_string = 'Hello Tensorflow'

encoded_string = encoder.encode(sample_string)
print('Encoded string is {}'.format(encoded_string))

original_string = encoder.decode(encoded_string)
print('The original string: {}'.format(original_string))

In [None]:
assert original_string == sample_string

In [None]:
for index in encoded_string:
    print('{} ----> {}'.format(index, encoder.decode([index])))

In [None]:
# Prepare the data for training
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE)

# The element shapes are read from each dataset and passed to padded_batch.
train_output_shape = tf.compat.v1.data.get_output_shapes(train_dataset)  # added
test_output_shape = tf.compat.v1.data.get_output_shapes(test_dataset)  # added

train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_output_shape)

test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_output_shape)

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
model.compile(loss='binary_crossentropy',
             optimizer=tf.keras.optimizers.Adam(1e-4),
             metrics=['accuracy'])

In [None]:
# Train the model
history = model.fit(train_dataset, epochs=10,
                   validation_data=test_dataset,
                   validation_steps=30)

In [None]:
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))

In [None]:
def pad_to_size(vec, size):
    """Append zeros to `vec` in place until it holds `size` items.

    `vec` is left as is when it already has `size` or more items. The same
    list object is returned.
    """
    missing = size - len(vec)
    vec.extend(0 for _ in range(missing))
    return vec

In [None]:
def sample_predict(sentence, pad):
    """Run the current `model` on a single sentence.

    Args:
        sentence: Text to classify; encoded with the notebook's `encoder`.
        pad: If truthy, the encoded sequence is zero-padded to 64 tokens
            with `pad_to_size` before prediction.

    Returns:
        The output of `model.predict` for a batch holding this one sentence.
    """
    # Encode the argument itself rather than the global `sample_pred_text`.
    encoded_sample_pred_text = encoder.encode(sentence)
    
    if pad:
        encoded_sample_pred_text = pad_to_size(encoded_sample_pred_text, 64)
    encoded_sample_pred_text = tf.cast(encoded_sample_pred_text, tf.float32)
    # Add a leading batch axis so the model receives a batch of one.
    predictions = model.predict(tf.expand_dims(encoded_sample_pred_text, 0))
    
    return (predictions)

In [None]:
sample_pred_text = ('The movie was cool. The animation and the graphics '
                    'were out of this world. I would recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=False)
print(predictions)

In [None]:
sample_pred_text = ('The movie was cool. The animation and the graphics '
                    'were out of this world. I would recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=True)
print(predictions)

In [None]:
plot_graphs(history, 'accuracy')

In [None]:
plot_graphs(history, 'loss')

In [None]:
# Stack two or more LSTM Layers
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# The keyword names must be `optimizer` and `metrics`; the later evaluate cell
# unpacks both a loss and an accuracy value.
model.compile(loss='binary_crossentropy',
             optimizer=tf.keras.optimizers.Adam(1e-4),
             metrics=['accuracy'])

In [None]:
history = model.fit(train_dataset, epochs=10,
                   validation_data=test_dataset,
                   validation_steps=30)

In [None]:
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))

In [None]:
sample_pred_text = ('The movie was not good. The animation and the graphics '
                    'were terrible. I would not recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=False)
predictions

In [None]:
sample_pred_text = ('The movie was not good. The animation and the graphics '
                    'were terrible. I would not recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=True)
predictions

In [None]:
plot_graphs(history, 'accuracy')

In [None]:
plot_graphs(history,'loss')

## 12. Distributed training with Keras

In [None]:
# Import dependencies
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow_datasets as tfds
import tensorflow as tf
tfds.disable_progress_bar()

import os

In [None]:
# Download the dataset
datasets, info = tfds.load(name='mnist', with_info=True, as_supervised=True)

mnist_train, mnist_test = datasets['train'], datasets['test']

In [None]:
strategy = tf.distribute.MirroredStrategy()

In [None]:
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

In [None]:
# Setup input pipeline
num_train_examples = info.splits['train'].num_examples
num_test_examples = info.splits['test'].num_examples

BUFFER_SIZE = 10000

BATCH_SIZE_PER_REPLICA = 64
BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

In [None]:
def scale(image, label):
    """Cast `image` to float32 and divide it by 255.0; `label` passes through.

    Returns:
        Tuple `(scaled_image, label)`.
    """
    as_float = tf.cast(image, tf.float32)
    scaled = as_float / 255.0
    return scaled, label

In [None]:
train_dataset = mnist_train.map(scale).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
eval_dataset  = mnist_test.map(scale).batch(BATCH_SIZE)

In [None]:
# Create the model
# The model is both built and compiled inside the strategy scope.
with strategy.scope():
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    
    model.compile(loss='sparse_categorical_crossentropy',
                 optimizer=tf.keras.optimizers.Adam(),
                 metrics=['accuracy'])

In [None]:
# Define the callbacks
checkpoint_dir = './training_checkpoints'

checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')

In [None]:
def decay(epoch):
    """Return the learning rate for `epoch` from a three-step schedule.

    Epochs below 3 use 1e-3, epochs 3 to 6 use 1e-4 and every later epoch
    uses 1e-5.
    """
    if epoch < 3:
        return 1e-3
    if epoch < 7:
        return 1e-4
    return 1e-5

In [None]:
class PrintLR(tf.keras.callbacks.Callback):
    """Callback that prints the optimizer's learning rate after every epoch."""

    def on_epoch_end(self, epoch, logs=None):
        # Read the rate from the model Keras attached to this callback rather
        # than from the notebook-global `model`, so the callback reports on
        # whichever model it is used with.
        print('\nLearning rate for epoch {} is {}'.format(epoch + 1,
                                                         self.model.optimizer.lr.numpy()))

In [None]:
callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir='./logs'),
    tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,
                                       save_weights_only=True),
    tf.keras.callbacks.LearningRateScheduler(decay),
    PrintLR()
]

In [None]:
# Train and Evaluate
model.fit(train_dataset, epochs=12, callbacks=callbacks)

In [None]:
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

eval_loss, eval_acc = model.evaluate(eval_dataset)

print('Eval Loss: {}, Eval Accuracy: {}'.format(eval_loss, eval_acc))

In [None]:
# Export to SavedModel
path = 'saved_model/'
# `save_format` is a keyword argument of model.save, not a callable.
model.save(path, save_format='tf')

# Load the model without strategy.scope
unreplicated_model = tf.keras.models.load_model(path)

# Compile the loaded model again before evaluating it.
unreplicated_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy']
)

eval_loss, eval_acc = unreplicated_model.evaluate(eval_dataset)

print('Eval Loss: {}, Eval Accuracy: {}'.format(eval_loss, eval_acc))

In [None]:
# Load the model with strategy.scope
# Loading, compiling and evaluating all happen inside the strategy scope.
with strategy.scope():
    replicated_model = tf.keras.models.load_model(path)
    replicated_model.compile(loss='sparse_categorical_crossentropy',
                            optimizer=tf.keras.optimizers.Adam(),
                            metrics=['accuracy'])
    
    eval_loss, eval_acc = replicated_model.evaluate(eval_dataset)
    print('Eval Loss: {}, Eval Accuracy: {}'.format(eval_loss, eval_acc))