In [20]:
import tensorflow as tf
import datetime
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import pandas as pd
import numpy as np

In [21]:
%load_ext tensorboard

In [11]:
from tensorboard import notebook
# Print the TensorBoard instances currently registered (port, logdir, start time, pid).
notebook.list()

Known TensorBoard instances:
  - port 6006: logdir logs (started 0:19:48 ago; pid 10452)
  - port 6006: logdir logs (started 2:16:58 ago; pid 11084)
  - port 6006: logdir logs/gradient_tape (started 2:08:15 ago; pid 13116)
  - port 6006: logdir logs (started 2:16:11 ago; pid 13992)
  - port 6006: logdir gradient_tape (started 2:19:39 ago; pid 14040)
  - port 6006: logdir logs/fit (started 2:07:30 ago; pid 14816)
  - port 6006: logdir logs (started 2:19:00 ago; pid 15352)
  - port 6006: logdir logs/fit (started 0:18:56 ago; pid 17892)
  - port 6006: logdir logs/ (started 6 days, 6:27:05 ago; pid 20004)
  - port 6006: logdir logs/fit (started 8 days, 13:11:21 ago; pid 25060)
  - port 6006: logdir logs/ (started 8 days, 12:56:06 ago; pid 25172)
  - port 6006: logdir logs/image (started 0:20:34 ago; pid 2588)
  - port 6006: logdir logs (started 0:07:02 ago; pid 3860)
  - port 6006: logdir logs/gradient_tape (started 2:25:00 ago; pid 3880)
  - port 6006: logdir logs/gradient_tape (started 2

In [17]:
# Force-terminate (Windows `taskkill /F`) the process with PID 2588, which the
# listing above reports as the TensorBoard instance serving logdir logs/image.
# NOTE(review): the PID is hard-coded from one session and will differ on re-run.
!taskkill /F /PID 2588

SUCCESS: The process with PID 2588 has been terminated.


In [18]:
# Remove logs left by earlier runs so TensorBoard starts from an empty directory.
# shutil works on every OS (the previous `rd /s /q` shell escape was Windows-only),
# and ignore_errors keeps the cell re-runnable when the directory does not exist.
import shutil

shutil.rmtree("logs", ignore_errors=True)

In [19]:
# Read the UCI concrete compressive-strength workbook straight from its URL.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "concrete/compressive/Concrete_Data.xls"
)
df = pd.read_excel(io=url)

In [22]:
# Sample 80% of the rows for training (fixed seed); every row whose index label
# was not sampled forms the test frame.
df_train = df.sample(frac=0.8, random_state=42)
df_test = df[~df.index.isin(df_train.index)]

In [23]:
# Inputs are all columns but the last; the last column is the target.
x_train = df_train.iloc[:, :-1]
y_train = df_train.iloc[:, -1]

In [24]:
# Same column layout as the training split: features first, target last.
x_test = df_test.iloc[:, :-1]
y_test = df_test.iloc[:, -1]

In [25]:
# Hyperparameters read by the model-building cells that follow.
lr = 0.001                  # Adam learning rate
drop_rate = 0.1             # rate passed to the Dropout layer
hidden_size = 16            # units in the hidden Dense layer
epochs = 250                # NOTE(review): the visible fit call hard-codes epochs=100
val_frac = 0.25             # NOTE(review): not referenced in the visible cells
initializer = 'he_normal'   # kernel initializer name
regularizer = 'l1_l2'       # kernel regularizer name
metric = keras.losses.MeanSquaredError()  # passed to compile() as the loss

In [26]:
# Layers of the regression network, created individually so they can be stacked later.
normalizer = layers.Normalization(axis=-1)
# The layer learns its per-feature mean and variance from adapt(). It was never
# called before, so the inputs were not standardised with the training statistics.
normalizer.adapt(x_train.to_numpy(dtype="float32"))
dense = layers.Dense(hidden_size, activation='relu', kernel_initializer=initializer, kernel_regularizer=regularizer)
dropout = layers.Dropout(drop_rate)
# Single linear unit: the model predicts one continuous value.
output_dense = layers.Dense(1, kernel_initializer=initializer, kernel_regularizer=regularizer)

In [27]:
# Stack the prepared layers in order: normalise -> hidden dense -> dropout -> output.
model = keras.Sequential(layers=[normalizer, dense, dropout, output_dense])

In [28]:
# Minimise the configured loss object with Adam at the configured learning rate.
model.compile(loss=metric, optimizer=tf.optimizers.Adam(learning_rate=lr))

In [29]:
# Each run logs into its own timestamped folder so TensorBoard can tell runs apart.
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"
tensorboard_callback = tf.keras.callbacks.TensorBoard(histogram_freq=1, log_dir=log_dir)

# Silent training run; the held-out frame is used as validation data.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    callbacks=[tensorboard_callback],
    epochs=100,
    verbose=0,
)

<keras.callbacks.History at 0x19b8368db20>

In [30]:
# Load the red and white UCI wine-quality tables and tag every row with its colour
# (1 = red, 0 = white) so colour can serve as a binary classification target.
base_path = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/'

red = pd.read_csv(f'{base_path}winequality-red.csv', sep=';')
white = pd.read_csv(f'{base_path}winequality-white.csv', sep=';')
red['color'] = 1
white['color'] = 0
# ignore_index gives each row a unique label. Both files are read with their own
# default 0..n-1 index, so a plain concat repeats labels and a label-based split
# such as `df.drop(df_train.index)` would drop every row sharing a sampled label.
df = pd.concat([red, white], ignore_index=True)
df['color'] = df.color.astype(np.int64)

In [31]:
# Seeded 80% sample for training; rows whose index label was not sampled are the test frame.
df_train = df.sample(frac=0.8, random_state=42)
df_test = df[~df.index.isin(df_train.index)]

In [32]:
# Features: everything except the last two columns. Target: the last column
# ('color'), sliced with -1: so it stays a one-column DataFrame.
x_train = df_train.iloc[:, :-2]
y_train = df_train.iloc[:, -1:]

In [33]:
# Same slicing as the training split: drop the last two columns for the inputs,
# keep the last column ('color') as a one-column target frame.
x_test = df_test.iloc[:, :-2]
y_test = df_test.iloc[:, -1:]

In [41]:
import matplotlib.pyplot as plt

def plot_confusion_matrix(cm, class_names):
    """Draw a confusion matrix as a heat map and return the matplotlib figure.

    Args:
        cm: 2-D array-like of counts; rows are plotted on the "True label" axis
            and columns on the "Predicted label" axis.
        class_names: sequence of class labels, in the same order as the rows and
            columns of ``cm``; used as tick labels on both axes.

    Returns:
        The ``matplotlib.figure.Figure`` the matrix was drawn on.
    """
    figure = plt.figure(figsize=(8, 8))
    plt.imshow(cm)
    plt.title("Confusion matrix")
    plt.colorbar()

    # Label the axes with the class names (the argument used to be ignored,
    # leaving the plot with bare numeric ticks).
    tick_marks = list(range(len(class_names)))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so it accounts for the tick and axis labels.
    plt.tight_layout()

    return figure

In [47]:
import io

def plot_to_image(figure):
    """Convert a matplotlib figure into an image tensor and close the figure.

    Args:
        figure: the ``matplotlib.figure.Figure`` to rasterise.

    Returns:
        A tensor holding the PNG-decoded RGBA image with a leading batch
        dimension of 1, as expected by ``tf.summary.image``.
    """
    buf = io.BytesIO()
    # Save the figure that was passed in. plt.savefig() would write whichever
    # figure is "current", which is not necessarily this one.
    figure.savefig(buf, format='png')
    # Free the figure; only the encoded bytes are needed from here on.
    plt.close(figure)
    buf.seek(0)
    image = tf.image.decode_png(buf.getvalue(), channels=4)
    image = tf.expand_dims(image, 0)
    return image

In [48]:
# Timestamped folder for this run; confusion-matrix images go to a 'cm' subfolder.
logdir = f"logs/image/{datetime.datetime.now():%Y%m%d-%H%M%S}"

file_writer_cm = tf.summary.create_file_writer(f"{logdir}/cm")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

In [49]:
from sklearn.metrics import confusion_matrix

def log_confusion_matrix(epoch, logs):
    """Epoch-end hook: write the test-set confusion matrix to TensorBoard as an image.

    Args:
        epoch: epoch index, used as the summary step.
        logs: metrics dict supplied by Keras; not used here.
    """
    # Predicted class = index of the largest output per row.
    class_scores = model.predict(x_test)
    predicted_labels = class_scores.argmax(axis=1)

    matrix = confusion_matrix(y_test, predicted_labels)
    matrix_figure = plot_confusion_matrix(matrix, class_names=['white', 'red'])
    matrix_image = plot_to_image(matrix_figure)

    with file_writer_cm.as_default():
        tf.summary.image("Confusion Matrix", matrix_image, step=epoch)

# Log a confusion-matrix image at the end of every epoch.
cm_callback = keras.callbacks.LambdaCallback(on_epoch_end=log_confusion_matrix)

# Learn the per-feature mean and variance from the training inputs; a Normalization
# layer that is never adapted has no training statistics to standardise with.
feature_normalizer = layers.Normalization(axis=-1)
feature_normalizer.adapt(x_train.to_numpy(dtype="float32"))

model = keras.Sequential([
    feature_normalizer,
    layers.Dense(hidden_size, activation='relu'),
    # Linear output: the loss below is built with from_logits=True, so the last
    # layer must emit raw logits. A ReLU here clipped negative logits to zero.
    layers.Dense(2)
])

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=lr),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.metrics.SparseCategoricalAccuracy()]
)

model.fit(
    x_train, y_train,
    epochs=5,
    verbose=0,
    callbacks=[tensorboard_callback, cm_callback],
    validation_data=(x_test, y_test),
)

<keras.callbacks.History at 0x19b8e918940>

In [50]:
# Load the first quarter of the AG News train and test splits as (text, label) pairs.
(train_dataset, test_dataset), info = tfds.load(
    'ag_news_subset',
    split=['train[:25%]', 'test[:25%]'],
    as_supervised=True,
    with_info=True,
)

In [51]:
BATCH_SIZE = 64
# NOTE: this rebinds the same names, so re-running the cell batches the
# already-batched datasets a second time.
train_dataset, test_dataset = (
    split.batch(BATCH_SIZE) for split in (train_dataset, test_dataset)
)

In [52]:
VOCAB_SIZE = 1000
# Build the vocabulary from the training texts only (labels are dropped by the map).
encoder = keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
encoder.adapt(train_dataset.map(lambda text, _label: text))

In [53]:
# Distinct label values seen in the training data, gathered batch by batch.
labels = np.unique(
    np.concatenate([batch_labels for _, batch_labels in train_dataset], axis=0)
)

In [54]:
# Loss and optimizer for the custom training loop.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=1e-4)

# Running metrics, kept separately for the train and test passes.
train_loss = tf.keras.metrics.Mean(name='train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [55]:
def train_step(model, optimizer, x_train, y_train):
    """Apply one gradient update for a batch and accumulate the train metrics.

    Args:
        model: callable Keras model, invoked with ``training=True``.
        optimizer: optimizer whose ``apply_gradients`` performs the update.
        x_train: batch of inputs.
        y_train: batch of integer labels.
    """
    with tf.GradientTape() as tape:
        logits = model(x_train, training=True)
        batch_loss = loss_fn(y_train, logits)

    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    # Fold this batch into the epoch-level running metrics.
    train_loss.update_state(batch_loss)
    train_accuracy.update_state(y_train, logits)

def test_step(model, x_test, y_test):
    """Evaluate one batch and accumulate the test loss and accuracy metrics.

    Args:
        model: callable Keras model.
        x_test: batch of inputs.
        y_test: batch of integer labels.
    """
    logits = model(x_test)
    batch_loss = loss_fn(y_test, logits)

    test_loss.update_state(batch_loss)
    test_accuracy.update_state(y_test, logits)

In [56]:
# One timestamped run folder with separate 'train' and 'test' writers, so both
# curves appear on the same TensorBoard charts.
current_time = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
train_log_dir = f'logs/gradient_tape/{current_time}/train'
test_log_dir = f'logs/gradient_tape/{current_time}/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [57]:
epochs = 3

# Text classifier: vectorise -> embed (index 0 masked) -> LSTM -> dense head that
# outputs one logit per label.
model = keras.Sequential([
    encoder,
    tf.keras.layers.Embedding(
        input_dim=len(encoder.get_vocabulary()),
        output_dim=64,
        mask_zero=True),
    layers.LSTM(16),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(labels))
])

# NOTE: the loop targets below rebind the notebook-level names x_train/y_train and
# x_test/y_test; after the loop they hold the last batch that was iterated.
for epoch in range(epochs):
    for (x_train, y_train) in train_dataset:
        train_step(model, optimizer, x_train, y_train)
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

    for (x_test, y_test) in test_dataset:
        test_step(model, x_test, y_test)

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=epoch)
        tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)

    # Clear the accumulators so each epoch reports its own averages.
    # reset_state() is the current Metric API; reset_states() is a deprecated alias.
    train_loss.reset_state()
    test_loss.reset_state()
    train_accuracy.reset_state()
    test_accuracy.reset_state()

In [58]:
from tensorboard.plugins.hparams import api as hp

In [59]:
METRIC_ACCURACY = 'accuracy'
# Learning rates to sweep over, one run per value.
HP_LR = hp.HParam('learning_rate', hp.Discrete([1e-1, 1e-2, 1e-3, 1e-4, 1e-5]))

# Register the hyperparameter and the reported metric with the HParams dashboard.
with tf.summary.create_file_writer('logs/hparam_tuning').as_default():
    hp.hparams_config(
        metrics=[hp.Metric(METRIC_ACCURACY, display_name='Accuracy')],
        hparams=[HP_LR],
    )

In [60]:
def train_test_model(hparams):
    """Train a fresh text classifier for one epoch and return its test accuracy.

    Args:
        hparams: mapping that holds the learning rate under the ``HP_LR`` key.

    Returns:
        The accuracy value reported by ``model.evaluate`` on ``test_dataset``.
    """
    model = keras.Sequential([
        encoder,
        tf.keras.layers.Embedding(
            input_dim=len(encoder.get_vocabulary()),
            output_dim=64,
            mask_zero=True),
        layers.LSTM(16),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(len(labels))
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hparams[HP_LR]),
        # The last Dense layer has no activation, so the model emits logits. The
        # string 'sparse_categorical_crossentropy' would treat them as probabilities.
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'],
    )

    # Use the batched datasets directly. The notebook-level x_train/y_train and
    # x_test/y_test are rebound by the custom training loop to a single batch,
    # so fitting on them trained and scored on only a handful of samples.
    model.fit(train_dataset, epochs=1)
    _, accuracy = model.evaluate(test_dataset)
    return accuracy

In [61]:
epochs = 2

def run(run_dir, hparams):
    """Execute one hyperparameter trial and log its settings and accuracy.

    Args:
        run_dir: directory the trial's summaries are written to.
        hparams: hyperparameter values for this trial.
    """
    trial_writer = tf.summary.create_file_writer(run_dir)
    with trial_writer.as_default():
        # Record the trial's hyperparameters before training starts.
        hp.hparams(hparams)
        tf.summary.scalar(METRIC_ACCURACY, train_test_model(hparams), step=epochs)

In [62]:
# One trial per candidate learning rate, each logged to its own numbered folder.
# The loop variable is deliberately not called `lr`: that name is the notebook-level
# learning-rate setting, which the sweep used to overwrite with the last candidate.
for i, learning_rate in enumerate(HP_LR.domain.values):
    hparams = {HP_LR: learning_rate}
    run(f'logs/hparam_tuning/{i}', hparams)



















In [84]:
from tensorboard.plugins import projector
import os

# Export the trained embedding matrix and its token labels for the TensorBoard projector.
log_dir = os.path.join('logs', 'nlp')
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(log_dir, exist_ok=True)

# Row 0 of the embedding is dropped below (the Embedding layer uses mask_zero=True,
# so index 0 is the masked id). Drop the first vocabulary entry as well, otherwise
# the metadata has one more line than the tensor has rows and every label is
# shifted by one.
vocabulary = encoder.get_vocabulary()[1:]
# Explicit UTF-8 so non-ASCII tokens do not depend on the platform default encoding.
with open(os.path.join(log_dir, 'metadata.tsv'), "w", encoding="utf-8") as f:
    for token in vocabulary:
        f.write(f"{token}\n")

# Save the embedding weights as a checkpointed variable the projector can load.
weights = tf.Variable(model.layers[1].get_weights()[0][1:])
checkpoint = tf.train.Checkpoint(embedding=weights)
checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

# Point the projector at the checkpointed tensor and its metadata file.
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
embedding.metadata_path = 'metadata.tsv'
projector.visualize_embeddings(log_dir, config)