#### Utilizzo di **TensorBoard** per la visualizzazione degli embedding addestrati su un particolare problema.

In [1]:
import os
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorboard.plugins import projector

#### Importo un dataset (composto da commenti) da TensorFlow Datasets, mescolo i dati e applico il padding ai batch

In [2]:
# Load the train and test splits of the "imdb_reviews/subwords8k" config as
# (text, label) pairs, together with the dataset metadata.
requested_splits = (tfds.Split.TRAIN, tfds.Split.TEST)
(train_data, test_data), info = tfds.load(
    "imdb_reviews/subwords8k",
    split=requested_splits,
    as_supervised=True,
    with_info=True,
)

# The text encoder is published as part of the "text" feature metadata.
encoder = info.features["text"].encoder

# Shuffle with a 1000-element buffer, then form batches of 10 examples.
# The text component has variable length, so it is padded per batch; the
# label component is a scalar and needs no padding.
batch_shapes = ((None,), ())
train_batches = train_data.shuffle(1000).padded_batch(10, padded_shapes=batch_shapes)
test_batches = test_data.shuffle(1000).padded_batch(10, padded_shapes=batch_shapes)

# Draw one batch from the training pipeline for inspection.
train_batch, train_labels = next(iter(train_batches))



[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\claud\tensorflow_datasets\imdb_reviews\subwords8k\1.0.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\claud\tensorflow_datasets\imdb_reviews\subwords8k\1.0.0.incomplete4SQZ4U\imdb_reviews-train…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\claud\tensorflow_datasets\imdb_reviews\subwords8k\1.0.0.incomplete4SQZ4U\imdb_reviews-test.…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling C:\Users\claud\tensorflow_datasets\imdb_reviews\subwords8k\1.0.0.incomplete4SQZ4U\imdb_reviews-unsup…



[1mDataset imdb_reviews downloaded and prepared to C:\Users\claud\tensorflow_datasets\imdb_reviews\subwords8k\1.0.0. Subsequent calls will reuse this data.[0m


In [3]:
# Report how many entries the encoder's vocabulary contains.
vocabulary_size = encoder.vocab_size
print('Vocabolario:', vocabulary_size, 'termini')

Vocabolario: 8185 termini


#### Scrivo l'architettura e compilo il modello (l'obiettivo del modello sarà riconoscere quali sono i commenti negativi all'interno del dataset)

In [4]:
# Size of each learned embedding vector.
embedding_dim = 16

# The embedding layer is created on its own so it can be referenced by name
# in addition to being the first layer of the model.
embedding = tf.keras.layers.Embedding(encoder.vocab_size, embedding_dim)

# Embedding -> average over the sequence axis -> two small ReLU layers with
# dropout -> a single output unit with no activation (raw logit).
model = tf.keras.Sequential(
    [
        embedding,
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1),
    ]
)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The output layer emits logits, so the positive/negative boundary is 0.
    # The plain "accuracy" string would threshold the logits at 0.5 and
    # miscount predictions in (0, 0.5]. The metric keeps the name "accuracy"
    # so the keys in the training history are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(name="accuracy", threshold=0.0)],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 16)          130960    
                                                                 
 global_average_pooling1d (  (None, 16)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dropout (Dropout)           (None, 16)                0         
                                                                 
 dense_1 (Dense)             (None, 16)                272       
                                                                 
 dropout_1 (Dropout)         (None, 16)                0         
                                                        

#### Addestro il modello

In [5]:
# Train for 10 epochs on the batched training data; validation after each
# epoch is limited to 20 batches taken from the test pipeline.
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
    validation_steps=20,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


* Configuro la cartella per i log
* Salvo nel file `metadata.tsv` le subword del vocabolario usate dal layer di Embedding; le posizioni del vocabolario
  senza una subword corrispondente le etichetto con 'unknow #N'
* Salvo i pesi associati ai vettori del layer di Embedding
* Creo un checkpoint dagli embedding addestrati
* Configuro il Projector di TensorBoard per la visualizzazione degli embedding

In [6]:
# Directory that receives every file the TensorBoard projector reads.
# exist_ok=True makes the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
log_dir='logs/imdb_example/'
os.makedirs(log_dir, exist_ok=True)

# Write one label per line: first every subword known to the encoder, then
# placeholder labels for the remaining vocabulary positions. In total this
# yields vocab_size - 1 lines, matching the rows kept in `weights` below.
with open(os.path.join(log_dir, 'metadata.tsv'), "w", encoding="utf-8") as f:
    for subwords in encoder.subwords:
        f.write("{}\n".format(subwords))
    for unknown in range(1, encoder.vocab_size - len(encoder.subwords)):
        f.write("unknow #{}\n".format(unknown))

# Take the trained embedding matrix from the model's first layer and drop
# row 0 so that the rows line up with the metadata lines.
# NOTE(review): presumably index 0 is the padding id, which has no label - confirm.
weights = tf.Variable(model.layers[0].get_weights()[0][1:])

# Save the matrix as a checkpoint; the keyword `embedding` determines the
# tensor name referenced by the projector configuration below.
checkpoint = tf.train.Checkpoint(embedding=weights)
checkpoint.save(os.path.join(log_dir, "embedding.ckpt"))

# Describe the saved tensor and its labels for the projector. A dedicated
# name is used here so the Keras `embedding` layer defined earlier in the
# notebook is not overwritten by the projector entry.
config = projector.ProjectorConfig()
embedding_info = config.embeddings.add()

embedding_info.tensor_name = "embedding/.ATTRIBUTES/VARIABLE_VALUE"
embedding_info.metadata_path = 'metadata.tsv'
projector.visualize_embeddings(log_dir, config)

Una volta eseguito lo script, avviare TensorBoard
da una shell con il seguente comando,
nella stessa cartella in cui è salvato
questo Jupyter notebook:
```
(venv) C:\....\tensorboard --logdir logs/imdb_example