In [None]:
# Ask Colab for the TensorFlow 2.x runtime; this cell runs before TensorFlow is imported.
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    # Best-effort: a failure of the magic is only reported, the notebook keeps going.
    print("tensorflow_version failed, i guess you are not in Colab")

In [None]:
import numpy as np
import tensorflow as tf

# Show which TensorFlow release this kernel is actually using.
print(tf.__version__)

## TF Imdb review

In [None]:
# Keras copy of the IMDB reviews, plus its word -> index table.
(train_data, train_y), (test_data, test_y) = tf.keras.datasets.imdb.load_data()
word_index = tf.keras.datasets.imdb.get_word_index()

# Reverse lookup (index -> word). Indices from word_index are shifted by 3 so that
# slots 0, 1 and 2 can hold the special tokens registered just below.
id_to_word = {index + 3: token for token, index in word_index.items()}
id_to_word.update({0: "<pad>", 1: "<sos>", 2: "<unk>"})

In [None]:
# Decode the first 100 training reviews from word ids back to readable text.
for i in range(100):
    print(" ".join(id_to_word[word_id] for word_id in train_data[i]))

# Vocabulary size. This has to sit outside the loop: as a bare expression inside the
# loop body it was evaluated on every iteration and never displayed.
len(word_index)

In [None]:
# Show the first training review in its stored (not decoded) form.
repr(train_data[0])

# Un jeu de données qui ne nécessite pas de pré-processing

In [None]:
import tensorflow_datasets as tfds

# Load the IMDB reviews through TensorFlow Datasets.
# Note: this rebinds `train_data` and `test_data`.
#   - 'imdb_reviews/subwords8k': the variant pre-encoded with an ~8k vocabulary
#   - split: ask for the train and test splits, returned as a tuple
#   - as_supervised: yield (example, label) pairs instead of a dictionary
#   - with_info: also return the `info` structure
(train_data, test_data), info = tfds.load(
    name='imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    as_supervised=True,
    with_info=True,
)


In [None]:
# Encoder attached to the 'text' feature of the dataset info; later cells read its
# `vocab_size` to size the Embedding layers.
encoder = info.features['text'].encoder

In [None]:
BUFFER_SIZE = 1000

# IMPORTANT: batches must be *padded* batches. `padded_shapes=([None], ())` pads the
# first element of each (example, label) pair along its one unknown-length dimension
# and leaves the second element as an unpadded scalar.
train_batches = train_data.shuffle(BUFFER_SIZE).padded_batch(
    32, padded_shapes=([None], ()))

# Same batching for the test split, without shuffling.
test_batches = test_data.padded_batch(32, padded_shapes=([None], ()))


## Exemple numéro 1

Un embedding à 16 composantes, suivi d'une couche de moyenne globale (`GlobalAveragePooling1D`).

In [None]:
from tensorflow import keras

# Two stacked LSTMs on top of a 16-dimensional embedding, ending in a sigmoid unit.
model = keras.Sequential(
    layers=[
        keras.layers.Embedding(encoder.vocab_size, 16),
        # return_sequences=True keeps the sequence so the next LSTM can consume it.
        keras.layers.LSTM(
            64,
            return_sequences=True,
            name="LSTM_qui_conserve_la_sequence",
        ),
        keras.layers.LSTM(64),
        keras.layers.Dense(1, activation='sigmoid'),
    ]
)

model.summary()

In [None]:
# Hand check to compare with model.summary(): 64 * (16 + 1 + 64), using the 64 LSTM
# units and the 16-dimensional embedding of the model defined above.
# NOTE(review): presumably the weight count of a single LSTM gate (inputs + bias +
# recurrent); the full layer would be a multiple of this — confirm against the summary.
64 * (16 + 1 + 64)

In [None]:
from tensorflow import keras

# Baseline classifier: 16-dimensional embedding, average over the sequence axis,
# then a single linear unit.
model = keras.Sequential([
    keras.layers.Embedding(encoder.vocab_size, 16),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(1),  # no activation: the model outputs a raw logit
])

model.summary()

model.compile(
    optimizer='adam',
    # The loss is told that predictions are logits, matching the Dense(1) above.
    loss=tf.losses.BinaryCrossentropy(from_logits=True),
    # The plain 'accuracy' string thresholds predictions at 0.5, which is the wrong
    # decision boundary for logits; a logit of 0.0 is the 50% point. The metric keeps
    # the name 'accuracy' so the keys in `history.history` are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')],
)

history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
    validation_steps=30,  # validate on 30 batches per epoch rather than the full split
)


In [None]:
# Bidirectional LSTM classifier over a 64-dimensional embedding.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),  # no activation: the model outputs a raw logit
])

# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()

model.compile(
    # The last layer has no sigmoid, so the loss must be told it receives logits;
    # with the default from_logits=False the logits were treated as probabilities.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(1e-4),
    # Threshold at 0.0 (the logit of p = 0.5) instead of the 0.5 that the plain
    # 'accuracy' string applies; the name is kept so history keys do not change.
    metrics=[tf.keras.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')],
)

history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
)


In [None]:
# Two stacked bidirectional LSTMs, then a dense head with dropout.
model = tf.keras.Sequential(
    layers=[
        tf.keras.layers.Embedding(encoder.vocab_size, 64),
        # return_sequences=True keeps the sequence dimension for the next recurrent layer.
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(64, return_sequences=True)
        ),
        tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(32)
        ),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1),
    ]
)

model.summary()

# NLP : transfer learning

In [None]:
import tensorflow_hub as hub

# Load a text-embedding module from TensorFlow Hub and apply it to two sentences.
embed = hub.load(
    "https://tfhub.dev/google/nnlm-en-dim50-with-normalization/2"
)
embeddings = embed(
    [
        "cat is on the mat",
        "dog is in the fog",
    ]
)

In [None]:
# Display the embeddings computed for the two example sentences.
embeddings