In [31]:
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam


#### Inizializzo una classe che eseguirà un'architettura transformer composta da:
* Layer di Multi head attention
* Layer Dense
* Layer Normalization
* Dropout

In [2]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward network.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization.

    Args:
        embed_dim: Width of the last axis of the input. It is also used as the
            ``key_dim`` of the attention layer and as the output width of the
            feed-forward network (required for the residual additions).
        num_heads: Number of attention heads.
        ff_dim: Width of the hidden ReLU layer of the feed-forward network.
        rate: Dropout rate applied to the output of both sub-layers.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.5, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can rebuild the layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation='relu'), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-8)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-8)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs`` and return a tensor of the same width.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Flag forwarded to the two dropout layers. Defaults to
                ``None`` so the layer can be called without an explicit flag;
                in that case the dropout layers fall back to Keras' own
                handling of the training mode.

        Returns:
            The layer-normalized sum of the first sub-layer output and the
            feed-forward output.
        """
        # Self-attention: the same tensor is passed as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

#### Inizializzo una classe di Embedding che definirà le correlazioni più forti fra le parole all'interno delle frasi

In [3]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows of the position-embedding table, i.e. the
            largest sequence length the layer can index.
        vocab_size: Number of rows of the token-embedding table.
        embed_dim: Output width of both embedding tables.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Constructor arguments are kept so get_config() can rebuild the layer.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed ``x`` and add the embedding of each position index.

        Args:
            x: Tensor whose last axis is the sequence axis; assumed to hold
                token indices valid for the token-embedding table.

        Returns:
            ``token_emb(x) + pos_emb(range(sequence_length))``.
        """
        # The sequence length is read from the input itself, not from the
        # constructor's maxlen, so the positions always match the input.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

#### Importo il dataset ed eseguo un padding per ottenere la stessa lunghezza per tutte le sequenze

In [4]:
vocab_size = 20000  # forwarded as `num_words` to the IMDB loader
maxlen = 200  # target length handed to pad_sequences

# Fetch the IMDB train/test splits.
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=vocab_size
)

# Report the number of samples in each split, one per line.
print(len(x_train), len(x_test), sep="\n")

# Pad both splits with the same maxlen so every sequence ends up with the
# same length.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(_split, maxlen=maxlen)
    for _split in (x_train, x_test)
)

25000
25000


#### Scrivo l'architettura del modello inserendo anche le classi create prima, poi lo compilo e lo addestro

In [5]:
# Hyperparameters: embedding width, attention heads, feed-forward hidden width.
embed_dim, num_heads, ff_dim = 32, 8, 16

inputs = layers.Input(shape=(maxlen,))

# Embedding, encoder block and classification head, listed (and therefore
# instantiated) in the order in which they are applied.
x = inputs
for _layer in (
    TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim),
    TransformerBlock(embed_dim, num_heads, ff_dim),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.5),
    layers.Dense(ff_dim, activation="relu"),
    layers.Dropout(0.5),
):
    x = _layer(x)

# Two-unit softmax output.
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [6]:
# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 200)]             0         
                                                                 
 token_and_position_embeddi  (None, 200, 32)           646400    
 ng (TokenAndPositionEmbedd                                      
 ing)                                                            
                                                                 
 transformer_block (Transfo  (None, 200, 32)           34768     
 rmerBlock)                                                      
                                                                 
 global_average_pooling1d (  (None, 32)                0         
 GlobalAveragePooling1D)                                         
                                                                 
 dropout_2 (Dropout)         (None, 32)                0     

In [22]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# accuracy tracked as the only metric.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=Adam(learning_rate=1e-3),
    metrics=["accuracy"],
)

In [28]:
# Train for 5 epochs in batches of 500 samples; the returned object is kept
# in `history`.
# NOTE(review): the test split is also passed as validation data here —
# confirm this is intended, since the same split is evaluated afterwards.
history = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=5,
    batch_size=500,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


#### Vedo i risultati dell'addestramento 

In [30]:
# Evaluate on the test split; the cell displays the returned values
# (loss first, then the accuracy metric configured in compile()).
model.evaluate(x_test, y_test)



[0.47808706760406494, 0.861519992351532]