Alku

In [2]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import pad_sequences 

# Dataset configuration reused by the later cells.
max_features = 10000  # vocabulary size (passed to load_data as num_words)
max_len = 250  # number of tokens kept per sample

(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features,
)

# Bring every sample to exactly max_len tokens; padding goes at the end.
x_train = pad_sequences(
    sequences=x_train,
    maxlen=max_len,
    padding="post",
)
x_test = pad_sequences(
    sequences=x_test,
    maxlen=max_len,
    padding="post",
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [3]:
# Invert the word -> index vocabulary so token ids can be mapped back to words.
word_to_index = imdb.get_word_index()
index_to_word = {index: word for word, index in word_to_index.items()}

# Token ids are shifted down by 3 before the lookup; any id without a
# vocabulary entry after the shift is rendered as "?".
# NOTE(review): the offset presumably mirrors load_data's default
# index_from=3 (lowest ids reserved for special tokens) -- confirm in the docs.
example_review = " ".join(
    [index_to_word.get(token_id - 3, "?") for token_id in x_train[0]]
)
print(example_review)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list 

In [4]:
from tensorflow.keras.layers import Layer, Embedding
import tensorflow as tf

class TokenAndPositionEmbedding(Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        seq_len: Number of distinct positions the position table holds.
        vocab_size: Number of distinct token ids the token table holds.
        emb_dim: Size of each embedding vector (same for both tables).
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``,
            ``dtype``, ``trainable``), forwarded to the base class.
    """

    def __init__(self, seq_len, vocab_size, emb_dim, **kwargs):
        # Forwarding **kwargs lets callers name the layer and lets the default
        # from_config() rebuild it from the dictionary returned by get_config().
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.seq_len = seq_len
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.token_emb = Embedding(input_dim=vocab_size, output_dim=emb_dim)
        self.pos_emb = Embedding(input_dim=seq_len, output_dim=emb_dim)

    def call(self, x_input):
        """Embed token ids and add the embedding of each position.

        Args:
            x_input: Tensor of token ids; its last axis is treated as the
                sequence axis.

        Returns:
            The token embeddings of ``x_input`` plus the position embeddings
            for indices ``0 .. length - 1`` along the last axis.
        """
        # Use the runtime length of the last axis rather than the stored
        # seq_len, so positions always match the actual input.
        seq_len = tf.shape(x_input)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x_input = self.token_emb(x_input)
        # The position embeddings have no leading batch axis; the addition
        # broadcasts them over the leading axes of the token embeddings.
        return x_input + positions

    def get_config(self):
        """Return the constructor arguments merged into the base layer config."""
        config = super().get_config()
        config.update(
            {
                "seq_len": self.seq_len,
                "vocab_size": self.vocab_size,
                "emb_dim": self.emb_dim,
            }
        )
        return config

In [5]:
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling1D, Dropout 
from tensorflow.keras.models import Model
from tensorflow.keras.layers import MultiHeadAttention 

# Model hyper-parameters.
embed_dim = 32  # size of each token/position embedding vector
num_heads = 2  # number of attention heads
key_dim = embed_dim // num_heads  # query/key size of a single head

# Token ids -> embeddings -> self-attention -> mean over the sequence ->
# dropout -> single sigmoid unit.
inputs = Input(shape=(max_len,))
x = TokenAndPositionEmbedding(
    seq_len=max_len,
    vocab_size=max_features,
    emb_dim=embed_dim,
)(inputs)
# The same tensor is passed twice, so the sequence attends to itself.
attention_output = MultiHeadAttention(
    num_heads=num_heads,
    key_dim=key_dim,
)(x, x)
x = GlobalAveragePooling1D()(attention_output)
x = Dropout(rate=0.5)(x)
outputs = Dense(units=1, activation="sigmoid")(x)
att_model = Model(inputs=inputs, outputs=outputs)

att_model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

att_model.summary()




In [6]:
# Train for 5 passes over the training set in mini-batches of 32.
# NOTE(review): no validation data is passed here, so the accuracy logged
# per epoch is measured on the training samples only.
att_model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 9ms/step - accuracy: 0.6478 - loss: 0.5797
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9039 - loss: 0.2496
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9379 - loss: 0.1728
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9564 - loss: 0.1301
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9679 - loss: 0.0972


<keras.src.callbacks.history.History at 0x20600099520>