In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import nltk
nltk.download('twitter_samples')
from nltk.corpus import twitter_samples
import numpy as np

# Vocabulary cap for the tokenizer and the fixed length every sequence is
# padded/truncated to
vocab_size = 10000
max_len = 256

# Read the raw tweet strings of both sentiment classes from the NLTK corpus
positive_tweets = twitter_samples.strings('positive_tweets.json')
negative_tweets = twitter_samples.strings('negative_tweets.json')

# Positives come first, negatives second; the label vector mirrors that
# order (1.0 for every positive tweet, 0.0 for every negative tweet)
tweets = positive_tweets + negative_tweets
labels = np.concatenate(
    [np.ones(len(positive_tweets)), np.zeros(len(negative_tweets))]
)

# Fit the tokenizer on the whole corpus, then turn each tweet into a list of
# integer word ids; '<OOV>' is the placeholder token for unknown words
tokenizer = Tokenizer(oov_token='<OOV>', num_words=vocab_size)
tokenizer.fit_on_texts(tweets)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(tweets)

# Pad (or cut) at the end of each sequence so every row has max_len entries
data = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')

# Hold out 20% of the examples; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    random_state=42,
)

# Define the Transformer model
class TransformerModel(tf.keras.Model):
    """Binary classifier built from embeddings and a stack of encoder blocks.

    Token ids are embedded, a learned position embedding is added, the result
    is passed through ``num_blocks`` ``TransformerEncoderBlock`` layers in
    sequence, flattened, and fed to a single sigmoid unit.

    Args:
        vocab_size: Input dimension of the token embedding table.
        embedding_dim: Output dimension of both embedding tables.
        max_len: Input dimension of the position embedding table.
        num_heads: Forwarded to every encoder block.
        ff_dim: Forwarded to every encoder block.
        num_blocks: How many encoder blocks to stack.
        dropout_rate: Forwarded to every encoder block.
    """

    def __init__(self, vocab_size, embedding_dim, max_len, num_heads, ff_dim, num_blocks, dropout_rate):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.pos_embedding = tf.keras.layers.Embedding(input_dim=max_len, output_dim=embedding_dim)

        # Build the stack locally, then attach it as one list attribute.
        blocks = []
        for _ in range(num_blocks):
            blocks.append(
                TransformerEncoderBlock(embedding_dim, num_heads, ff_dim, dropout_rate)
            )
        self.encoder_blocks = blocks

        self.flatten = tf.keras.layers.Flatten()
        self.dense = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Run the forward pass and return the sigmoid output of the head.

        Assumes ``inputs`` holds integer token ids shaped (batch, seq_len),
        as produced by the padding step -- TODO confirm for other callers.
        """
        token_vectors = self.embedding(inputs)

        # One position id per step along the second-to-last axis of the
        # embedded tensor; the resulting position vectors have no batch axis
        # and are broadcast over it by the addition.
        step_count = tf.shape(token_vectors)[-2]
        position_vectors = self.pos_embedding(tf.range(step_count))
        hidden = token_vectors + position_vectors

        for block in self.encoder_blocks:
            hidden = block(hidden)

        return self.dense(self.flatten(hidden))

class TransformerEncoderBlock(tf.keras.layers.Layer):
    """Transformer encoder block with layer normalization before each sub-layer.

    The block applies, in order: LayerNorm -> self-attention -> dropout ->
    residual add, then LayerNorm -> two-layer feed-forward net -> dropout ->
    residual add. Both residual connections add the *un-normalized* input of
    the respective sub-layer.

    Args:
        embedding_dim: Output width of the feed-forward net; also passed as
            ``key_dim`` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward net.
        dropout_rate: Rate used by both dropout layers.
    """

    def __init__(self, embedding_dim, num_heads, ff_dim, dropout_rate):
        super().__init__()
        # NOTE(review): Keras documents `key_dim` as the size of *each* head,
        # so the query/key projections here are num_heads * embedding_dim
        # wide. Confirm this is intended rather than
        # embedding_dim // num_heads.
        self.att = tf.keras.layers.MultiHeadAttention(num_heads=num_heads, key_dim=embedding_dim)
        self.ffn = tf.keras.Sequential([
            tf.keras.layers.Dense(ff_dim, activation='relu'),
            tf.keras.layers.Dense(embedding_dim)
        ])
        self.layernorm1 = tf.keras.layers.LayerNormalization()
        self.layernorm2 = tf.keras.layers.LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training=None):
        """Apply the attention and feed-forward sub-layers to ``inputs``.

        Args:
            inputs: Tensor fed to the block; the output of the feed-forward
                net is added to it, so its last dimension must equal
                ``embedding_dim``.
            training: Optional flag forwarded to the attention and dropout
                layers. When left as ``None`` Keras resolves it from the
                surrounding call context.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        # Self-attention sub-layer: query and value are the same tensor.
        normed = self.layernorm1(inputs)
        attention_output = self.att(normed, normed, training=training)
        attention_output = self.dropout1(attention_output, training=training)
        # Plain tensor addition instead of building a new Add layer on every
        # call; the result is numerically the same.
        attended = inputs + attention_output

        # Position-wise feed-forward sub-layer.
        normed = self.layernorm2(attended)
        ffn_output = self.ffn(normed)
        ffn_output = self.dropout2(ffn_output, training=training)
        return attended + ffn_output

# Define hyperparameters
embedding_dim = 128   # width of the token and position embeddings
num_heads = 8         # attention heads per encoder block
ff_dim = 256          # hidden width of each block's feed-forward net
num_blocks = 6        # number of stacked encoder blocks
dropout_rate = 0.1    # dropout rate inside every encoder block
batch_size = 32
epochs = 10

# Create an instance of the Transformer model
model = TransformerModel(vocab_size, embedding_dim, max_len, num_heads, ff_dim, num_blocks, dropout_rate)

# Compile the model: the model ends in a single sigmoid unit, hence binary
# cross-entropy
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# y_train and y_test are already NumPy arrays (train_test_split was given a
# NumPy label array), so no extra conversion is needed before fitting.

# Train the model; the held-out split doubles as the validation set
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test))


[nltk_data] Downloading package twitter_samples to /root/nltk_data...
[nltk_data]   Unzipping corpora/twitter_samples.zip.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2870578f10>