In [36]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,GlobalAveragePooling1D,LayerNormalization,Dropout

In [37]:
#settings
vocab_size = 10000
max_length = 100
embedding_dim = 64
num_heads = 2 
dff = 128
batch_size = 32
epochs = 5

In [38]:
#Step 1: Load and prepare the IMDb data

(X_train,y_train),(X_test,y_test) = imdb.load_data(num_words=vocab_size)
X_train = pad_sequences(X_train,maxlen=max_length,padding='post')
X_test = pad_sequences(X_test,maxlen=max_length,padding='post')

In [39]:
X_train[0]

array([1415,   33,    6,   22,   12,  215,   28,   77,   52,    5,   14,
        407,   16,   82,    2,    8,    4,  107,  117, 5952,   15,  256,
          4,    2,    7, 3766,    5,  723,   36,   71,   43,  530,  476,
         26,  400,  317,   46,    7,    4,    2, 1029,   13,  104,   88,
          4,  381,   15,  297,   98,   32, 2071,   56,   26,  141,    6,
        194, 7486,   18,    4,  226,   22,   21,  134,  476,   26,  480,
          5,  144,   30, 5535,   18,   51,   36,   28,  224,   92,   25,
        104,    4,  226,   65,   16,   38, 1334,   88,   12,   16,  283,
          5,   16, 4472,  113,  103,   32,   15,   16, 5345,   19,  178,
         32], dtype=int32)

In [40]:
# Adds position info to word vectors

class PostionalEncoding(tf.keras.layers.Layer):
    def __init__(self,max_length,d_model):
        super(PostionalEncoding,self).__init__()
        self.pos_encoding = self.postional_encoding(max_length,d_model)

    def postional_encoding(self,max_length,d_model):
        pos = tf.range(max_length,dtype=tf.float32)[:,tf.newaxis]
        i = tf.range(d_model,dtype=tf.float32)[tf.newaxis,:]
        angle_rates = 1 / tf.pow(10000,(2 * (i//2) / tf.cast(d_model,tf.float32)))
        angles = pos * angle_rates
        sines = tf.math.sin(angles[:,0::2])
        cosines = tf.math.cos(angles[:,1::2])
        pos_enc = tf.concat([sines,cosines],axis=1)
        return pos_enc[tf.newaxis,...]
    
    def call(self,x):
        seq_len = tf.shape(x)[1]
        return x + self.pos_encoding[:,:seq_len,:]

# The Transformer encoder layer
class TransformerEncoderLayer(tf.keras.layers.Layer):
    def __init__(self,num_heads,d_model,dff,rate=0.1):
        super(TransformerEncoderLayer,self).__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=num_heads,key_dim=d_model // num_heads)
        self.ffn = tf.keras.Sequential([
            Dense(dff,activation='relu'),
            Dense(d_model)
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self,x,training):
        attn_output = self.mha(x,x,x)
        attn_output = self.dropout1(attn_output,training=training)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output,training=training)
        return self.layernorm2(out1+ffn_output)


In [41]:
#Build the model

model = Sequential([
    tf.keras.layers.Embedding(vocab_size,embedding_dim,input_length=max_length),
    PostionalEncoding(max_length,embedding_dim),
    TransformerEncoderLayer(num_heads,embedding_dim,dff),
    GlobalAveragePooling1D(),
    Dense(1,activation='sigmoid')
])

In [42]:
# Step 3: Compile and train

model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
model.fit(X_train,y_train,epochs=epochs,batch_size=batch_size,validation_split=0.2)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f0765f43be0>

In [46]:
# NEW: Prediction function for a custom sentence
def predict_sentiment(sentence, model, max_length, vocab_size):
    # Get the IMDb word index
    word_index = imdb.get_word_index()
    
    # Preprocess the sentence
    # Convert to lowercase and split into words
    words = sentence.lower().split()
    
    # Map words to their indices (unknown words get index 0)
    sequence = [word_index.get(word, 0) for word in words if word_index.get(word, 0) < vocab_size]
    
    # Pad the sequence to match model's input length
    padded_sequence = pad_sequences([sequence], maxlen=max_length, padding='post')
    
    # Predict
    pred_prob = model.predict(padded_sequence, verbose=0)[0][0]  # Probability output
    pred_label = 1 if pred_prob > 0.5 else 0  # Threshold at 0.5
    return 'Positive' if pred_label == 1 else 'Negative', pred_prob

# Example usage
custom_sentence = "very great experience!"
sentiment, probability = predict_sentiment(custom_sentence, model, max_length, vocab_size)
print(f"Sentence: '{custom_sentence}'")
print(f"Predicted sentiment: {sentiment} (Probability: {probability:.4f})")

# # Another example
# another_sentence = "This was a bad experience."
# sentiment, probability = predict_sentiment(another_sentence, model, max_length, vocab_size)
# print(f"Sentence: '{another_sentence}'")
# print(f"Predicted sentiment: {sentiment} (Probability: {probability:.4f})")

Sentence: 'very great experience!'
Predicted sentiment: Negative (Probability: 0.0340)
