In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

import tensorflow as tf


In [2]:
# Read data from dataset IMDB
imdbreviewdata = pd.read_csv('/content/drive/MyDrive/dataset/IMDB Dataset.csv')

In [3]:
# See the first 2 rows
imdbreviewdata.head(2)

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive


In [4]:
imdbreviewdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   review     50000 non-null  object
 1   sentiment  50000 non-null  object
dtypes: object(2)
memory usage: 781.4+ KB


In [5]:
imdbreviewdata.review[0] # Display the first review from the dataset

"One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fa

In [6]:
# As we see '<br />' in the above text, we have to replace those string
imdbreviewdata.review = imdbreviewdata.review.str.lower().replace('<br />','',regex=True)
imdbreviewdata.review[0]

"one of the other reviewers has mentioned that after watching just 1 oz episode you'll be hooked. they are right, as this is exactly what happened with me.the first thing that struck me about oz was its brutality and unflinching scenes of violence, which set in right from the word go. trust me, this is not a show for the faint hearted or timid. this show pulls no punches with regards to drugs, sex or violence. its is hardcore, in the classic use of the word.it is called oz as that is the nickname given to the oswald maximum security state penitentary. it focuses mainly on emerald city, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. em city is home to many..aryans, muslims, gangstas, latinos, christians, italians, irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.i would say the main appeal of the show is due to the fact that it goes where other shows wo

In [7]:
# Now we have to tokenize the reviews
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

num_of_words = 10000 # Max num of words to be used
max_len_sequence = 50

tokenizer = Tokenizer(num_words = num_of_words)
tokenizer.fit_on_texts(imdbreviewdata.review)

# Now change texts to sequence
sequences = tokenizer.texts_to_sequences(imdbreviewdata.review)

# Now pad the sequence
padded_sequences = pad_sequences(sequences, maxlen = max_len_sequence)

# Now define the X, y resp
X = padded_sequences
y = pd.get_dummies(imdbreviewdata.sentiment)

print(f'The shape of X is {X.shape}')
print(f'The shape of y is {y.shape}')

The shape of X is (50000, 50)
The shape of y is (50000, 2)


In [8]:
#See one value of y
y.iloc[0]

Unnamed: 0,0
negative,False
positive,True


In [9]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D
from tensorflow.keras.utils import plot_model

# Transformer Encoder block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout):
    #MultiHead Attention
    attention_output = MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)(inputs, inputs)
    """We should create my own MultiHeadAttention funtion to get better understanding"""

    # Add & Norm
    attention_output = LayerNormalization(epsilon=1e-6)(inputs + attention_output)

    # Feed Forwrad Block
    ff_output = Dense(ff_dim, activation="relu")(attention_output)
    ff_output = Dense(inputs.shape[-1])(ff_output)

    # Add & Norm
    ff_output = Dropout(dropout)(ff_output)
    ff_output = LayerNormalization(epsilon=1e-6)(attention_output + ff_output)

    return ff_output

# Build the complete model
def build_transformer_model(max_features,
                            max_len,
                            embed_dim=128,
                            num_heads=2,
                            feed_forward_dim=256,
                            n_transformer_block=2,
                            dropout=0.1):
    inputs = Input(shape=(max_len,))
    embedding = Embedding(input_dim=max_features, output_dim=embed_dim)(inputs)
    x = embedding

    # Transformer block
    for _ in range(n_transformer_block):
        x = transformer_encoder(x,
                                head_size=embed_dim // num_heads,
                                num_heads=num_heads,
                                ff_dim=feed_forward_dim,
                                dropout=dropout)

    # Global Pooling
    x = GlobalAveragePooling1D()(x)

    # Final Dense Layer for classification
    outputs = Dense(2, activation="softmax")(x)

    # Create the model
    model = Model(inputs=inputs, outputs=outputs)

    return model


# Create and compile the Model
max_features = 10000  # Vocabulary size
max_len = 50  # Maximum sequence length

transformer_model = build_transformer_model(
    max_features=max_features,
    max_len=max_len,
    embed_dim=128,
    num_heads=4,
    feed_forward_dim=256,
    n_transformer_block=2,
    dropout=0.2)
transformer_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
transformer_model.summary()

In [10]:
# Plot the model
#plot_model(model=transformer_model, show_shapes=True, show_layer_names=True)

In [15]:
# Train the model
history = transformer_model.fit(X, y, epochs=5, batch_size=64)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 8ms/step - accuracy: 0.9024 - loss: 0.2229
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.9101 - loss: 0.2111
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.9232 - loss: 0.1844
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.9332 - loss: 0.1624
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.9435 - loss: 0.1430


In [16]:
testreview = ["I have seen 2-3 movies but this one is the one of the best. Very good acting and thanks to director for making this type of movie"]
testreview = tokenizer.texts_to_sequences(testreview)
testreview = pad_sequences(testreview, maxlen=50)

sentiment = transformer_model.predict(testreview)
print("Sentiment: ", sentiment)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Sentiment:  [[0.07554209 0.9244579 ]]
