In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D, Dropout
from tensorflow.keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

In [2]:
# Turn on memory growth for every GPU TensorFlow can see, so memory is
# claimed as needed. With no GPU present the loop body simply never runs.
devices = tf.config.experimental.list_physical_devices("GPU")
for device in devices:
    tf.config.experimental.set_memory_growth(device, True)

In [3]:
# Read the reviews and split by row position: the first 40,000 rows are used
# for training, every remaining row for evaluation.
dataset = pd.read_csv("IMDB Dataset.csv")
train_dataset = dataset.iloc[:40000]
test_dataset = dataset.iloc[40000:]

# Pull the raw review text and the sentiment column out as NumPy arrays.
X_train = train_dataset["review"].to_numpy()
y_train = train_dataset["sentiment"].to_numpy()
X_test = test_dataset["review"].to_numpy()
y_test = test_dataset["sentiment"].to_numpy()

In [4]:
# Hyperparameters shared by the tokenisation and model cells.
max_len = 256         # every review is padded/truncated to this many token ids
embedding_dim = 64    # width of each token embedding vector
attention_units = 16  # number of columns in each Q/K/V projection matrix
num_classes = 2       # size of the softmax output layer

In [5]:
# Build the word-level vocabulary from the training reviews only.
# X_train (the review texts extracted in the loading cell) is used rather than
# train_dataset["review"]: the name `train_dataset` is later rebound to a
# batched tf.data.Dataset, which would make this cell fail when re-run.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(X_train)

# Word indices start at 1 (0 is never assigned to a word), hence the +1 so the
# embedding table has a row for every index including 0.
vocab_size = len(tokenizer.word_index) + 1

In [6]:
# Turn each review into a list of vocabulary indices and force every list to
# exactly `max_len` entries; short reviews get padding appended at the end.
X_train_tokenized = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(X_train),
    maxlen=max_len,
    padding='post',
)
X_test_tokenized = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(X_test),
    maxlen=max_len,
    padding='post',
)

# Map the sentiment strings to integer class ids. The mapping is learned from
# the training labels and reused unchanged for the test labels.
encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train).astype(np.int64)
y_test_encoded = encoder.transform(y_test).astype(np.int64)

In [7]:
def initialize_weights(input_shape, attention_units):
    """Create the query, key and value projection matrices.

    Args:
        input_shape: Shape of the tensor that will be projected; only its
            last entry (the feature dimension) is read.
        attention_units: Number of columns in each projection matrix.

    Returns:
        Tuple ``(WQ, WK, WV)`` of ``tf.Variable`` objects, each of shape
        ``(input_shape[-1], attention_units)`` and filled by a Glorot-uniform
        initializer.
    """
    projection_shape = (input_shape[-1], attention_units)

    def new_projection():
        # A new initializer object is built for every matrix, so each one is
        # drawn independently.
        initializer = tf.initializers.glorot_uniform()
        return tf.Variable(initial_value=initializer(shape=projection_shape))

    # Created in query, key, value order.
    WQ = new_projection()
    WK = new_projection()
    WV = new_projection()
    return WQ, WK, WV

def self_attention(inputs, WQ, WK, WV):
    """Apply single-head scaled dot-product self-attention.

    Args:
        inputs: Tensor to attend over; its last dimension must match the
            first dimension of the projection matrices.
        WQ: Query projection matrix.
        WK: Key projection matrix.
        WV: Value projection matrix.

    Returns:
        Tuple ``(output, attention_weights)``: the attention-weighted sum of
        the value vectors, and the softmax-normalised score matrix (each row
        along the last axis sums to 1).
    """
    query = tf.matmul(inputs, WQ)
    key = tf.matmul(inputs, WK)
    value = tf.matmul(inputs, WV)

    # Dot-product scores are divided by sqrt(key width) before the softmax.
    scale = tf.math.sqrt(tf.cast(tf.shape(key)[-1], tf.float32))
    scores = tf.matmul(query, key, transpose_b=True) / scale
    attention_weights = tf.nn.softmax(scores, axis=-1)

    return tf.matmul(attention_weights, value), attention_weights

def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table.

    Args:
        position: Number of positions (rows) to generate.
        d_model: Number of encoding channels (columns) per position.

    Returns:
        ``tf.float32`` tensor of shape ``(1, position, d_model)``. Even
        channels hold the sine of the angle, odd channels the cosine; the
        angle for channel ``j`` at position ``p`` is
        ``p / 10000 ** (2 * (j // 2) / d_model)``.
    """
    channel_index = np.arange(d_model)[np.newaxis, :]
    # Channels 2i and 2i+1 share one frequency, hence the integer division.
    exponents = (2 * (channel_index // 2)) / np.float32(d_model)
    inverse_wavelengths = 1 / np.power(10000, exponents)

    positions = np.arange(position)[:, np.newaxis]
    angles = positions * inverse_wavelengths

    table = np.empty_like(angles)
    table[:, 0::2] = np.sin(angles[:, 0::2])  # even channels: sine
    table[:, 1::2] = np.cos(angles[:, 1::2])  # odd channels: cosine

    # Leading axis of size 1 lets the table broadcast over a batch dimension.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)


class _SelfAttention(tf.keras.layers.Layer):
    """Keras layer that owns the Q/K/V projections used by ``self_attention``.

    The projection matrices are created with ``add_weight`` so that Keras
    tracks them: they show up in ``model.trainable_weights`` and are updated
    during ``fit``. Bare ``tf.Variable`` objects passed straight into TF ops
    on symbolic Keras tensors are not tracked and would stay at their random
    initial values.
    """

    def __init__(self, attention_units, **kwargs):
        super().__init__(**kwargs)
        self.attention_units = attention_units

    def build(self, input_shape):
        projection_shape = (int(input_shape[-1]), self.attention_units)
        self.WQ = self.add_weight(
            name="WQ", shape=projection_shape,
            initializer="glorot_uniform", trainable=True)
        self.WK = self.add_weight(
            name="WK", shape=projection_shape,
            initializer="glorot_uniform", trainable=True)
        self.WV = self.add_weight(
            name="WV", shape=projection_shape,
            initializer="glorot_uniform", trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        # Returns the (output, attention_weights) pair from self_attention.
        return self_attention(inputs, self.WQ, self.WK, self.WV)

    def get_config(self):
        config = super().get_config()
        config.update({"attention_units": self.attention_units})
        return config


def sentiment_classifier(inputs, vocab_size, max_len, embedding_dim, attention_units, num_classes):
    """Wire up the embedding -> self-attention -> pooling -> dense classifier.

    Args:
        inputs: Symbolic tensor of token ids, one row of ids per example.
        vocab_size: Number of rows in the embedding table.
        max_len: Sequence length; sizes the positional-encoding table.
        embedding_dim: Width of the token embeddings.
        attention_units: Width of the query/key/value projections.
        num_classes: Number of units in the softmax output layer.

    Returns:
        Tuple ``(output, attention_weights)``: the per-class softmax
        probabilities and the attention weight matrix produced by the
        self-attention step.
    """
    embedding = Embedding(vocab_size, embedding_dim, input_length=max_len)
    positional_encodings = positional_encoding(max_len, embedding_dim)
    # Trainable attention block (see _SelfAttention for why a Layer is used).
    attention = _SelfAttention(attention_units)
    global_avg_pool = GlobalAveragePooling1D()
    dense1 = Dense(256, activation="relu")
    dropout = Dropout(0.25)
    dense2 = Dense(num_classes, activation='softmax')

    x = embedding(inputs)
    # Scale embeddings by sqrt(embedding width) before adding positions.
    x *= tf.math.sqrt(tf.cast(tf.shape(x)[-1], tf.float32))
    # Slice the table to the actual sequence length of x.
    x += positional_encodings[:, :tf.shape(x)[1], :]
    output, attention_weights = attention(x)
    output = global_avg_pool(output)
    output = dense1(output)
    output = dropout(output)
    output = dense2(output)

    return output, attention_weights

In [8]:
# Wrap the padded sequences and integer labels in batched tf.data pipelines
# (batches of 64 for training, 32 for evaluation).
# NOTE(review): these assignments rebind `train_dataset` / `test_dataset`,
# which held the pandas DataFrames from the loading cell.
train_dataset = tf.data.Dataset.from_tensor_slices((X_train_tokenized, y_train_encoded))
train_dataset = train_dataset.batch(64)

test_dataset = tf.data.Dataset.from_tensor_slices((X_test_tokenized, y_test_encoded))
test_dataset = test_dataset.batch(32)

2024-03-31 11:06:05.477776: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2024-03-31 11:06:05.477795: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 24.00 GB
2024-03-31 11:06:05.477799: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 8.00 GB
2024-03-31 11:06:05.477860: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-03-31 11:06:05.478085: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [15]:
# Build the functional model: one integer token-id input of length max_len.
# The attention weights returned alongside the predictions are not needed here.
inputs = tf.keras.Input(shape=(max_len,))
outputs, _ = sentiment_classifier(
    inputs,
    vocab_size,
    max_len,
    embedding_dim,
    attention_units,
    num_classes,
)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Integer labels + softmax output -> sparse categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit for 10 epochs, reporting metrics on the held-out batches after each one.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10,
)

Epoch 1/10


2024-03-31 11:06:48.984558: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-03-31 11:06:49.004085: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:693] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }




2024-03-31 11:07:16.616370: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-03-31 11:07:16.622001: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:693] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "GPU" } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Predictions

In [28]:
# Show the model's verdict next to the true label for the first five reviews.
inputs = X_train_tokenized[:5]
text = X_train[:5]
true_values = y_train[:5]

# model.predict returns one row of softmax probabilities per review. The
# predicted class is the index of the largest probability; comparing a raw
# probability to 1 would only match when it is exactly 1.0.
predicted = np.argmax(model.predict(inputs), axis=-1)

# LabelEncoder numbers classes in sorted label order, so id 1 is the
# alphabetically later label (assumed to be "positive" for this dataset).
labels = ['Positive' if class_id == 1 else 'Negative' for class_id in predicted]

for review, actual, label in zip(text, true_values, labels):
    print(review)
    print(' ')
    print(actual)
    print(label)
    print(' ')
    print(' ------------------------------------------------------------------------')

One of the other reviewers has mentioned that after watching just 1 Oz episode you'll be hooked. They are right, as this is exactly what happened with me.<br /><br />The first thing that struck me about Oz was its brutality and unflinching scenes of violence, which set in right from the word GO. Trust me, this is not a show for the faint hearted or timid. This show pulls no punches with regards to drugs, sex or violence. Its is hardcore, in the classic use of the word.<br /><br />It is called OZ as that is the nickname given to the Oswald Maximum Security State Penitentary. It focuses mainly on Emerald City, an experimental section of the prison where all the cells have glass fronts and face inwards, so privacy is not high on the agenda. Em City is home to many..Aryans, Muslims, gangstas, Latinos, Christians, Italians, Irish and more....so scuffles, death stares, dodgy dealings and shady agreements are never far away.<br /><br />I would say the main appeal of the show is due to the fac