In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Parameters
vocab_size = 10000
maxlen = 200
embed_dim = 32
num_heads = 2
ff_dim = 32

In [3]:
# Load and preprocess data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 0us/step


In [4]:
# Input layer
inputs = layers.Input(shape=(maxlen,))

In [5]:
# Embedding + Positional Encoding (simple sum)
embedding_layer = layers.Embedding(vocab_size, embed_dim)(inputs)
positions = tf.range(start=0, limit=maxlen, delta=1)
position_embeddings = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)(positions)
x = embedding_layer + position_embeddings

In [6]:
# Transformer block
attention = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(x, x)
x = layers.LayerNormalization()(x + attention)
ffn = layers.Dense(ff_dim, activation="relu")(x)
ffn = layers.Dense(embed_dim)(ffn)
x = layers.LayerNormalization()(x + ffn)

In [7]:
# Output layers
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)

In [8]:
# Build and compile
model = models.Model(inputs, outputs)
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [9]:
# Train
model.fit(x_train, y_train, epochs=2, batch_size=64, validation_split=0.2)

Epoch 1/2
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 90ms/step - accuracy: 0.7033 - loss: 0.5450 - val_accuracy: 0.8604 - val_loss: 0.3220
Epoch 2/2
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 81ms/step - accuracy: 0.9145 - loss: 0.2268 - val_accuracy: 0.8640 - val_loss: 0.3327


<keras.src.callbacks.history.History at 0x1cfa3966fc0>

In [10]:
# Evaluate
model.evaluate(x_test, y_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 15ms/step - accuracy: 0.8504 - loss: 0.3540


[0.36059334874153137, 0.8492000102996826]

In [13]:
from transformers import DistilBertTokenizer

tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [19]:
from transformers import TFDistilBertForSequenceClassification, DistilBertTokenizerFast
import tensorflow as tf
import numpy as np

# Load model and tokenizer
model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Input text
text = "The product was disgusting!"

# Tokenize input
inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True, max_length=128)

# Get model output
outputs = model(**inputs)
logits = outputs.logits

# Prediction
predicted_class = np.argmax(logits, axis=1)[0]

# Print result
if predicted_class == 1:
    print("Predicted Sentiment: Positive 😊")
else:
    print("Predicted Sentiment: Negative 😞")


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']
You should 

Predicted Sentiment: Negative 😞
