In [None]:
#Import Libraries
import tensorflow as tf
import tensorflow_datasets as tfds
from transformers import DistilBertTokenizer, TFDistilBertModel,DistilBertConfig
import numpy as np





In [None]:

# Load and preprocess the IMDB dataset.
# Only a small slice of each split is requested to keep the notebook fast.
train_data, test_data = tfds.load(
    'imdb_reviews',
    as_supervised=True,
    split=['train[:1000]', 'test[-200:]'],
)

# Materialise each split once, then build parallel Python lists from it:
# review bytes are decoded as UTF-8, labels are kept as the values returned
# by `.numpy()`.
train_examples = list(train_data)
train_texts = [text.numpy().decode('utf-8') for text, _ in train_examples]
train_labels = [label.numpy() for _, label in train_examples]

test_examples = list(test_data)
test_texts = [text.numpy().decode('utf-8') for text, _ in test_examples]
test_labels = [label.numpy() for _, label in test_examples]


In [None]:
# Tokenize text
# Load the tokenizer for the 'distilbert-base-uncased' checkpoint (the same
# checkpoint name the DistilBERT encoder is loaded from further down).
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

def tokenize_texts(texts, max_length=128):
    """Encode raw texts with the notebook-level DistilBERT ``tokenizer``.

    Every sequence is padded to, and truncated at, ``max_length`` so that all
    encoded examples share one fixed length.

    Args:
        texts: The text(s) to encode; forwarded to the tokenizer unchanged.
        max_length (int): Fixed sequence length. The default of 128 matches
            the ``shape=(128,)`` declared for the model's input layers;
            change both together.

    Returns:
        The tokenizer's output (requested with ``return_tensors='tf'``); the
        notebook reads its 'input_ids' and 'attention_mask' entries.
    """
    return tokenizer(
        texts,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='tf'
    )

# Encode both splits up front; the dataset cell below reads the
# 'input_ids' and 'attention_mask' entries of these results.
train_encodings = tokenize_texts(train_texts)
test_encodings = tokenize_texts(test_texts)


In [None]:
# Prepare TensorFlow datasets

def _make_dataset(encodings, labels, batch_size=16, shuffle=False):
    """Build a batched ``tf.data.Dataset`` of ``(features, label)`` pairs.

    Args:
        encodings: Tokenizer output providing 'input_ids' and 'attention_mask'.
        labels: Sequence of labels aligned with the encoded examples.
        batch_size (int): Number of examples per batch.
        shuffle (bool): When True, shuffle the examples before batching.

    Returns:
        A dataset yielding ``({'input_ids', 'attention_mask'}, labels)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((
        {
            'input_ids': encodings['input_ids'],
            'attention_mask': encodings['attention_mask']
        },
        labels
    ))
    if shuffle:
        # Model.fit ignores its own `shuffle` argument for Dataset inputs, so
        # the shuffling has to live in the pipeline. A buffer as large as the
        # data gives a full shuffle; it is redrawn on every epoch by default.
        dataset = dataset.shuffle(buffer_size=max(len(labels), 1))
    return dataset.batch(batch_size)


# Only the training data is shuffled; evaluation order does not matter.
train_dataset = _make_dataset(train_encodings, train_labels, shuffle=True)
test_dataset = _make_dataset(test_encodings, test_labels)


In [None]:

#Build the Transfer Learning Model


# Keras layer wrapper around the pretrained DistilBERT encoder
class DistilBertLayer(tf.keras.layers.Layer):
    """Keras layer that runs DistilBERT and returns the first-position hidden state.

    The 'distilbert-base-uncased' checkpoint is loaded when the layer is
    constructed.

    NOTE(review): the encoder is held as a plain attribute; check
    ``model.summary()`` to confirm whether its weights are counted as
    trainable by the enclosing model.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.bert = TFDistilBertModel.from_pretrained('distilbert-base-uncased')

    def call(self, inputs):
        """Encode a batch and return one vector per example.

        Args:
            inputs: Mapping with 'input_ids' and 'attention_mask' entries.

        Returns:
            ``last_hidden_state[:, 0, :]`` of the encoder output, i.e. the
            hidden state at sequence position 0.
        """
        encoded = self.bert(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )
        hidden_states = encoded.last_hidden_state
        return hidden_states[:, 0, :]  # position 0 is the CLS token

# Symbolic inputs: two int32 sequences of length 128, named after the
# feature keys used in the datasets
input_ids = tf.keras.Input(name='input_ids', shape=(128,), dtype=tf.int32)
attention_mask = tf.keras.Input(name='attention_mask', shape=(128,), dtype=tf.int32)
inputs = dict(input_ids=input_ids, attention_mask=attention_mask)

# Encoder: DistilBERT wrapper producing one vector per example
cls_output = DistilBertLayer()(inputs)

# Classification head: Dense(64, relu) -> Dropout(0.3) -> 2-way softmax
x = cls_output
for head_layer in (
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
):
    x = head_layer(x)
output = tf.keras.layers.Dense(2, activation='softmax')(x)

# Tie inputs and output together into the final model
model = tf.keras.Model(inputs=inputs, outputs=output)



Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [None]:
# Compile the model
# The sparse categorical loss/metric pair goes with the model's 2-unit
# softmax output.
compile_options = dict(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)
model.compile(**compile_options)

# Show the layer overview
model.summary()


In [None]:
# Train the model
# NOTE(review): test_dataset doubles as the validation set here and is also
# what model.evaluate() is called on afterwards, so the validation metrics and
# the final "test" metrics come from the same 200 examples. Consider carving a
# separate validation split out of the training data.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=3  # three passes over train_dataset
)


Epoch 1/3
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 4s/step - loss: 0.7275 - sparse_categorical_accuracy: 0.5240 - val_loss: 0.6753 - val_sparse_categorical_accuracy: 0.5550
Epoch 2/3
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 4s/step - loss: 0.6826 - sparse_categorical_accuracy: 0.5704 - val_loss: 0.6530 - val_sparse_categorical_accuracy: 0.6250
Epoch 3/3
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 4s/step - loss: 0.6621 - sparse_categorical_accuracy: 0.5774 - val_loss: 0.6374 - val_sparse_categorical_accuracy: 0.6400


In [None]:
# Evaluate the model on test_dataset.
# With the single metric configured at compile time, results[0] is the loss
# and results[1] is the sparse categorical accuracy.
results = model.evaluate(test_dataset)
print(f"Test Loss: {results[0]:.4f}, Test Accuracy: {results[1]:.4f}")


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 3s/step - loss: 0.6437 - sparse_categorical_accuracy: 0.6318
Test Loss: 0.6374, Test Accuracy: 0.6400


In [None]:
#Predict Sentiment

def predict_sentiment(text, model, tokenizer, max_length=128):
    """
    Predict sentiment for a given text.

    Args:
        text (str): The input text to analyze
        model: Your trained Keras model; its ``predict`` method is called with
            a dict holding 'input_ids' and 'attention_mask'
        tokenizer: The DistilBERT tokenizer
        max_length (int): Length the text is padded/truncated to. The default
            of 128 matches the input shape the notebook's model is built with;
            pass another value only for a model built with that length.

    Returns:
        tuple: (predicted sentiment, confidence score), where the sentiment is
        "positive" when class 1 has the highest score and "negative"
        otherwise, and the confidence is that highest score as a float
    """
    # Tokenize the text the same way the training data is tokenized
    encoded = tokenizer(
        text,
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='tf'
    )

    # Forward only the two features the model declares as inputs
    features = {key: encoded[key] for key in ('input_ids', 'attention_mask')}
    outputs = model.predict(features)

    # A single text was passed in, so row 0 carries its class scores
    probs = outputs[0]
    predicted_class = int(np.argmax(probs))
    confidence = float(np.max(probs))

    # Convert to human-readable labels
    sentiment = "positive" if predicted_class == 1 else "negative"

    return sentiment, confidence

# Example usage: score one clearly positive and one clearly negative review.
# Each call prints the predicted label and the score of the winning class.
sample_text = "This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout."
sentiment, confidence = predict_sentiment(sample_text, model, tokenizer)
print(f"Sentiment: {sentiment} (Confidence: {confidence:.2f})")

sample_text2 = "I hated this film. It was boring and the characters were poorly developed."
sentiment2, confidence2 = predict_sentiment(sample_text2, model, tokenizer)
print(f"Sentiment: {sentiment2} (Confidence: {confidence2:.2f})")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
Sentiment: positive (Confidence: 0.57)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 267ms/step
Sentiment: negative (Confidence: 0.68)
