In [1]:
# Install required libraries (run only once in Colab)
# !pip install transformers

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import BertTokenizer, TFBertForSequenceClassification

In [2]:
# Small dataset (100 movie reviews with labels: 1 = Positive, 0 = Negative)
reviews = [
    ("This movie was fantastic! I loved every moment.", 1),
    ("The storyline was gripping, and the acting was superb.", 1),
    ("I really enjoyed this film, highly recommend!", 1),
    ("An amazing experience! Great acting and direction.", 1),
    ("One of the best movies I've watched recently.", 1),
    ("Absolutely loved the plot and characters.", 1),
    ("A very well-made and touching movie.", 1),
    ("It was a beautiful film, full of emotions.", 1),
    ("Great cinematography and outstanding performances.", 1),
    ("Definitely worth watching, highly entertaining!", 1),
    ("Brilliant script, excellent execution.", 1),
    ("The cast gave a phenomenal performance.", 1),
    ("The direction was flawless.", 1),
    ("One of the most heartwarming films ever.", 1),
    ("A feel-good movie with great life lessons.", 1),
    ("The visuals were breathtaking!", 1),
    ("The soundtrack matched perfectly with the scenes.", 1),
    ("A well-executed emotional rollercoaster.", 1),
    ("The character development was amazing.", 1),
    ("Pure cinematic brilliance.", 1),
    ("Loved the twist at the end.", 1),
    ("Strong performances by the lead actors.", 1),
    ("Keeps you hooked till the end.", 1),
    ("A masterpiece in modern cinema.", 1),
    ("Flawless storytelling and direction.", 1),
    ("An instant classic.", 1),
    ("One of the best films of the year.", 1),
    ("A beautiful journey from start to finish.", 1),
    ("Outstanding from every angle.", 1),
    ("So good I watched it twice!", 1),
    ("Completely exceeded my expectations.", 1),
    ("A story that stays with you.", 1),
    ("Powerful and emotionally resonant.", 1),
    ("Touching and inspiring.", 1),
    ("Loved every single character.", 1),
    ("An unforgettable experience.", 1),
    ("A true work of art.", 1),
    ("Best film I’ve seen in years.", 1),
    ("The movie had soul and depth.", 1),
    ("The chemistry between the actors was perfect.", 1),
    ("Uplifting and beautifully done.", 1),
    ("A genuine feel-good movie.", 1),
    ("Masterfully done from start to end.", 1),
    ("Emotionally powerful and uplifting.", 1),
    ("You won’t be disappointed!", 1),
    ("Really well made, 10/10!", 1),
    ("Highly enjoyable and satisfying.", 1),
    ("Every scene was well thought out.", 1),
    ("Impressive visuals and storytelling.", 1),
    ("The worst movie I have ever seen. Total waste of time.", 0),
    ("Terrible acting, poor script, and bad direction.", 0),
    ("I regret watching this movie. Boring and predictable.", 0),
    ("A complete disaster. Don’t waste your time.", 0),
    ("Horrible experience. Not worth a single penny.", 0),
    ("Weak storyline and unconvincing acting.", 0),
    ("Felt like a waste of time. Very disappointed.", 0),
    ("The movie lacked depth and had no real plot.", 0),
    ("Poorly executed and poorly directed.", 0),
    ("Avoid this movie at all costs.", 0),
    ("Ridiculously bad and not funny at all.", 0),
    ("The plot made no sense whatsoever.", 0),
    ("Utterly boring and lifeless film.", 0),
    ("Fails on all fronts – writing, acting, and direction.", 0),
    ("Such a disappointing experience.", 0),
    ("Painfully slow and uneventful.", 0),
    ("Acting was cringe-worthy.", 0),
    ("Couldn’t finish it, so bad.", 0),
    ("Poor camera work and editing.", 0),
    ("A big letdown, expected more.", 0),
    ("No emotional depth or connection.", 0),
    ("Characters were annoying and flat.", 0),
    ("Just a lazy attempt at filmmaking.", 0),
    ("Terribly cliched and uninspired.", 0),
    ("It was all over the place.", 0),
    ("Story dragged endlessly.", 0),
    ("Predictable from start to finish.", 0),
    ("Worst screenplay ever.", 0),
    ("Nothing exciting happens.", 0),
    ("The jokes were awful.", 0),
    ("Such a mess, felt unfinished.", 0),
    ("Waste of a good cast.", 0),
    ("Zero chemistry between the leads.", 0),
    ("Lack of originality in every frame.", 0),
    ("Fails to create any suspense.", 0),
    ("Monotonous and poorly paced.", 0),
    ("Dialogues were awkward and forced.", 0),
    ("Felt like a school project.", 0),
    ("Too many plot holes to ignore.", 0),
    ("Just didn’t work for me.", 0),
    ("Visually dull and unimaginative.", 0),
    ("The climax was laughably bad.", 0),
    ("Music was irritating, not fitting.", 0),
    ("An insult to good cinema.", 0),
    ("Disjointed scenes and bad direction.", 0),
    ("No creativity whatsoever.", 0),
    ("Couldn’t relate to the story.", 0),
    ("The worst film of the year.", 0),
    ("Wouldn’t recommend to anyone.", 0),
    ("Unbearably long and drawn out.", 0),
    ("Please don’t watch this.", 0),
]




In [3]:
texts, labels = zip(*reviews)
texts = list(texts)
labels = np.array(labels)

In [4]:
# 2. Split the dataset
texts_train, texts_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

In [5]:
# 3. Tokenization
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokens_train = tokenizer(
    texts_train,
    padding=True,
    truncation=True,
    max_length=128,
    return_tensors='tf'
)
tokens_test = tokenizer(
    texts_test,
    padding=True,
    truncation=True,
    max_length=128,
    return_tensors='tf'
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [6]:
# 4. Load model
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# 5. Compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = ['accuracy']
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [8]:
# 6. Train model
model.fit(
    x={'input_ids': tokens_train['input_ids'], 'attention_mask': tokens_train['attention_mask']},
    y=y_train,
    validation_split=0.1,
    epochs=2,
    batch_size=8
)

Epoch 1/2
Epoch 2/2


<tf_keras.src.callbacks.History at 0x7ed3424da390>

In [9]:
# 7. Predict
preds = model.predict({'input_ids': tokens_test['input_ids'], 'attention_mask': tokens_test['attention_mask']})
y_pred = tf.argmax(preds.logits, axis=1).numpy()



In [10]:
# 8. Evaluate
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Accuracy: 0.9500
Precision: 1.0000
Recall: 0.9167
F1 Score: 0.9565


In [12]:
# Sample review for testing
sample_review = "the story line was boring"

# Tokenize the review
inputs = tokenizer(sample_review, return_tensors="tf", padding=True, truncation=True, max_length=64)

# Predict sentiment
outputs = model(inputs["input_ids"])
prediction = tf.argmax(outputs.logits, axis=1).numpy()[0]

# Map prediction to sentiment
sentiment = "Positive" if prediction == 1 else "Negative"
print(f"Review: {sample_review}")
print(f"Predicted Sentiment: {sentiment}")

Review: the story line was boring
Predicted Sentiment: Negative
