Train a Sentiment Analysis Model Using BERT

In [None]:
# Import the Necessary Libraries
from transformers import BertTokenizer
import torch

# Build the tokenizer from the pre-trained "bert-base-uncased" checkpoint
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path='bert-base-uncased',
)

In [None]:
# Example user-feedback sentences to run through the tokenizer
sentences = [
    "The website is super slow today!",
    "Great experience, everything is smooth.",
    "I'm unable to access the website—error 500.",
    "Fast loading times and great UI!",
    "Pages keep crashing randomly.",
]

# Encode the whole batch in one call: pad to the longest sentence in the batch,
# truncate over-long inputs, and return PyTorch tensors ("pt")
tokenized_inputs = tokenizer(
    sentences,
    return_tensors="pt",
    truncation=True,
    padding=True,
)

# Pull out the two tensors that are fed to the model
X_input_ids, X_attention_mask = (
    tokenized_inputs[field] for field in ("input_ids", "attention_mask")
)

# Sanity check: one row per sentence (the saved run printed torch.Size([5, 14]))
print("Tokenized Inputs Shape:", X_input_ids.shape)

Tokenized Inputs Shape: torch.Size([5, 14])


In [None]:
pip install transformers datasets torch scikit-learn accelerate

Note: you may need to restart the kernel to use updated packages.


Load Pretrained BERT Tokenizer

In [None]:
from transformers import BertTokenizer

# Build the BERT tokenizer from the "bert-base-uncased" checkpoint.
# NOTE(review): the first cell already loads the same tokenizer; presumably it is repeated
# here so the notebook can resume after the kernel restart suggested by the install cell — confirm.
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name_or_path="bert-base-uncased",
)


Prepare the Dataset

In [None]:
from datasets import Dataset

# Toy training corpus (replace with real data).
# texts[i] is paired with labels[i]: 1 = Positive, 0 = Negative.
texts = [
    "I love this movie!",
    "This was the worst experience ever.",
    "Amazing performance!",
    "Terrible acting.",
]
labels = [
    1,  # "I love this movie!"
    0,  # "This was the worst experience ever."
    1,  # "Amazing performance!"
    0,  # "Terrible acting."
]

# Tokenization callback applied to the dataset via `Dataset.map`
def tokenize_function(examples):
    """Tokenize the "text" entry of `examples` with the notebook-level `tokenizer`.

    Args:
        examples: Mapping with a "text" key holding the raw strings
            (a batch of rows when called through `Dataset.map(..., batched=True)`).

    Returns:
        The tokenizer's output for those strings, requested with
        `padding="max_length"` and `truncation=True`.
    """
    raw_texts = examples["text"]
    encoded = tokenizer(raw_texts, truncation=True, padding="max_length")
    return encoded

# Create Hugging Face dataset and tokenize every row (in batches)
dataset = Dataset.from_dict({"text": texts, "label": labels})
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Split into train & validation sets.
# A fixed seed makes the shuffle-and-split reproducible, so a fresh
# "Restart & Run All" always trains and evaluates on the same rows.
split = tokenized_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split["train"]
val_dataset = split["test"]  # the held-out part is used as the validation set

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Load BERT Model for Classification

In [None]:
from transformers import BertForSequenceClassification

# Map the class ids used in `labels` (0 = Negative, 1 = Positive) to readable names so that
# predictions are reported as NEGATIVE/POSITIVE instead of the generic LABEL_0/LABEL_1.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: idx for idx, name in id2label.items()}

# Pre-trained BERT encoder with a 2-class classification head
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train the Model

Define Training Arguments

In [None]:
pip install "accelerate>=0.26.0"

Note: you may need to restart the kernel to use updated packages.


In [None]:
# Import required libraries
from transformers import TrainingArguments

# Hyper-parameters and bookkeeping options handed to the Trainer
training_args = TrainingArguments(
    # Output locations
    output_dir="./results",
    logging_dir="./logs",
    # Evaluate and save once per epoch
    # (`eval_strategy` is used instead of the older `evaluation_strategy` name)
    eval_strategy="epoch",
    save_strategy="epoch",
    # Schedule and batch sizes
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Logging interval in steps; the saved run has only 3 steps in total,
    # which is presumably why its training-loss column reads "No log".
    logging_steps=10,
)

# Print to confirm it works
print("Training arguments initialized successfully!")

Training arguments initialized successfully!


Fine-Tune and Evaluate the Model

In [None]:
# Define Your Trainer
from transformers import Trainer

# Wire the model, the training configuration and both dataset splits into a Trainer
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=val_dataset,     # validation split; must already be defined
    train_dataset=train_dataset,  # training split; must already be defined
)


In [None]:
# Start Fine-Tuning
# Runs training with the Trainer built above; because the call is the last expression
# of the cell, its return value (a TrainOutput in the saved run) is displayed as output.
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,0.87424
2,No log,0.825071
3,No log,0.786199


TrainOutput(global_step=3, training_loss=0.5284990469614664, metrics={'train_runtime': 79.6727, 'train_samples_per_second': 0.113, 'train_steps_per_second': 0.038, 'total_flos': 2367999498240.0, 'train_loss': 0.5284990469614664, 'epoch': 3.0})

In [None]:
# Evaluate the fine-tuned model with the Trainer and print the resulting metrics
# (the saved run shows eval_loss, runtime/throughput figures and the epoch).
eval_results = trainer.evaluate()
print(f"Final Evaluation Results: {eval_results}")

Final Evaluation Results: {'eval_loss': 0.7861990928649902, 'eval_runtime': 1.4676, 'eval_samples_per_second': 0.681, 'eval_steps_per_second': 0.681, 'epoch': 3.0}


Make Predictions

In [None]:
from transformers import pipeline

# Wrap the fine-tuned model and its tokenizer in a text-classification pipeline
sentiment_pipeline = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

# Classify a single sentence and show the raw pipeline output
# (a list holding one dict with "label" and "score" in the saved run)
test_sentence = "I love this product! It's amazing."
result = sentiment_pipeline(test_sentence)
print(result)


Device set to use cpu


[{'label': 'LABEL_0', 'score': 0.754209578037262}]


Label Mapping

In [None]:
# A few more sentences to classify one at a time.
# NOTE(review): the saved output of this cell shows POSITIVE/NEGATIVE labels with ~0.99 scores,
# while the previous cell printed LABEL_0 — it may come from a different pipeline; re-run to confirm.
sentences = [
    "This is amazing!",
    "I hate this!",
    "It's okay, not great.",
    "Absolutely love it!",
]

for text in sentences:
    print(f"Text: {text} -> Prediction: {sentiment_pipeline(text)}")


Text: This is amazing! -> Prediction: [{'label': 'POSITIVE', 'score': 0.9998769760131836}]
Text: I hate this! -> Prediction: [{'label': 'NEGATIVE', 'score': 0.9995765089988708}]
Text: It's okay, not great. -> Prediction: [{'label': 'NEGATIVE', 'score': 0.9766001105308533}]
Text: Absolutely love it! -> Prediction: [{'label': 'POSITIVE', 'score': 0.9998786449432373}]


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Example data: sentiment names and example counts of negative and positive feedback.
# NOTE(review): this rebinds `labels`, which the dataset cell uses for the class ids;
# re-run that cell before re-creating the dataset.
labels = ['Negative', 'Positive']
counts = [30, 70]  # Example counts of negative and positive feedback

# Create bar plot on an explicit Axes
fig, ax = plt.subplots(figsize=(6, 4))
# Passing `palette` without `hue` is deprecated in seaborn 0.13+; assigning the x variable
# to `hue` with legend=False gives the same per-bar colours without the FutureWarning.
sns.barplot(x=labels, y=counts, hue=labels, palette=['red', 'green'], legend=False, ax=ax)

# Add labels
ax.set_xlabel("Sentiment")
ax.set_ylabel("Count")
ax.set_title("Sentiment Label Mapping")
plt.show()

In [None]:
# NOTE(review): `input()` waits for the user to type a line, so an unattended
# "Restart & Run All" stops at this cell — confirm whether it is still needed.
x = input("Hello world : ")  # Script waits here