#### What is sentiment analysis?
Sentiment analysis is a technique used to understand people’s emotions or opinions by analyzing text. It’s like teaching a computer to read reviews, comments, or social media posts and identify whether the text has a positive, negative, or neutral tone. For example, a company might use sentiment analysis to see if customers are happy or frustrated with a new product based on online feedback. This way, it helps companies, researchers, and others get a quick sense of how people feel about a topic or brand.

In [18]:
#load libraries
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [22]:
#the libraries required for implementing bert
from transformers import BertTokenizer, BertModel
import torch

In [24]:
data = pd.read_csv('Clean_sentiment.csv')

In [26]:
data

Unnamed: 0.1,Unnamed: 0,Text,Sentiment,target
0,0,Enjoying a beautiful day at the park! ...,Positive,Positive
1,1,Traffic was terrible this morning. ...,Negative,Negative
2,2,Just finished an amazing workout! 💪 ...,Positive,Positive
3,3,Excited about the upcoming weekend getaway! ...,Positive,Positive
4,4,Trying out a new recipe for dinner tonight. ...,Neutral,Neutral
...,...,...,...,...
727,727,Collaborating on a science project that receiv...,Happy,Positive
728,728,Attending a surprise birthday party organized ...,Happy,Positive
729,729,Successfully fundraising for a school charity ...,Happy,Positive
730,730,"Participating in a multicultural festival, cel...",Happy,Positive


In [28]:
data.drop(columns=['Unnamed: 0'],axis=1,inplace=True)

In [30]:
data

Unnamed: 0,Text,Sentiment,target
0,Enjoying a beautiful day at the park! ...,Positive,Positive
1,Traffic was terrible this morning. ...,Negative,Negative
2,Just finished an amazing workout! 💪 ...,Positive,Positive
3,Excited about the upcoming weekend getaway! ...,Positive,Positive
4,Trying out a new recipe for dinner tonight. ...,Neutral,Neutral
...,...,...,...
727,Collaborating on a science project that receiv...,Happy,Positive
728,Attending a surprise birthday party organized ...,Happy,Positive
729,Successfully fundraising for a school charity ...,Happy,Positive
730,"Participating in a multicultural festival, cel...",Happy,Positive


In [32]:
# Drop unnecessary columns and keep only 'Text' and 'target'
data = data[['Text', 'target']].rename(columns={"Text": "text", "target": "label"})

In [34]:
# Split into train and test sets
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42, stratify=data['label'])


In [36]:
from transformers import BertTokenizer

# Initialize the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the text data
train_encodings = tokenizer(list(train_data['text']), truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(list(test_data['text']), truncation=True, padding=True, max_length=128)


In [38]:
import torch

# Map labels to integers
label_dict = {'Positive': 0, 'Negative': 1, 'Neutral': 2}
train_labels = train_data['label'].map(label_dict).values
test_labels = test_data['label'].map(label_dict).values

# Create a Dataset class for PyTorch
class SentimentDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = SentimentDataset(train_encodings, train_labels)
test_dataset = SentimentDataset(test_encodings, test_labels)


In [40]:
from transformers import BertForSequenceClassification

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [42]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    evaluation_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)

trainer.train()





Epoch,Training Loss,Validation Loss
1,No log,0.91098
2,No log,0.648221
3,No log,0.647414


TrainOutput(global_step=222, training_loss=0.7458789584872959, metrics={'train_runtime': 659.1249, 'train_samples_per_second': 2.663, 'train_steps_per_second': 0.337, 'total_flos': 35173433352330.0, 'train_loss': 0.7458789584872959, 'epoch': 3.0})

In [44]:
eval_result = trainer.evaluate()
print(f"Evaluation results: {eval_result}")

Evaluation results: {'eval_loss': 0.6474141478538513, 'eval_runtime': 10.6894, 'eval_samples_per_second': 13.752, 'eval_steps_per_second': 1.777, 'epoch': 3.0}


In [46]:
# Evaluate on the test dataset
predictions = trainer.predict(test_dataset)

# Get the predictions as numpy arrays from the trainer output
preds = torch.argmax(torch.tensor(predictions.predictions), dim=-1)
print(preds)


tensor([0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
        0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 1, 0, 0, 1,
        0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 2, 0, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
        0, 2, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 2, 0, 1, 2, 1, 1, 0, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
        0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 1,
        0, 0, 0])


In [48]:
import torch

def predict_sentiment(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    
    # Put the model in evaluation mode
    model.eval()
    with torch.no_grad():
        # Pass the inputs through the model to get logits
        outputs = model(**inputs)
        logits = outputs.logits
    
    # Get the predicted class (0, 1, or 2) based on the highest logit
    predicted_class = torch.argmax(logits, dim=1).item()
    
    # Map the prediction back to the sentiment label
    label_dict = {0: 'Positive', 1: 'Negative', 2: 'Neutral'}
    predicted_label = label_dict[predicted_class]
    
    return predicted_label
  


In [50]:
# Example input
text = input("Enter a sentence for sentiment prediction: ")
predicted_sentiment = predict_sentiment(text)
print(f"Predicted sentiment: {predicted_sentiment}")


Enter a sentence for sentiment prediction:  hi


Predicted sentiment: Positive


In [52]:
# Save model and tokenizer
model.save_pretrained("./saved_model")
tokenizer.save_pretrained("./saved_model")


('./saved_model\\tokenizer_config.json',
 './saved_model\\special_tokens_map.json',
 './saved_model\\vocab.txt',
 './saved_model\\added_tokens.json')