# MultiClass Text-Sentiment-Analysis using RoBERTa

In [47]:
!pip install transformers[torch] datasets comet_ml tensorboard evaluate --upgrade --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m81.9/84.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h

## 1. Download and Load the dataset

In [3]:
!kaggle datasets download -d yasserh/twitter-tweets-sentiment-dataset
!kaggle datasets download -d tirendazacademy/fifa-world-cup-2022-tweets
!unzip twitter-tweets-sentiment-dataset.zip
!unzip fifa-world-cup-2022-tweets.zip

Dataset URL: https://www.kaggle.com/datasets/yasserh/twitter-tweets-sentiment-dataset
License(s): CC0-1.0
twitter-tweets-sentiment-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)
Dataset URL: https://www.kaggle.com/datasets/tirendazacademy/fifa-world-cup-2022-tweets
License(s): CC0-1.0
fifa-world-cup-2022-tweets.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  twitter-tweets-sentiment-dataset.zip
replace Tweets.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Tweets.csv              
Archive:  fifa-world-cup-2022-tweets.zip
replace fifa_world_cup_2022_tweets.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: fifa_world_cup_2022_tweets.csv  


In [4]:
import pandas as pd
import numpy as np


df_1 = pd.read_csv("Tweets.csv")
df_2 = pd.read_csv("fifa_world_cup_2022_tweets.csv")

df_1.sample(5)

Unnamed: 0,textID,text,selected_text,sentiment
21008,8d271d16fb,u mean VIP this time ? coz of the KBS thingy ...,u mean VIP this time ? coz of the KBS thingy ?...,neutral
15008,3016c37350,: ''UB40-'Bring Me Your Cup''' ? http://blip....,: ''UB40-'Bring Me Your Cup''' ? http://blip....,neutral
27124,3c6d0e8b6a,just fed three very hungry stray kitties three...,i hope they stay warm and safe tonight,positive
2302,7dc906d5a1,"Omg, watching Hollie Steel`s crying is so so p...",painful,negative
4254,f21bc918f8,notthebest weekend....but oh well....I have a ...,I have a right to be sad,negative


In [5]:
df_1 = df_1.dropna()
df_1.isnull().sum()

Unnamed: 0,0
textID,0
text,0
selected_text,0
sentiment,0


In [6]:
# Rename the columns 'Tweet' to 'text' and 'Sentiment' to 'sentiment'
df_2 = df_2.rename(columns={'Tweet': 'text', 'Sentiment': 'sentiment'})
df_2

Unnamed: 0.1,Unnamed: 0,Date Created,Number of Likes,Source of Tweet,text,sentiment
0,0,2022-11-20 23:59:21+00:00,4,Twitter Web App,What are we drinking today @TucanTribe \n@MadB...,neutral
1,1,2022-11-20 23:59:01+00:00,3,Twitter for iPhone,Amazing @CanadaSoccerEN #WorldCup2022 launch ...,positive
2,2,2022-11-20 23:58:41+00:00,1,Twitter for iPhone,Worth reading while watching #WorldCup2022 htt...,positive
3,3,2022-11-20 23:58:33+00:00,1,Twitter Web App,Golden Maknae shinning bright\n\nhttps://t.co/...,positive
4,4,2022-11-20 23:58:28+00:00,0,Twitter for Android,"If the BBC cares so much about human rights, h...",negative
...,...,...,...,...,...,...
22519,22519,2022-11-20 00:00:21+00:00,1,Twitter Web App,Here We go World cup 2022 #WorldCup2022,positive
22520,22520,2022-11-20 00:00:03+00:00,0,DenetPro,Anderlecht confirms former Viborg FF's Jesper ...,neutral
22521,22521,2022-11-20 00:00:01+00:00,2,Twitter for iPhone,Great thread to read before the start of #Worl...,positive
22522,22522,2022-11-20 00:00:00+00:00,11,Twitter Web App,Raphinha wants Brazil to be united at the #Wor...,positive


In [12]:
# Selecting only the 'text' and 'sentiment' columns from both DataFrames
df_1_limited = df_1[['text', 'sentiment']]
df_2_limited = df_2[['text', 'sentiment']]

# Concatenating the two DataFrames row-wise
df_combined = pd.concat([df_1_limited, df_2_limited], ignore_index=True)
df = df_combined.sample(frac=1, random_state=42).reset_index(drop=True)

df

Unnamed: 0,text,sentiment
0,Just got home from another amazing night,positive
1,: Am getting upset listening to the say now I want to speak to you Mitchel but my darn phone got no money on x,negative
2,You can dislike BTS but don’t lie and just accept that the best song of this year’s #WorldCup2022 is #DreamersbyJungkook Jungkookie we 💜 you,positive
3,I think I could game for Qatar.\n\nI would say I’m fully expecting Qatar to be awarded a penalty too but they do need to get in the Ecuador box for that to happen. \n\n#WorldCup2022,neutral
4,"is done painting all the bedroom furniture, I still have to do the table but it will wait until after the move. Uggh moving in the heat",negative
...,...,...
49999,i wish paramore would come to ireland,neutral
50000,I got Serbia in the sweepstake #WorldCup2022 https://t.co/TYsZyqWRNu,neutral
50001,Vive la France but wouldn’t mind Messi taking it. #WorldCup2022,positive
50002,http://twitpic.com/4jken - fire and urban at rock challenge,neutral


In [8]:
df['text'][0]

'Just got home from another amazing night'

## 2. Text Pre-Processing

- Cleaning up the text data by removing punctuation, extra spaces, and numbers.
- Transform sentences into individual words. Remove some common words that could be in each sentiment like `worldcup` or `#worldcup`.

In [32]:
import re

# Precompile regular expressions for faster preprocessing
non_word_chars_pattern = re.compile(r"[^\w\s\*]")  # Exclude the '*' symbol
whitespace_pattern = re.compile(r"\s+")
digits_pattern = re.compile(r"\d")
username_pattern = re.compile(r"@\S+")
hashtags_pattern = re.compile(r"#(\w+)")
html_url_pattern = re.compile(r'<.*?>|http\S+')
contractions_pattern = re.compile(r"\b(can't|won't|n't|'re|'s|'d|'ll|'t|'ve|'m)\b")

# Expand common contractions
contractions_dict = {
    "can't": "cannot", "won't": "will not", "n't": "not", "'re": "are", "'s": "is",
    "'d": "would", "'ll": "will", "'t": "not", "'ve": "have", "'m": "am"
}

# Remove substrings of words containing fifa|worldcup|qatar|football
specific_words_pattern = re.compile(r"\b\w*(worldcup|fifa|qatar|football|ecuador)\w*\b", re.IGNORECASE)

def expand_contractions(text):
    return contractions_pattern.sub(lambda x: contractions_dict.get(x.group()), text)

def preprocess_text(text):
    # Remove HTML tags and URLs
    text = html_url_pattern.sub('', text)
    # Lowercase text
    text = text.lower()
    # Expand contractions
    text = expand_contractions(text)
    # Remove specific words
    text = specific_words_pattern.sub('', text)
    # Remove hashtags but retain the word
    text = hashtags_pattern.sub(r'\1', text)
    # Remove usernames
    text = username_pattern.sub('', text)
    # Remove non-word characters except '*' symbol
    text = non_word_chars_pattern.sub(' ', text)
    # Replace whitespaces with a single space
    text = whitespace_pattern.sub(' ', text)
    # Remove digits
    text = digits_pattern.sub('', text)

    return text.strip()

In [33]:
pd.set_option('display.max_colwidth', 200)

# Replaces the null values in the data with an empty string
df = df.where((pd.notnull(df)),'')

# Apply preprocessing function to your text column
df['cleaned_text'] = df['text'].apply(preprocess_text)
df = df[['text', 'cleaned_text', 'sentiment']]
df.sample(10)

Unnamed: 0,text,cleaned_text,sentiment
10067,wishing I was attending #beatweetup I even bought a badge,wishing i was attending beatweetup i even bought a badge,1
18121,"Morning! If I get to see it, I`ll let you know. Right now, I`m going to go see Wolverine.",morning if i get to see it i ll let you know right now i m going to go see wolverine,1
31499,stellar! You 2 look great,stellar you look great,2
16068,What A Goal 🙆🏾\n\n#WorldCup2022,what a goal,2
12917,i feel like dyeing right now...,i feel like dyeing right now,0
4211,The day is here. 🤩🤩\nThe FIFA World cup begins todaayyy! Suiiiiii! \n#FIFAWorldCup #WorldCup2022,the day is here the world cup begins todaayyy suiiiiii,2
31359,Usiworry tena my nakuru people cheers lodge is there for us all karibuni as we kick off #WorldCup2022 @NAlahaji,usiworry tena my nakuru people cheers lodge is there for us all karibuni as we kick off,2
22862,cleaning and packing,cleaning and packing,1
39155,#FIFAWorldCup2022 Was that Really an Offside.\n#WorldCup2022,was that really an offside,1
28389,2022 FIFA World Cup Live Streams Online\n\n#Qatar2022\n#FIFAWorldCup\n#WorldCup2022\n#QatarvsEcuador \n\n🔴LIVE Tv📺 https://t.co/IUHOMr595k\n\n🆚Qatar vs Ecuador \n🆚FIFA World Cup 2022\n🆚FIFA World ...,world cup live streams online live tv vs world cup world cup rty ryrtyrt,1


In [34]:
# Remove rows where 'cleaned_text' is empty or contains only whitespace
df = df[df['cleaned_text'].str.strip() != '']
df.shape

(50001, 3)

In [35]:
df['cleaned_text'][0], df['sentiment'][0]

('just got home from another amazing night', 2)

## 3. Mapping `sentiment` column to numeric values

In [36]:
df['sentiment'] = df['sentiment'].replace({'positive':2, 'neutral': 1, 'negative': 0})
df.head(10)

Unnamed: 0,text,cleaned_text,sentiment
0,Just got home from another amazing night,just got home from another amazing night,2
1,: Am getting upset listening to the say now I want to speak to you Mitchel but my darn phone got no money on x,am getting upset listening to the say now i want to speak to you mitchel but my darn phone got no money on x,0
2,You can dislike BTS but don’t lie and just accept that the best song of this year’s #WorldCup2022 is #DreamersbyJungkook Jungkookie we 💜 you,you can dislike bts but don t lie and just accept that the best song of this year s is dreamersbyjungkook jungkookie we you,2
3,I think I could game for Qatar.\n\nI would say I’m fully expecting Qatar to be awarded a penalty too but they do need to get in the Ecuador box for that to happen. \n\n#WorldCup2022,i think i could game for i would say i m fully expecting to be awarded a penalty too but they do need to get in the box for that to happen,1
4,"is done painting all the bedroom furniture, I still have to do the table but it will wait until after the move. Uggh moving in the heat",is done painting all the bedroom furniture i still have to do the table but it will wait until after the move uggh moving in the heat,0
5,Having one of my bad days....Migraine today. My 1st since my neck surgery about 2 1/2 months,having one of my bad days migraine today my st since my neck surgery about months,0
6,Yes!!!! Go #ECU Should really be 0-2 but VAR my arse!#WorldCup2022,yes go ecu should really be but var my arse,2
7,that sux but mayb 4 the btr u nvr know,that sux but mayb the btr u nvr know,1
8,Hey #YEG !!!!! Anyone goin to the Edmonton Energy game and wanna do some live updates? PLEASE!!!! They dont post live scores,hey yeg anyone goin to the edmonton energy game and wanna do some live updates please they dont post live scores,1
9,The Doha they are not going to show you.... #WorldCup2022\nhttps://t.co/zwZyTKH78W,the doha they are not going to show you,0


## 4. Spliiting datasets into train and test

In [37]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['cleaned_text'],
                                                    df['sentiment'],
                                                    test_size=0.2,
                                                    random_state=42)

len(X_train), len(X_test)

(40000, 10001)

In [38]:
X_train, X_test, y_train, y_test = list(X_train), list(X_test), list(y_train), list(y_test)
X_train[:2], y_train[:2]

(['hubby needs a vacation thank god we re leaving for myrtle beach in a week',
  'moms everywhere stop what you are doing and get a good night sleep for tomorrow its your childrens turn to pamper you enjoy your day'],
 [2, 2])

## 5. Preparing data using custom dataloader

In [45]:
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback

# Setting device agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [40]:
class data(Dataset):
  def __init__(self, encodings, labels):
    self.encodings = encodings
    self.labels = labels

  def __getitem__(self, index):
    item = {key: torch.tensor(val[index]) for key, val in self.encodings.items()}
    item['labels'] = torch.tensor(self.labels[index])
    return item

  def __len__(self):
    return len(self.labels)

## 6. Load PreTrained RoBERTa Model

In [41]:
from huggingface_hub import notebook_login

# Paste hugging face token with write permission enabled and log in
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [42]:
import comet_ml
from comet_ml import Experiment

comet_ml.login(project_name="sentiment-analysis-transformer")

Please paste your Comet API key from https://www.comet.com/api/my/settings/
(api key may not show as you type)
Comet API key: ··········


[1;38;5;39mCOMET INFO:[0m Valid Comet API Key saved in /root/.comet.config (set COMET_CONFIG to change where it is saved).


In [43]:
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = RobertaTokenizer.from_pretrained(model_name, model_max_length=256)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

## 7. Tokenize and Create Encoded Dataset

In [44]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer)

# Tokenize with truncation and padding and create dataset from tokenized data
train_encoding = tokenizer(X_train, truncation=True, padding=True)
test_encoding = tokenizer(X_test, truncation=True, padding=True)

train_dataset = data(train_encoding, y_train)
test_dataset = data(test_encoding, y_test)

## 8. Fine-Tuning RoBERTa

In [46]:
batch_size = 32
epochs = 5

training_args = TrainingArguments(
    output_dir=f"{model_name}-finetuned-sentiment",
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=1,                   # adjust if needed for larger batch sizes
    learning_rate=2e-5,
    warmup_steps=500,
    weight_decay=0.01,
    num_train_epochs=epochs,                         # specify the number of epochs
    gradient_checkpointing=True,
    fp16=True,
    eval_strategy="epoch",                           # Perform evaluation at the end of each epoch
    per_device_eval_batch_size=batch_size,
    save_strategy="epoch",                           # Save model at the end of each epoch
    save_total_limit=1,                              # Only keep the best model (limit to 1 checkpoint)
    logging_strategy="epoch",
    report_to=["comet_ml", "tensorboard"],           # Experiment Tracker: CometML or others
    load_best_model_at_end=True,                     # Load the best model at the end of training
    metric_for_best_model="accuracy",               # Use eval_loss as the metric to track the best model
    greater_is_better=True,                         # Lower eval_loss is better
    push_to_hub=True,                                # Automatically push the best model to Hugging Face Hub
)

early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=2,
    early_stopping_threshold=0.0
)

## 9. Train the Fine-Tuned BERT Model

In [48]:
import evaluate
from sklearn.metrics import confusion_matrix

accuracy_metric = evaluate.load("accuracy")

LABELS = ['negative', 'neutral', 'positive']
exp = comet_ml.Experiment()

# Compute_metrics function with confusion matrix logging
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Calculate accuracy
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)

    # Calculate confusion matrix
    cm = confusion_matrix(labels, predictions)

    # Log the confusion matrix to Comet ML
    exp.log_confusion_matrix(matrix=cm, labels=LABELS, file_name="confusion-matrix.json")

    return accuracy

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

[1;38;5;39mCOMET INFO:[0m Experiment is live on comet.com https://www.comet.com/luluw8071/sentiment-analysis-transformer/0ab0bd7e4acc48bfb16b20b552991ecf

[1;38;5;39mCOMET INFO:[0m Couldn't find a Git repository in '/content' nor in any parent directory. Set `COMET_GIT_DIRECTORY` if your Git Repository is elsewhere.


In [49]:
# Label mapping
label_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}

model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=3)

# Override the model configuration for custom labels
model.config.id2label = label_mapping
model.config.label2id = {v: k for k, v in label_mapping.items()}


trainer = Trainer(
    model=model,                        # The instantiated Transformers model to be trained
    args=training_args,                 # Training arguments, defined above
    train_dataset=train_dataset,        # Training dataset
    eval_dataset=test_dataset,          # Evaluation dataset
    tokenizer=tokenizer,                # Tokenizer
    data_collator=data_collator,        # Data collator
    compute_metrics=compute_metrics,    # Function to compute metrics
    callbacks=[early_stopping_callback] # Early Stop Callback
)

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

  trainer = Trainer(


In [51]:
from accelerate import Accelerator

# Initialize Accelerator and Trainer
Accelerator()
trainer.train()

[1;38;5;39mCOMET INFO:[0m An experiment with the same configuration options is already running and will be reused.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5275,0.464618,0.809819
2,0.4013,0.490454,0.812319
3,0.2941,0.545507,0.810419
4,0.2136,0.61,0.809619


TrainOutput(global_step=5000, training_loss=0.35911542358398435, metrics={'train_runtime': 2533.9352, 'train_samples_per_second': 78.929, 'train_steps_per_second': 2.467, 'total_flos': 2.104907341824e+16, 'train_loss': 0.35911542358398435, 'epoch': 4.0})

In [52]:
exp.end()

[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m Comet.ml Experiment Summary
[1;38;5;39mCOMET INFO:[0m ---------------------------------------------------------------------------------------
[1;38;5;39mCOMET INFO:[0m   Data:
[1;38;5;39mCOMET INFO:[0m     display_summary_level : 1
[1;38;5;39mCOMET INFO:[0m     name                  : pleasant_lizard_5116
[1;38;5;39mCOMET INFO:[0m     url                   : https://www.comet.com/luluw8071/sentiment-analysis-transformer/0ab0bd7e4acc48bfb16b20b552991ecf
[1;38;5;39mCOMET INFO:[0m   Metrics [count] (min, max):
[1;38;5;39mCOMET INFO:[0m     eval/accuracy [8]              : (0.8096190380961904, 0.8123187681231877)
[1;38;5;39mCOMET INFO:[0m     eval/loss [8]                  : (0.4646177887916565, 0.6100000739097595)
[1;38;5;39mCOMET INFO:[0m     eval/runtime [8]               : (20.0649, 20.1843)
[1;38;5;39mCOMET INFO:[0m     eva

In [53]:
trainer.evaluate()

{'eval_loss': 0.4904536008834839,
 'eval_accuracy': 0.8123187681231877,
 'eval_runtime': 20.3009,
 'eval_samples_per_second': 492.638,
 'eval_steps_per_second': 15.418,
 'epoch': 4.0}

In [54]:
kwargs = {
    "dataset": "Twitter Sentiment Datasets",
    # "dataset_args": "config: hi, split: test",
    "language": "en",
    "finetuned_from": model_name,
    "tasks": "multi-sentiment-classification",
}

trainer.push_to_hub(**kwargs)

events.out.tfevents.1732251031.e256319355c4.204.0:   0%|          | 0.00/7.92k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

events.out.tfevents.1732253644.e256319355c4.204.1:   0%|          | 0.00/411 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/luluw/twitter-roberta-base-sentiment-finetuned-sentiment/commit/b30b8a6d8378b03f59807eb48a88d826e9a9cc65', commit_message='End of training', commit_description='', oid='b30b8a6d8378b03f59807eb48a88d826e9a9cc65', pr_url=None, repo_url=RepoUrl('https://huggingface.co/luluw/twitter-roberta-base-sentiment-finetuned-sentiment', endpoint='https://huggingface.co', repo_type='model', repo_id='luluw/twitter-roberta-base-sentiment-finetuned-sentiment'), pr_revision=None, pr_num=None)

## 10. Sentiment Prediction using custom text


In [60]:
# Tokenize text, get output from model and predict
def predict_sentiment(model, tokenizer, text, device):
    tokenized = tokenizer(text, truncation=True, padding=True, return_tensors='pt').to(device)
    outputs = model(**tokenized)
    probs = F.softmax(outputs.logits, dim=-1)
    preds = torch.argmax(outputs.logits, dim=-1).item()
    probs_max = probs.max().detach().cpu().numpy()

    prediction = "Positive" if preds == 2 else "Neutral" if preds == 1 else "Negative"
    print(f'{text}\nSentiment: {prediction}\tProbability: {probs_max*100:.2f}%\n', end="-"*50 + "\n")
    # return prediction, probs_max

In [61]:
# An example of complex review that contains both positive and negative sentiment
texts = ["Despite facing numerous challenges and setbacks, the team worked tirelessly and managed to exceed all expectations, achieving remarkable success. However, despite their best efforts, the project encountered multiple setbacks, ultimately leading to its failure and significant financial losses.",
         "The hotel room was clean and comfortable, and the amenities were well-maintained. However, the noise from the nearby construction site was disruptive due to which i could not focus when working.",
         "The movie had an intriguing plot and captivating visuals, but the sound quality was poor, making it difficult to fully enjoy the experience."]
for text in texts:
    predict_sentiment(model, tokenizer, text, device)

Despite facing numerous challenges and setbacks, the team worked tirelessly and managed to exceed all expectations, achieving remarkable success. However, despite their best efforts, the project encountered multiple setbacks, ultimately leading to its failure and significant financial losses.
Sentiment: Neutral	Probability: 65.72%
--------------------------------------------------
The hotel room was clean and comfortable, and the amenities were well-maintained. However, the noise from the nearby construction site was disruptive due to which i could not focus when working.
Sentiment: Neutral	Probability: 68.75%
--------------------------------------------------
The movie had an intriguing plot and captivating visuals, but the sound quality was poor, making it difficult to fully enjoy the experience.
Sentiment: Negative	Probability: 94.74%
--------------------------------------------------


In [62]:
# Breaking down above example into parts
texts = ["Despite facing numerous challenges and setbacks, the team worked tirelessly and managed to exceed all expectations, achieving remarkable success.",
         "However, despite their best efforts, the project encountered multiple setbacks, ultimately leading to its failure and significant financial losses.",
         "The hotel room was clean and comfortable, and the amenities were well-maintained.",
         "However, the noise from the nearby construction site was disruptive due to which i could not focus when working."]

for text in texts:
  predict_sentiment(model, tokenizer, text, device)

Despite facing numerous challenges and setbacks, the team worked tirelessly and managed to exceed all expectations, achieving remarkable success.
Sentiment: Positive	Probability: 98.60%
--------------------------------------------------
However, despite their best efforts, the project encountered multiple setbacks, ultimately leading to its failure and significant financial losses.
Sentiment: Negative	Probability: 99.02%
--------------------------------------------------
The hotel room was clean and comfortable, and the amenities were well-maintained.
Sentiment: Positive	Probability: 99.29%
--------------------------------------------------
However, the noise from the nearby construction site was disruptive due to which i could not focus when working.
Sentiment: Negative	Probability: 96.12%
--------------------------------------------------


## Load the fine-tuned model from hugging face

In [63]:
%%writefile inference.py
import torch
from transformers import RobertaTokenizer, RobertaForSequenceClassification

model_name = "luluw/twitter-roberta-base-sentiment-tweet-sentiment"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaForSequenceClassification.from_pretrained(model_name, max_length=128)

# Example usage
text = "Just finished organizing my desk. Got a few tasks lined up for the afternoon."

inputs = tokenizer(text, return_tensors='pt')
outputs = model(**inputs)
preds = torch.argmax(outputs.logits, dim=-1)
prediction = "Positive" if preds == 2 else "Neutral" if preds == 1 else "Negative"

print(text)
print(prediction)

Writing inference.py


In [65]:
!python3 inference.py

Just finished organizing my desk. Got a few tasks lined up for the afternoon.
Neutral
