In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd

import warnings
# Install a blanket 'ignore' filter: no category or module is given, so every
# warning raised after this line is suppressed, including deprecation notices.
warnings.filterwarnings('ignore')

In [2]:
# Load the review table; later cells read its 'content' column and append
# the model's prediction columns to this same frame.
subset = pd.read_csv(filepath_or_buffer='sampleDataWithPrediction.csv')

In [None]:
# Score every review in `subset['content']` with a pretrained sentiment classifier
# and append three columns to `subset`:
#   BertPredictedScore     - 1-based index of the highest-scoring class
#   BertConfidence         - softmax probability of that class, in percent
#   BertPredictedSentiment - 'positive' (>= 4), 'neutral' (== 3) or 'negative'
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
BATCH_SIZE = 32    # reviews per forward pass; lower this if memory is tight
MAX_TOKENS = 512   # reviews longer than this are truncated by the tokenizer

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference only: make sure dropout is disabled

# Missing reviews come back from read_csv as float NaN, which the tokenizer
# rejects; coerce everything to str so one bad row cannot abort the whole run.
tokenizedReviewsPackedIntoList = subset['content'].fillna('').astype(str).tolist()

# Run the model in mini-batches. Feeding the whole column through in a single
# forward pass makes memory grow with the number of rows and pads every review
# to the longest one in the dataset; per-batch padding avoids both.
logitBatches = []
with torch.no_grad():
    for start in range(0, len(tokenizedReviewsPackedIntoList), BATCH_SIZE):
        batchTexts = tokenizedReviewsPackedIntoList[start:start + BATCH_SIZE]
        batchInputs = tokenizer(
            batchTexts,
            is_split_into_words=False,
            return_tensors='pt',             # Return PyTorch tensors
            padding=True,                    # Pad to the longest review in this batch
            truncation=True,                 # Truncate sequences if too long
            max_length=MAX_TOKENS
        )
        logitBatches.append(model(**batchInputs).logits)

# An empty frame yields no batches; fall back to a (0, num_labels) tensor so the
# column assignments below still work instead of torch.cat raising on [].
if logitBatches:
    logits = torch.cat(logitBatches, dim=0)
else:
    logits = torch.empty((0, model.config.num_labels))

# Class indices are 0-based, so shift by one to get the 1-based score.
predictedScore = torch.argmax(logits, dim=1) + 1
confidence = torch.softmax(logits, dim=1).max(dim=1).values * 100

subset['BertPredictedScore'] = predictedScore.numpy()
subset['BertConfidence'] = confidence.numpy()

subset['BertPredictedSentiment'] = subset['BertPredictedScore'].apply(
    lambda x: 'positive' if x >= 4 else ('neutral' if x == 3 else 'negative'))

In [None]:
# Write the augmented frame back to the file it was loaded from. index=False keeps
# the positional index out of the CSV; otherwise each load/save round trip would
# add another 'Unnamed: 0' column when the file is read again with pd.read_csv.
subset.to_csv('sampleDataWithPrediction.csv', index=False)