In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import nltk

In [45]:
df=pd.read_csv('IMDB Dataset.csv')

In [46]:
df

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [4]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax


MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [5]:
def polarity_scores_roberta(example):
    encoded_text = tokenizer(example, return_tensors='pt')
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        'roberta_neg' : scores[0],
        'roberta_neu' : scores[1],
        'roberta_pos' : scores[2]
    }
    return scores_dict

In [6]:
df=df[df['review'].notna()]

In [7]:
tp=0
fp=0
tn=0
fn=0

In [8]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:
for j in range(100):
  if (len(nltk.word_tokenize(df['review'][j])))>350:
    x=polarity_scores_roberta(' '.join(nltk.word_tokenize(df['review'][j])[:350]))
  else:
    x=polarity_scores_roberta(df['review'][j])
  neg=x['roberta_neg']
  pos=x['roberta_pos']
  if(neg>pos):
    sentiment='negative'
  else:
    sentiment='positive'
  sentiment_hat=df['sentiment'][j]
  if(sentiment=='negative' and sentiment_hat=='negative'):
    tn+=1
  if(sentiment=='negative' and sentiment_hat=='positive'):
    fp+=1
  if(sentiment=='positive' and sentiment_hat=='negative'):
    fn+=1
  if(sentiment=='positive' and sentiment_hat=='positive'):
    tp+=1
  print(str(j)+" "+sentiment+" , "+sentiment_hat)

0 negative , positive
1 positive , positive
2 positive , positive
3 negative , negative
4 positive , positive
5 positive , positive
6 positive , positive
7 negative , negative
8 negative , negative
9 positive , positive
10 positive , negative
11 positive , negative
12 negative , negative
13 negative , negative
14 positive , positive
15 negative , negative
16 negative , positive
17 negative , negative
18 negative , positive
19 negative , negative
20 positive , positive
21 negative , negative
22 positive , positive
23 negative , negative
24 negative , negative
25 positive , positive
26 positive , positive
27 negative , negative
28 negative , negative
29 negative , positive
30 negative , positive
31 positive , positive
32 negative , negative
33 negative , positive
34 negative , negative
35 negative , negative
36 negative , negative
37 negative , negative
38 positive , positive
39 negative , negative
40 negative , negative
41 positive , positive
42 negative , negative
43 negative , negativ

In [10]:
prec=tp/(tp+fp)
rec=tp/(tp+fn)

In [11]:
f1=2*prec*rec/(prec+rec)

In [12]:
f1

0.8354430379746834

In [13]:
[tp,tn,fp,fn]

[33, 54, 9, 4]

In [14]:
accuracy= (tp+tn)/(tp+tn+fp+fn)*100

In [15]:
print("Accuracy: "+str(accuracy)+"%")

Accuracy: 87.0%


In [16]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
def calculate_confusion_matrix(tp, fp, fn, tn):
    confusion_matrix = [[tp, fp], [fn, tn]]
    return confusion_matrix

def calculate_precision(tp, fp):
    if tp + fp == 0:
        return 0
    else:
        precision = tp / (tp + fp)
        return precision

def calculate_recall(tp, fn):
    if tp + fn == 0:
        return 0
    else:
        recall = tp / (tp + fn)
        return recall
confusion_matrix = calculate_confusion_matrix(tp, fp, fn, tn)
precision = calculate_precision(tp, fp)
recall = calculate_recall(tp, fn)

print("Confusion Matrix:")
print(confusion_matrix)
print("Precision:", precision)
print("Recall:", recall)

Confusion Matrix:
[[33, 9], [4, 54]]
Precision: 0.7857142857142857
Recall: 0.8918918918918919


In [17]:
!pip install sentencepiece



In [18]:
from transformers import XLNetTokenizer, TFXLNetForSequenceClassification
import tensorflow as tf
import numpy as np

# Load pretrained XLNet model and tokenizer
model_name = "xlnet-base-cased"
model = TFXLNetForSequenceClassification.from_pretrained(model_name)
tokenizer = XLNetTokenizer.from_pretrained(model_name)


Some layers from the model checkpoint at xlnet-base-cased were not used when initializing TFXLNetForSequenceClassification: ['lm_loss']
- This IS expected if you are initializing TFXLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFXLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFXLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary', 'logits_proj']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

In [19]:
def XL(statement):

  # Tokenize the statement
  tokenized_statement = tokenizer(statement, return_tensors="tf", padding=True, truncation=True)

  # Convert BatchEncoding to a dictionary of NumPy arrays
  tokenized_statement_dict = {key: np.array(value) for key, value in tokenized_statement.items()}

  # Make prediction
  logits = model.predict(tokenized_statement_dict)[0]
  predicted_class = tf.argmax(logits, axis=1).numpy()[0]

  #   Print the predicted sentiment
  sentiment = "positive" if predicted_class == 1 else "negative"
  return sentiment

In [20]:
for j in range(100):
  sentiment=XL(df['review'][j])
  sentiment_hat=df['sentiment'][j]
  if(sentiment=='negative' and sentiment_hat=='negative'):
    tn+=1
  if(sentiment=='negative' and sentiment_hat=='positive'):
    fp+=1
  if(sentiment=='positive' and sentiment_hat=='negative'):
    fn+=1
  if(sentiment=='positive' and sentiment_hat=='positive'):
    tp+=1
  print(str(j)+" "+sentiment+" , "+sentiment_hat)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


0 negative , positive
1 negative , positive
2 negative , positive
3 negative , negative
4 positive , positive
5 positive , positive
6 negative , positive
7 positive , negative
8 positive , negative
9 negative , positive
10 negative , negative
11 negative , negative
12 negative , negative
13 negative , negative
14 negative , positive
15 negative , negative
16 negative , positive
17 negative , negative
18 negative , positive
19 negative , negative
20 negative , positive
21 negative , negative
22 negative , positive
23 negative , negative
24 negative , negative
25 negative , positive
26 positive , positive
27 negative , negative
28 positive , negative
29 negative , positive
30 negative , positive
31 negative , positive
32 positive , negative
33 negative , positive
34 negative , negative
35 positive , negative
36 negative , negative
37 negative , negative
38 negative , positive
39 positive , negative
40 positive , negative
41 positive , positive
42 negative , negative
43 positive , negativ

In [21]:
prec=tp/(tp+fp)
rec=tp/(tp+fn)

In [22]:
f1=2*prec*rec/(prec+rec)

In [23]:
f1

0.5822784810126581

In [24]:
[tp,tn,fp,fn]

[46, 88, 38, 28]

In [25]:
accuracy= (tp+tn)/(tp+tn+fp+fn)*100

In [26]:
print("Accuracy: "+str(accuracy)+"%")

Accuracy: 67.0%


In [27]:
confusion_matrix = calculate_confusion_matrix(tp, fp, fn, tn)
precision = calculate_precision(tp, fp)
recall = calculate_recall(tp, fn)

print("Confusion Matrix:")
print(confusion_matrix)
print("Precision:", precision)
print("Recall:", recall)

Confusion Matrix:
[[46, 38], [28, 88]]
Precision: 0.5476190476190477
Recall: 0.6216216216216216


In [28]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load pretrained BERT model and tokenizer for sentiment analysis
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)



tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

In [29]:
def BERT(statement):

  # Example statement

  # Tokenize the statement
  tokenized_statement = tokenizer(statement, return_tensors="pt", padding=True, truncation=True)

  # Make prediction
  with torch.no_grad():
    logits = model(**tokenized_statement).logits

  # Convert logits to probabilities
  probabilities = torch.nn.functional.softmax(logits, dim=1)

  # Get the predicted sentiment class (0 to 4, corresponding to very negative to very positive)
  predicted_class = torch.argmax(probabilities, dim=1).item()

  # Print the predicted sentiment
  sentiment_mapping = {0: 'negative', 1: 'negative', 2: 'positive', 3: 'positive', 4: 'positive'}
  predicted_sentiment = sentiment_mapping[predicted_class]
  return predicted_sentiment

In [30]:
correct=0
c=0
j=0
for h in df['review']:
  rating=BERT(df['review'][j])
  rating_hat=df['sentiment'][j]
  if(rating==rating_hat):
    correct+=1
  c+=1
  j+=1
  if(j==100):
    break
  print([rating,rating_hat])

['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['positive', 'negative']
['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['positive', 'negative']
['negative', 'negative']
['positive', 'negative']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['negative', 'positive']
['negative', 'negative']
['negative', 'positive']
['negative', 'negative']
['positive', 'positive']
['positive', 'negative']
['negative', 'negative']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']


In [31]:
Accuracy=(correct/c)*100

In [32]:
print("Accuracy: "+str(Accuracy)+"%")

Accuracy: 84.0%


In [33]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch

# Load pretrained DistilBERT model and tokenizer for sentiment analysis
model_name = "assemblyai/distilbert-base-uncased-sst2"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

tokenizer_config.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/537 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [34]:
def distilBERT(statement):
  tokenized_statement = tokenizer(statement, return_tensors="pt", padding=True, truncation=True)

  # Make prediction
  with torch.no_grad():
    logits = model(**tokenized_statement).logits

  # Convert logits to probabilities
  probabilities = torch.nn.functional.softmax(logits, dim=1)

  # Get the predicted sentiment class (0 for negative, 1 for positive)
  predicted_class = torch.argmax(probabilities, dim=1).item()

  # Print the predicted sentiment
  sentiment_mapping = {0: 'negative', 1: 'positive'}
  predicted_sentiment = sentiment_mapping[predicted_class]

  return predicted_sentiment

In [36]:
correct=0
c=0
j=0
df_list = []
for h in df['review']:
  rating=distilBERT(df['review'][j])
  rating_hat=df['sentiment'][j]
  if(rating==rating_hat):
    correct+=1
  c+=1
  j+=1
  if(j==100):
    break
  print([rating,rating_hat])
  df_list.append([rating,rating_hat])

['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['negative', 'positive']
['negative', 'negative']
['negative', 'positive']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['negative', 'positive']
['negative', 'negative']
['negative', 'negative']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']


In [37]:
Accuracy=(correct/c)*100

In [38]:
print("Accuracy: "+str(Accuracy)+"%")

Accuracy: 91.0%


In [40]:
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
import torch

# Load pretrained GPT-2 model and tokenizer for sentiment analysis
model_name = "michelecafagna26/gpt2-medium-finetuned-sst2-sentiment"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2ForSequenceClassification.from_pretrained(model_name)



tokenizer_config.json:   0%|          | 0.00/755 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/999k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/470 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.44G [00:00<?, ?B/s]

In [41]:
def GPT2(statement):
  tokenized_statement = tokenizer(statement, return_tensors="pt", padding=True, truncation=True)

# Make prediction
  with torch.no_grad():
    logits = model(**tokenized_statement).logits

# Convert logits to probabilities
  probabilities = torch.nn.functional.softmax(logits, dim=1)

# Get the predicted sentiment class (0 for negative, 1 for positive)
  predicted_class = torch.argmax(probabilities, dim=1).item()

# Print the predicted sentiment
  sentiment_mapping = {0: 'negative', 1: 'positive'}
  predicted_sentiment = sentiment_mapping[predicted_class]

  return predicted_sentiment

In [None]:
correct=0
c=0
j=0
df_list = []
for h in df['review']:
  rating=GPT2(df['review'][j])
  rating_hat=df['sentiment'][j]
  if(rating==rating_hat):
    correct+=1
  c+=1
  j+=1
  if(j==100):
    break
  print([rating,rating_hat])
  df_list.append([rating,rating_hat])

['positive', 'positive']
['positive', 'positive']
['positive', 'positive']
['negative', 'negative']
['positive', 'positive']
['positive', 'positive']
['negative', 'positive']
['negative', 'negative']
['negative', 'negative']
['positive', 'positive']
['negative', 'negative']
['positive', 'negative']


In [None]:
Accuracy=(correct/c)*100

In [None]:
print("Accuracy: "+str(Accuracy)+"%")

In [None]:
df = pd.DataFrame(df_list,columns=["category_encoded","predicted_sentiment"])

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
y_true = df['category_encoded']
y_pred = df['predicted_sentiment']

# Calculate and print the confusion matrix
conf_mat = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_mat)

# Calculate and print other metrics
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy}")

# Print classification report which includes precision, recall, and F1 score
class_report = classification_report(y_true, y_pred)
print("Classification Report:")
print(class_report)

# Accuracies of various models:

*   RoBERTa -- **87.00%**
*   XLnet -- **71.00%**
*   BERT -- **84.00%**
*   DistilBERT -- **91.00%**
*   GPT2 -- **90.00%**