In [6]:
#!pip -q install datasets transformers sentence-transformers scikit-learn tqdm
#%pip install datasets

In [7]:
from datasets import load_dataset
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

data=load_dataset("rotten_tomatoes")

def evaluate(y_true,y_pred,title=""):
    if title:
        print(title)
        print(classification_report(y_true,y_pred))
        print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
    else:
        print("No Title provided please provide one")



In [8]:
from transformers import pipeline

MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"

pipe=pipeline(
    task='text-classification',
    model=MODEL_ID,
    tokenizer=MODEL_ID,
    top_k=None,
    truncation=True,
    device = "mps"
)

print (pipe.model.config.id2label)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{0: 'negative', 1: 'neutral', 2: 'positive'}


In [9]:
from transformers.pipelines.pt_utils import KeyDataset
from tqdm import tqdm


y_pred=[]

for out in tqdm(pipe(KeyDataset(data["test"],"text")), total=len(data["test"])):
    labels=[d["label"].lower() for d in out]
    scores=[d["score"] for d in out]

    pos_score=scores[labels.index("positive")] if "positive" in labels else 0.0
    neg_score=scores[labels.index("negative")] if "negative" in labels else 0.0

    y_pred.append(1 if pos_score >= neg_score else 0)

  0%|          | 0/1066 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|██████████| 1066/1066 [00:36<00:00, 29.38it/s]


In [15]:
# [7] Evaluate predictions
evaluate(data["test"]["label"], y_pred, "Exercise 1")

Exercise 1
              precision    recall  f1-score   support

           0       0.76      0.88      0.81       533
           1       0.86      0.72      0.78       533

    accuracy                           0.80      1066
   macro avg       0.81      0.80      0.80      1066
weighted avg       0.81      0.80      0.80      1066

Confusion matrix:
 [[469  64]
 [149 384]]


In [11]:
from sentence_transformers import SentenceTransformer

EMBEDDER = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")




In [12]:
#. Embed train and test texts

X_train = EMBEDDER.encode(data["train"]["text"], show_progress_bar=True)
X_test = EMBEDDER.encode(data["test"]["text"], show_progress_bar=True)

y_train=np.array(data["train"]["label"])
y_test=np.array(data["test"]["label"])


Batches: 100%|██████████| 267/267 [00:50<00:00,  5.29it/s]
Batches: 100%|██████████| 34/34 [00:06<00:00,  5.46it/s]


In [13]:
from sklearn.linear_model import LogisticRegression

clf=LogisticRegression(max_iter=2000)
clf.fit(X_train,y_train)

In [14]:
evaluate(y_test, y_pred, title="Exercise 2: Embeddings + Logistic Regression")

Exercise 2: Embeddings + Logistic Regression
              precision    recall  f1-score   support

           0       0.76      0.88      0.81       533
           1       0.86      0.72      0.78       533

    accuracy                           0.80      1066
   macro avg       0.81      0.80      0.80      1066
weighted avg       0.81      0.80      0.80      1066

Confusion matrix:
 [[469  64]
 [149 384]]
