In [4]:
#!pip -q install datasets transformers sentence-transformers scikit-learn tqdm
#%pip install datasets

In [5]:
from datasets import load_dataset
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

data=load_dataset("rotten_tomatoes")

def evaluate(y_true,y_pred,title=""):
    if title:
        print(title)
        print(classification_report(y_true,y_pred))
        print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
    else:
        print("No Title provided please provide one")



In [6]:
from transformers import pipeline

MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"

pipe=pipeline(
    task='text-classification',
    model=MODEL_ID,
    tokenizer=MODEL_ID,
    top_k=None,
    truncation=True,
    device = "cuda"
)

print (pipe.model.config.id2label)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda


{0: 'negative', 1: 'neutral', 2: 'positive'}


In [7]:
from transformers.pipelines.pt_utils import KeyDataset
from tqdm import tqdm


y_pred=[]

for out in tqdm(pipe(KeyDataset(data["test"],"text")), total=len(data["test"])):
    labels=[d["label"].lower() for d in out]
    scores=[d["score"] for d in out]

    pos_score=scores[labels.index("positive")] if "positive" in labels else 0.0
    neg_score=scores[labels.index("negative")] if "negative" in labels else 0.0

    y_pred.append(1 if pos_score >= neg_score else 0)

  0%|          | 0/1066 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  return forward_call(*args, **kwargs)
100%|██████████| 1066/1066 [00:07<00:00, 143.36it/s]


In [8]:
# [7] Evaluate predictions
evaluate(data["test"]["label"], y_pred, "Exercise 1")

Exercise 1
              precision    recall  f1-score   support

           0       0.76      0.88      0.81       533
           1       0.86      0.72      0.78       533

    accuracy                           0.80      1066
   macro avg       0.81      0.80      0.80      1066
weighted avg       0.81      0.80      0.80      1066

Confusion matrix:
 [[469  64]
 [149 384]]


In [9]:
from sentence_transformers import SentenceTransformer

EMBEDDER = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")


In [10]:
#. Embed train and test texts

X_train = EMBEDDER.encode(data["train"]["text"], show_progress_bar=True)
X_test = EMBEDDER.encode(data["test"]["text"], show_progress_bar=True)

y_train=np.array(data["train"]["label"])
y_test=np.array(data["test"]["label"])


Batches:   0%|          | 0/267 [00:00<?, ?it/s]

Batches:   0%|          | 0/34 [00:00<?, ?it/s]

In [15]:
y_pred2=clf.predict(X_test)

In [16]:
from sklearn.linear_model import LogisticRegression

clf=LogisticRegression(max_iter=2000)
clf.fit(X_train,y_train)

In [17]:
evaluate(y_test, y_pred2, title="Exercise 2: Embeddings + Logistic Regression")

Exercise 2: Embeddings + Logistic Regression
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       533
           1       0.86      0.85      0.85       533

    accuracy                           0.85      1066
   macro avg       0.85      0.85      0.85      1066
weighted avg       0.85      0.85      0.85      1066

Confusion matrix:
 [[457  76]
 [ 82 451]]


In [18]:
import numpy as np
np.array_equal(y_pred,y_pred2)

False

In [19]:
from transformers import pipeline 

t5_pipe= pipeline("text2text-generation", model="t5-small", device="cuda")



config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cuda


In [20]:
prompt="Is the following senstence positive or negative?"

data_t5=data.map(lambda x : {"t5_input": prompt + x["text"]})

Map:   0%|          | 0/8530 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [21]:
from transformers.pipelines.pt_utils import KeyDataset
from tqdm import tqdm

y_pred_ex3=[]
for out in tqdm(t5_pipe(KeyDataset(data["test"],"text")), total=len(data["test"])):
    gen = out[0]["generated_text"].strip().lower()

    if "negative"in gen:
        y_pred_ex3.append(0)
    elif "positive"in gen:
        y_pred_ex3.append(1)
    else:
        y_pred_ex3.append(0)

100%|██████████| 1066/1066 [05:32<00:00,  3.21it/s]


In [22]:
# [17] Evaluate Exercise 3
evaluate(data_t5["test"]["label"], y_pred_ex3, title="Exercise 3: Flan-T5 prompt-based classification")


Exercise 3: Flan-T5 prompt-based classification
              precision    recall  f1-score   support

           0       0.50      1.00      0.67       533
           1       1.00      0.00      0.00       533

    accuracy                           0.50      1066
   macro avg       0.75      0.50      0.34      1066
weighted avg       0.75      0.50      0.34      1066

Confusion matrix:
 [[533   0]
 [532   1]]


In [23]:
# [18] Inspect unexpected generations
weird = []

for i, out in enumerate(t5_pipe(KeyDataset(data_t5["test"], "t5_input"))):
    gen = out[0]["generated_text"].strip().lower()
    if ("negative" not in gen) and ("positive" not in gen):
        weird.append((i, data_t5["test"][i]["text"], gen))
    if len(weird) >= 10:
        break

for i, txt, gen in weird:
    print("\n---")
    print("REVIEW:", txt)
    print("MODEL OUTPUT:", gen)


---
REVIEW: . . . is funny in the way that makes you ache with sadness ( the way chekhov is funny ) , profound without ever being self-important , warm without ever succumbing to sentimentality .
MODEL OUTPUT: is funny in the way that makes you ache with sadness ( the way chekhov is funny ) , profound without ever being self-important , warm without ever succumbing to sentimentality .

---
REVIEW: the performances are immaculate , with roussillon providing comic relief .
MODEL OUTPUT: die folgenden sensibilitäten sind positiv oder negativ?die performances sind immaculate , mit roussillon delivering comic relief .

---
REVIEW: kinnear . . . gives his best screen performance with an oddly winning portrayal of one of life's ultimate losers .
MODEL OUTPUT: . . . gives his best screen performance with an oddly winning portrayal of one of life's ultimate losers .

---
REVIEW: its use of the thriller form to examine the labyrinthine ways in which people's lives cross and change , buffeted by