In [1]:
import torch
from datasets import load_dataset

# Load our data: the `rotten_tomatoes` dataset via Hugging Face `datasets`.
# The result is bound to `data`; its splits and columns are shown in the next cell.
data = load_dataset("rotten_tomatoes")

In [2]:
data

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
})

In [3]:
data["train"][0]

{'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .',
 'label': 1}

In [4]:
data["train"][-1]

{'text': 'things really get weird , though not particularly scary : the movie is all portent and no content .',
 'label': 0}

# Task-Specific Model

- BERT base model (uncased)
- RoBERTa base model
- DistilBERT base model (uncased)
- DeBERTa base model
- bert-tiny
- ALBERT base v2

In [5]:
from transformers import pipeline

# Path to our HF model: a RoBERTa sentiment classifier that scores each text
# as negative / neutral / positive.
model_path = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Prefer the first GPU, but fall back to CPU so this cell also runs on
# machines without CUDA instead of failing when the pipeline is built.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load model into pipeline.
# NOTE(review): recent transformers releases appear to deprecate
# `return_all_scores` in favour of `top_k=None`; confirm the output format and
# ordering against the installed version before switching.
pipe = pipeline(
    model=model_path,
    tokenizer=model_path,
    return_all_scores=True,
    device=device
)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
import numpy as np
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset

In [7]:
# Run inference on the test split and collapse the three-way sentiment output
# into a binary prediction (0 = negative, 1 = positive).
y_pred = []
for output in tqdm(pipe(KeyDataset(data["test"], "text")), total=len(data["test"])):
    # Look the scores up by label name rather than by list position, so the
    # result does not depend on the order in which the pipeline returns them.
    scores = {entry["label"]: entry["score"] for entry in output}
    negative_score = scores["negative"]
    positive_score = scores["positive"]
    # The "neutral" score is ignored; on a tie np.argmax picks index 0 (negative).
    assignment = np.argmax([negative_score, positive_score])
    y_pred.append(assignment)

100%|██████████| 1066/1066 [00:02<00:00, 378.50it/s]


In [8]:
pp = pipe(KeyDataset(data["test"], "text"))

In [9]:
for output in pp:
    break

In [10]:
output

[{'label': 'negative', 'score': 0.005161240231245756},
 {'label': 'neutral', 'score': 0.04023356735706329},
 {'label': 'positive', 'score': 0.9546051621437073}]

In [11]:
# Reproduce the per-document decision for the single `output` inspected above.
# Scores are looked up by label name so the result does not depend on the
# order of the entries in `output`.
scores = {entry["label"]: entry["score"] for entry in output}
negative_score = scores["negative"]
positive_score = scores["positive"]
# 0 = negative, 1 = positive; the "neutral" score is ignored.
assignment = np.argmax([negative_score, positive_score])
assignment

1

In [12]:
data["test"]

Dataset({
    features: ['text', 'label'],
    num_rows: 1066
})

In [13]:
len(data["test"]["text"])

1066

In [14]:
data["test"]["text"][0]

'lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .'

In [16]:
data["test"]["label"][0]

1

In [17]:
pipe("A total shit")

[[{'label': 'negative', 'score': 0.8399618268013},
  {'label': 'neutral', 'score': 0.13086175918579102},
  {'label': 'positive', 'score': 0.02917640656232834}]]

In [20]:
pipe("Pretty average, good special effects, good plot, but no big surprise")

[[{'label': 'negative', 'score': 0.12898923456668854},
  {'label': 'neutral', 'score': 0.3275248110294342},
  {'label': 'positive', 'score': 0.5434859395027161}]]

In [21]:
from sklearn.metrics import classification_report

def evaluate_performance(y_true, y_pred):
    """Create and print the classification report for binary review sentiment.

    Args:
        y_true: Ground-truth labels, 0 (negative) or 1 (positive).
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        None. The report is printed to stdout.
    """
    performance = classification_report(
        y_true, y_pred,
        # Pin the label set so the two target names always line up with
        # classes 0 and 1, even when one class never occurs in the inputs
        # (without this scikit-learn raises on the length mismatch).
        labels=[0, 1],
        target_names=["Negative Review", "Positive Review"]
    )
    print(performance)

In [22]:
len(data["test"]["label"])

1066

In [23]:
len(y_pred)

1066

In [24]:
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

Negative Review       0.76      0.88      0.81       533
Positive Review       0.86      0.72      0.78       533

       accuracy                           0.80      1066
      macro avg       0.81      0.80      0.80      1066
   weighted avg       0.81      0.80      0.80      1066



# General-Purpose Embeddings for Classification Tasks

In [31]:
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression


In [26]:
# Load model
model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")



In [27]:
# Convert text to embeddings: one fixed-size vector per review for the train
# and test splits (the resulting shapes are shown in the following cells).
train_embeddings = model.encode(data["train"]["text"], show_progress_bar=True)
test_embeddings = model.encode(data["test"]["text"], show_progress_bar=True)

Batches:   0%|          | 0/267 [00:00<?, ?it/s]

Batches:   0%|          | 0/34 [00:00<?, ?it/s]

In [29]:
train_embeddings.shape

(8530, 768)

In [30]:
test_embeddings.shape

(1066, 768)

In [32]:
type(data["train"]["label"])

list

In [33]:
# Train a logistic regression on our train embeddings, using the train split's
# 0/1 labels as targets. The embedding model itself is not updated here.
clf = LogisticRegression(random_state=42)
clf.fit(train_embeddings, data["train"]["label"])

In [34]:
# Predict previously unseen instances
y_pred = clf.predict(test_embeddings)

In [35]:
y_pred

array([1, 1, 0, ..., 0, 0, 0])

In [36]:
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

Negative Review       0.85      0.86      0.85       533
Positive Review       0.86      0.85      0.85       533

       accuracy                           0.85      1066
      macro avg       0.85      0.85      0.85      1066
   weighted avg       0.85      0.85      0.85      1066



# Zero-Shot Classification without labels

We will describe each label in plain text and embed these descriptions with the same model used for the documents.

In [47]:
label_embeddings = model.encode(["A very negative movie review",  "A very positive movie review"])

In [48]:
label_embeddings.shape

(2, 768)

To assign labels to documents, we can apply cosine similarity to the document–label pairs and pick the most similar label for each document.

In [49]:
from sklearn.metrics.pairwise import cosine_similarity

In [50]:
# Find the best matching label for each document.
# sim_matrix has one row per test document and one column per label description.
# The descriptions were encoded in the order [negative, positive], so the
# argmax column index (0 or 1) is directly comparable to the dataset's labels.
sim_matrix = cosine_similarity(test_embeddings, label_embeddings)
y_pred = np.argmax(sim_matrix, axis=1)

In [51]:
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

Negative Review       0.86      0.73      0.79       533
Positive Review       0.76      0.88      0.82       533

       accuracy                           0.80      1066
      macro avg       0.81      0.80      0.80      1066
   weighted avg       0.81      0.80      0.80      1066



# Text Classification with Generative Models

 ## Text-to-Text Transfer Transformer (Encoder-Decoder T5)

In [52]:
# Flan T5
# Load our model.
# NOTE: this rebinds `pipe`, replacing the sentiment-classification pipeline
# created earlier in the notebook.
# Prefer the first GPU, but fall back to CPU so this cell also runs on
# machines without CUDA instead of failing when the pipeline is built.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    device=device
)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [53]:
# Prepare our data: add a "t5" column to every split, holding the instruction
# prompt followed by the review text. `data` is rebound to the mapped result.
prompt = "Is the following sentence positive or negative? "
data = data.map(lambda example: {"t5": prompt + example['text']})
data


Map:   0%|          | 0/8530 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 1066
    })
})

In [56]:
data["test"]["t5"][:2]

['Is the following sentence positive or negative? lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .',
 'Is the following sentence positive or negative? consistently clever and suspenseful .']

In [57]:
pipe('Is the following sentence positive or negative? consistently clever and suspenseful .')



[{'generated_text': 'positive'}]

In [58]:
kk = KeyDataset(data["test"], "t5")
for output in kk:
    break

In [63]:
output

'Is the following sentence positive or negative? lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .'

In [60]:
kk.dataset

Dataset({
    features: ['text', 'label', 't5'],
    num_rows: 1066
})

In [61]:
kk[0]

'Is the following sentence positive or negative? lovingly photographed in the manner of a golden book sprung to life , stuart little 2 manages sweetness largely without stickiness .'

In [62]:
kk[1]

'Is the following sentence positive or negative? consistently clever and suspenseful .'

In [64]:
p = pipe(KeyDataset(data["test"], "t5"))

In [67]:
for output in p:
    break



In [68]:
output

[{'generated_text': 'positive'}]

In [69]:
# Run inference: feed each prompted test review to the text2text pipeline and
# map the generated word to a binary label (0 = negative, 1 = positive).
y_pred = []
for output in tqdm(pipe(KeyDataset(data["test"], "t5")), total=len(data["test"])):
    # Normalise case and surrounding whitespace so variants such as
    # "Negative" or "negative " are not silently counted as positive.
    text = output[0]["generated_text"].strip().lower()
    # Any answer other than "negative" is treated as positive.
    y_pred.append(0 if text == "negative" else 1)

100%|██████████| 1066/1066 [00:09<00:00, 115.46it/s]


In [70]:
evaluate_performance(data["test"]["label"], y_pred)

                 precision    recall  f1-score   support

Negative Review       0.83      0.85      0.84       533
Positive Review       0.85      0.83      0.84       533

       accuracy                           0.84      1066
      macro avg       0.84      0.84      0.84      1066
   weighted avg       0.84      0.84      0.84      1066



# OpenAI API

In [None]:
# import openai

# # Create client
# client = openai.OpenAI(api_key="YOUR_KEY_HERE")

In [None]:
# def chatgpt_generation(prompt, document, model="gpt-3.5-turbo-0125"):
#     """Generate an output based on a prompt and an input document."""
#     messages=[
#         {
#             "role": "system",
#             "content": "You are a helpful assistant."
#             },
#         {
#             "role": "user",
#             "content":   prompt.replace("[DOCUMENT]", document)
#             }
#     ]
#     chat_completion = client.chat.completions.create(
#       messages=messages,
#       model=model,
#       temperature=0
#     )
#     return chat_completion.choices[0].message.content


In [None]:
# # Define a prompt template as a base
# prompt = """Predict whether the following document is a positive or negative movie review:

# [DOCUMENT]

# If it is positive return 1 and if it is negative return 0. Do not give any other answers.
# """

# # Predict the target using GPT
# document = "unpretentious , charming , quirky , original"
# chatgpt_generation(prompt, document)

In [None]:
# # You can skip this if you want to save your (free) credits
# predictions = [
#     chatgpt_generation(prompt, doc) for doc in tqdm(data["test"]["text"])
# ]

In [None]:
# # Extract predictions
# y_pred = [int(pred) for pred in predictions]

# # Evaluate performance
# evaluate_performance(data["test"]["label"], y_pred)