# Text classification with Generative Models

In [None]:
!pip install transformers>=4.41.2 accelerate>=0.31.0
!pip install transformers sentence-transformers openai
!pip install -U datasets



In [None]:
from datasets import load_dataset
# Work on small slices of AG News: the first 1000 training rows and the
# first 200 test rows.
split_slices = {"train": "train[:1000]", "test": "test[:200]"}
ds = load_dataset("ag_news", split=split_slices)
ds


DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 200
    })
})

In [None]:
# Peek at two training rows and one test row. A cell only renders its final
# expression, so all three rows are passed to display() explicitly.
display(ds["train"][0], ds["train"][1], ds["test"][0])


{'text': "Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.",
 'label': 2}

## Using a task-specific model

In [None]:
from transformers import pipeline
import torch

model_path = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Use the first GPU when one is available and fall back to CPU otherwise,
# instead of always forcing CPU.
device = 0 if torch.cuda.is_available() else -1

# Load the model into a pipeline.
# `return_all_scores=True` is kept on purpose: the inference loop reads the
# per-label scores by position.
pipe = pipeline(
    model=model_path,
    tokenizer=model_path,
    return_all_scores=True,
    device=device
)

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [None]:
import numpy as np
from tqdm import tqdm
from transformers.pipelines.pt_utils import KeyDataset

# Score every test document with the pipeline and keep a binary decision.
# NOTE(review): each prediction is 0 or 1 (argmax over two sentiment scores),
# while the dataset labels are four topic ids -- confirm this model fits the task.
y_pred = []
test_texts = KeyDataset(ds["test"], "text")
for scores in tqdm(pipe(test_texts), total=len(ds["test"])):
    # Positions 0 and 2 are read as the negative and the positive score.
    candidates = [scores[0]["score"], scores[2]["score"]]
    y_pred.append(np.argmax(candidates))


100%|██████████| 200/200 [01:01<00:00,  3.26it/s]


## Evaluation

In [None]:
from sklearn.metrics import classification_report

def evaluate_performance(
    y_true, y_pred, target_names=("World", "Sports", "Business", "Sci/Tech")
):
    """Print a per-class precision/recall/F1 classification report.

    Args:
        y_true: Ground-truth integer class ids.
        y_pred: Predicted integer class ids, aligned with ``y_true``.
        target_names: Display name for each class id, in id order
            (``target_names[i]`` names class ``i``). Defaults to the four
            names used throughout this notebook.

    Returns:
        None. The report is printed.
    """
    target_names = list(target_names)
    performance = classification_report(
        y_true, y_pred,
        # Pin the label set so the report keeps one row per name even when a
        # class is absent from both y_true and y_pred; otherwise the number of
        # observed classes may not match the number of target names.
        labels=list(range(len(target_names))),
        target_names=target_names
    )
    print(performance)

In [None]:
# Score the task-specific model against the four topic labels.
# NOTE(review): y_pred built by the inference loop only ever holds 0 or 1, so
# the Business and Sci/Tech rows come out as all zeros -- confirm this is intended.
evaluate_performance(ds["test"]["label"], y_pred)

              precision    recall  f1-score   support

       World       0.40      0.62      0.48        61
      Sports       0.30      0.58      0.39        53
    Business       0.00      0.00      0.00        29
    Sci/Tech       0.00      0.00      0.00        57

    accuracy                           0.34       200
   macro avg       0.17      0.30      0.22       200
weighted avg       0.20      0.34      0.25       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Classification tasks with embeddings

In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model.
model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

# Embed the train and test texts: one array of vectors per split.
train_embeddings, test_embeddings = (
    model.encode(ds[split_name]["text"], show_progress_bar=True)
    for split_name in ("train", "test")
)

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/32 [00:00<?, ?it/s]

Batches:   0%|          | 0/7 [00:00<?, ?it/s]

In [None]:
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

# Reuse the SentenceTransformer already bound to `model` instead of
# constructing the same checkpoint a second time.
embeddings = model.encode(sentences)
print(embeddings)


[[ 0.02250261 -0.0782918  -0.02303076 ... -0.00827925  0.02652694
  -0.00201898]
 [ 0.04170239  0.00109739 -0.01553419 ... -0.02181625 -0.06359357
  -0.00875283]]


In [None]:
# (number of training documents, embedding dimension)
train_embeddings.shape

(1000, 768)

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training embeddings.
# fit() returns the estimator itself, so construction and training are chained;
# the trailing expression shows the fitted estimator as the cell output.
clf = LogisticRegression(random_state=42).fit(train_embeddings, ds["train"]["label"])
clf

In [None]:
# Predict class ids for the held-out test embeddings, then print the
# per-class report against the ground-truth labels
y_pred = clf.predict(test_embeddings)
evaluate_performance(ds["test"]["label"], y_pred)

              precision    recall  f1-score   support

       World       0.88      0.87      0.88        61
      Sports       0.96      0.85      0.90        53
    Business       0.75      0.72      0.74        29
    Sci/Tech       0.83      0.95      0.89        57

    accuracy                           0.86       200
   macro avg       0.86      0.85      0.85       200
weighted avg       0.87      0.86      0.86       200



## What if we do not have labeled data: unsupervised use case

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.metrics.pairwise import cosine_similarity

# Average the embeddings of all training documents that share a label, giving
# one centroid per class (rows come out ordered by label id).
# Note: this step reads the training labels to build the centroids.
# Grouping by the label array directly avoids appending the labels as an extra
# column and hard-coding the embedding width (768) as that column's name.
train_labels = np.asarray(ds["train"]["label"])
averaged_target_embeddings = (
    pd.DataFrame(train_embeddings).groupby(train_labels).mean().values
)

# Assign each evaluation document to the class whose centroid is most similar
sim_matrix = cosine_similarity(test_embeddings, averaged_target_embeddings)
y_pred = np.argmax(sim_matrix, axis=1)

# Evaluate the model
evaluate_performance(ds["test"]["label"], y_pred)

              precision    recall  f1-score   support

       World       0.95      0.87      0.91        61
      Sports       0.93      0.96      0.94        53
    Business       0.70      0.90      0.79        29
    Sci/Tech       0.96      0.88      0.92        57

    accuracy                           0.90       200
   macro avg       0.88      0.90      0.89       200
weighted avg       0.91      0.90      0.90       200



## Zero-shot classification

In [None]:
# Create one embedding per class, in label-id order
# (0 = World, 1 = Sports, 2 = Business, 3 = Sci/Tech), so that an argmax over
# the similarities is directly a class id. Two sentiment-style descriptions
# ("negative"/"positive" review) could only ever yield ids 0 and 1 and do not
# describe the four classes being evaluated.
label_embeddings = model.encode([
    "A news article about world affairs",
    "A news article about sports",
    "A news article about business",
    "A news article about science and technology",
])

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# For every test document, pick the label whose embedding is closest
# (highest cosine similarity).
sim_matrix = cosine_similarity(test_embeddings, label_embeddings)
y_pred = sim_matrix.argmax(axis=1)

In [None]:
# Report zero-shot performance against the ground-truth labels
evaluate_performance(ds["test"]["label"], y_pred)

              precision    recall  f1-score   support

       World       0.34      0.51      0.41        61
      Sports       0.32      0.66      0.43        53
    Business       0.00      0.00      0.00        29
    Sci/Tech       0.00      0.00      0.00        57

    accuracy                           0.33       200
   macro avg       0.17      0.29      0.21       200
weighted avg       0.19      0.33      0.24       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Text classification with generative models

In [None]:
! pip install groq

Collecting groq
  Downloading groq-0.37.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.37.0-py3-none-any.whl (137 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.37.0


In [None]:
# `! export ...` runs in a throwaway subshell and never reaches the kernel, so
# the variable is set on this process instead. The key is prompted for (hidden
# input) only when it is not already present in the environment.
import os
from getpass import getpass

if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

In [None]:
# Take the text of the first test row as a running example and show it.
first_test_row = ds["test"][0]
sample_text = first_test_row["text"]
print(f"Review: {sample_text}\n")


Review: Fears for T N pension after talks Unions representing workers at Turner   Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.



In [None]:
import os
from getpass import getpass

from groq import Groq
from dotenv import load_dotenv

# Never commit an API key to the notebook. The key is read from the
# environment (optionally populated from a local .env file) and is prompted
# for with hidden input only as a last resort.
# NOTE(review): a key that was ever hardcoded in this cell should be treated
# as leaked and revoked.
load_dotenv()
if not os.getenv("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Groq API key: ")

client = Groq(
    api_key=os.environ["GROQ_API_KEY"],
)

# Ask the model for a one-word sentiment label for the sample text;
# max_tokens=10 caps the length of the reply.
# NOTE(review): the prompt calls the input a movie review, but sample_text
# comes from the news dataset -- confirm the wording and the task.
chat_completion = client.chat.completions.create(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    messages=[
        {
            "role": "system",
            "content": "You are a sentiment classifier. Respond with only 'positive' or 'negative'."
        },
        {
            "role": "user",
            "content": f"Classify the sentiment of this movie review: {sample_text}"
        }
    ],
    temperature=0,
    max_tokens=10

)
print(chat_completion.choices[0].message.content)

negative


In [None]:
# Ask the model for a numeric sentiment score for the sample text and print
# the raw reply.
score_messages = [
    {
        "role": "system",
        "content": "You are a sentiment classifier. Rate the sentiment as a number between 0 (negative) and 1 (positive). Respond with only the number.",
    },
    {
        "role": "user",
        "content": f"Rate the sentiment of this movie review: {sample_text}",
    },
]
chat_completion = client.chat.completions.create(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    messages=score_messages,
    temperature=0,
    max_tokens=10,
)
print(chat_completion.choices[0].message.content)

0.2


In [None]:
def groq_generation(prompt, model="meta-llama/llama-4-scout-17b-16e-instruct"):
    """Ask the Groq chat model to rate the sentiment of ``prompt``.

    Args:
        prompt: Text to be rated; it is appended to a fixed user instruction.
        model: Identifier of the Groq model to query.

    Returns:
        The message content of the first returned choice. The system prompt
        asks the model to answer with only a number between 0 and 1.
    """
    system_turn = {
        "role": "system",
        "content": "You are a sentiment classifier. Rate the sentiment as a number between 0 (negative) and 1 (positive). Respond with only the number.",
    }
    user_turn = {
        "role": "user",
        "content": f"Rate the sentiment of this movie review: {prompt}",
    }
    response = client.chat.completions.create(
        model=model,
        messages=[system_turn, user_turn],
        temperature=0,
        max_tokens=10,
    )
    return response.choices[0].message.content

In [None]:
# Try the helper on the sample document; the reply comes back as a string
groq_generation(sample_text)

'0.2'

In [None]:
# Disabled: this would issue one API call per test document.
# NOTE(review): as first written, this line passed `prompt` instead of the loop
# variable and read from `data`, which is not defined in this notebook (the
# dataset is `ds`); both are corrected in the line below.
#predictions = [groq_generation(doc) for doc in tqdm(ds["test"]["text"])]

In [None]:
# Disabled: turn the model replies into class ids and score them.
# NOTE(review): groq_generation returns strings such as '0.2', which int()
# cannot parse, and a 0-1 sentiment score does not map onto the four topic
# labels -- rework before enabling.
#y_pred = [int(pred) for pred in predictions]
#evaluate_performance(ds["test"]["label"], y_pred)

## Text-to-Text Transfer Transformer (T5)

In [None]:
# Load our model on the first GPU when one is available (CPU otherwise),
# rather than always forcing CPU.
# Note: this rebinds `pipe`, replacing the sentiment pipeline created earlier.
pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    device=0 if torch.cuda.is_available() else -1
)

Device set to use cpu


In [None]:
# Prepare our data: add a "t5" column holding the instruction followed by
# the document text.
prompt = "Is the following sentence positive or negative? "


def _with_t5_prompt(example):
    """Return the extra "t5" field for one dataset row."""
    return {"t5": prompt + example['text']}


ds = ds.map(_with_t5_prompt)
ds

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['text', 'label', 't5'],
        num_rows: 200
    })
})

In [None]:
# Run inference on the prompted "t5" column built in the data-preparation
# cell; the raw "text" column carries no instruction for the model.
# NOTE(review): replies are mapped to 0/1 sentiment ids, whereas the dataset
# labels are four topic ids -- confirm the prompt and mapping match the task.
y_pred = []
for output in tqdm(pipe(KeyDataset(ds["test"], "t5")), total=len(ds["test"])):
    text = output[0]["generated_text"]
    # Normalise case and whitespace so e.g. "Negative" is still recognised.
    y_pred.append(0 if text.strip().lower() == "negative" else 1)

  1%|          | 2/200 [00:10<16:38,  5.04s/it]


KeyboardInterrupt: 

In [None]:
# NOTE(review): if the inference cell was interrupted before finishing, y_pred
# is shorter than the label list -- run inference to completion before evaluating.
evaluate_performance(ds["test"]["label"], y_pred)