Install

In [20]:
!pip -q install datasets transformers evaluate scikit-learn

Load Data

In [2]:
from datasets import load_dataset
import random

# Fixed seed so the sampled subsets are reproducible (edit it to draw a different sample)
seed = 42
random.seed(seed)

ds = load_dataset("glue", "sst2")

# Keep only 200 shuffled examples per split; the "validation" split is used as the test set
train, test = (
    ds[split_name].shuffle(seed=seed).select(range(200))
    for split_name in ("train", "validation")
)

# Peek at one training example
train[0]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

sst2/train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

sst2/validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

sst2/test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

{'sentence': 'klein , charming in comedies like american pie and dead-on in election , ',
 'label': 1,
 'idx': 32326}

Naive Baseline

In [3]:
# Hand-picked sentiment lexicons used by the keyword-rule baseline.
pos_words = {
    "good", "great", "amazing", "love", "excellent",
    "wonderful", "best", "enjoy", "awesome", "fantastic",
}
neg_words = {
    "bad", "terrible", "boring", "hate", "awful",
    "worst", "poor", "waste", "dull", "disappointing",
}

def baseline_predict(text: str) -> int:
    """Label `text` as positive (1) or negative (0) by counting lexicon hits.

    The text is split on whitespace; each piece has surrounding punctuation
    stripped and is lower-cased before lookup. Every hit in `pos_words` adds
    one to the running balance and every hit in `neg_words` subtracts one.

    Returns:
        1 if positive hits strictly outnumber negative hits, otherwise 0
        (so ties and texts with no lexicon words are labelled 0).
    """
    punctuation = ".,!?;:()[]\"'"
    balance = 0
    for piece in text.split():
        word = piece.strip(punctuation).lower()
        # bool arithmetic: +1 for a positive word, -1 for a negative one, 0 otherwise
        balance += (word in pos_words) - (word in neg_words)
    return int(balance > 0)

In [11]:
# Sanity-check the baseline on three hand-written sentences (one prediction per line)
print(
    baseline_predict("This movie was amazing and wonderful!"),  # expected: 1
    baseline_predict("This movie was boring and awful."),       # expected: 0
    baseline_predict("This movie was not good at all"),         # ideally 0, but the keyword rules ignore "not" so 1 is likely
    sep="\n",
)

1
0
1


AI Pipeline

In [5]:
from transformers import pipeline

# Sentiment classifier backed by a DistilBERT checkpoint fine-tuned on SST-2
clf = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

def model_predict(text: str) -> int:
    """Classify `text` with the `clf` pipeline and map its label to 0/1.

    Only the first result returned by the pipeline is used. `truncation=True`
    is forwarded to the pipeline call (presumably so over-long inputs are cut
    to the model's maximum length - confirm against the transformers docs).

    Returns:
        1 if the predicted label starts with "POS" (case-insensitive), else 0.
    """
    top_result = clf(text, truncation=True)[0]
    predicted_label = top_result["label"].upper()
    if predicted_label.startswith("POS"):
        return 1
    return 0



config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


In [9]:
# Sanity-check the model on three hand-written sentences (one prediction per line)
print(
    *map(
        model_predict,
        (
            "This movie was amazing and wonderful!",  # expected: 1
            "This movie was boring and awful.",       # expected: 0
            "This movie was not good at all",         # expected: 0
        ),
    ),
    sep="\n",
)

1
0
0


In [10]:
# Show the raw pipeline output (label and score) for three hand-written sentences
print(
    clf("This movie was amazing and wonderful!"),
    clf("This movie was boring and awful."),
    clf("This movie was not good at all"),
    sep="\n",
)

[{'label': 'POSITIVE', 'score': 0.9998817443847656}]
[{'label': 'NEGATIVE', 'score': 0.9997926354408264}]
[{'label': 'NEGATIVE', 'score': 0.999765932559967}]


Extract Test Sentences and Labels

In [13]:
# Pull the sentences and their gold labels out of the test subset as plain lists
texts, y_true = list(test["sentence"]), list(test["label"])

# Quick look, one item per line: both lengths, then the first sentence and its label
print("\n".join(map(str, (len(texts), len(y_true), texts[0], y_true[0]))))

200
200
it gets onto the screen just about as much of the novella as one could reasonably expect , and is engrossing and moving in its own right . 
1


In [14]:
# Run both predictors over every test sentence
y_base = list(map(baseline_predict, texts))   # keyword-rule baseline
y_model = list(map(model_predict, texts))     # transformer pipeline

# Preview the first five predictions from each predictor
(y_base[:5], y_model[:5])

([0, 0, 0, 1, 0], [1, 1, 1, 1, 1])

Accuracy and F1

In [15]:
from sklearn.metrics import accuracy_score, f1_score

# Score each predictor against the gold labels: accuracy first, then F1
acc_base, f1_base = accuracy_score(y_true, y_base), f1_score(y_true, y_base)      # keyword-rule baseline
acc_model, f1_model = accuracy_score(y_true, y_model), f1_score(y_true, y_model)  # transformer pipeline

# Report the two rows in matching format so they are easy to compare by eye
print("Baseline  - Accuracy:", acc_base, "F1:", f1_base)
print("AI Model  - Accuracy:", acc_model, "F1:", f1_model)

Baseline  - Accuracy: 0.51 F1: 0.2222222222222222
AI Model  - Accuracy: 0.915 F1: 0.9230769230769231


In [17]:
import pandas as pd

# Summary table: one row per method, built column-wise from the metrics computed earlier
results = pd.DataFrame(
    {
        "Method": ["Baseline (keyword rules)", "AI Pipeline (DistilBERT)"],
        "Accuracy": [acc_base, acc_model],
        "F1": [f1_base, f1_model],
    }
)

results

Unnamed: 0,Method,Accuracy,F1
0,Baseline (keyword rules),0.51,0.222222
1,AI Pipeline (DistilBERT),0.915,0.923077


Examples with Different Results

In [18]:
# Positions in `texts` where the baseline and the model predict different labels
diff_idx = [i for i, _ in enumerate(texts) if y_model[i] != y_base[i]]

# Number of disagreements, plus the first ten disagreeing indices
(len(diff_idx), diff_idx[:10])

(103, [0, 1, 2, 4, 8, 12, 13, 14, 16, 17])

In [19]:
# Human-readable names for the 0/1 labels
label_map = {0: "NEGATIVE", 1: "POSITIVE"}

# Print the first three disagreements: the sentence, then the gold label and both predictions
for idx in diff_idx[:3]:
    print("TEXT:", texts[idx])
    for tag, labels in (("TRUE:", y_true), ("BASE:", y_base), ("MODEL:", y_model)):
        print(tag, label_map[labels[idx]])
    print("-" * 70)

TEXT: it gets onto the screen just about as much of the novella as one could reasonably expect , and is engrossing and moving in its own right . 
TRUE: POSITIVE
BASE: NEGATIVE
MODEL: POSITIVE
----------------------------------------------------------------------
TEXT: my big fat greek wedding uses stereotypes in a delightful blend of sweet romance and lovingly dished out humor . 
TRUE: POSITIVE
BASE: NEGATIVE
MODEL: POSITIVE
----------------------------------------------------------------------
TEXT: for the most part , director anne-sophie birot 's first feature is a sensitive , extraordinarily well-acted drama . 
TRUE: POSITIVE
BASE: NEGATIVE
MODEL: POSITIVE
----------------------------------------------------------------------
