## Libraries

In [None]:
!pip install -U transformers

In [None]:
!pip install -U datasets

In [None]:
!pip install -U huggingface_hub fsspec

In [None]:
from datasets import load_dataset, get_dataset_split_names
import torch
from sklearn.svm import SVC
from sklearn.metrics import classification_report
import numpy as np
from tqdm import tqdm
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import (
    pipeline,
    AutoModel,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
import matplotlib.pyplot as plt
from peft import get_peft_model, LoraConfig, TaskType

## Exercise 1.1: Load Rotten Tomatoes dataset

In [None]:
# Select the compute device once: the GPU when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Load every split of the Rotten Tomatoes dataset from the Hugging Face Hub.
dataset = load_dataset("rotten_tomatoes")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

train.parquet:   0%|          | 0.00/699k [00:00<?, ?B/s]

validation.parquet:   0%|          | 0.00/90.0k [00:00<?, ?B/s]

test.parquet:   0%|          | 0.00/92.2k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [None]:
# Show the splits with their columns and sizes, then list the split names.
print(dataset)
print(get_dataset_split_names("rotten_tomatoes"))

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 8530
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 1066
    })
})
['train', 'validation', 'test']


## Exercise 1.2: Load the DistilBERT model and its tokenizer

In [None]:
# Feature-extraction pipeline built on the base DistilBERT checkpoint.
# NOTE(review): `pipe` is not referenced again in the cells shown here.
pipe = pipeline("feature-extraction", model="distilbert/distilbert-base-uncased")

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
# Tokenizer matching the base DistilBERT checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [None]:
# Bare DistilBERT encoder (no task head); its output is the last hidden state.
model = AutoModel.from_pretrained("distilbert/distilbert-base-uncased")

In [None]:
# Grab the first three training reviews as a small sample and print them,
# one review per line.
texts = [dataset["train"][row]["text"] for row in (0, 1, 2)]

print(*texts, sep="\n")

the rock is destined to be the 21st century's new " conan " and that he's going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .
the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson's expanded vision of j . r . r . tolkien's middle-earth .
effective but too-tepid biopic


In [None]:
# Tokenize the sample texts as one batch: pad to the longest text, truncate
# over-long inputs, and return PyTorch tensors.
inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
print(inputs)

{'input_ids': tensor([[  101,  1996,  2600,  2003, 16036,  2000,  2022,  1996,  7398,  2301,
          1005,  1055,  2047,  1000, 16608,  1000,  1998,  2008,  2002,  1005,
          1055,  2183,  2000,  2191,  1037, 17624,  2130,  3618,  2084,  7779,
         29058,  8625, 13327,  1010,  3744,  1011, 18856, 19513,  3158,  5477,
          4168,  2030,  7112, 16562,  2140,  1012,   102,     0,     0,     0,
             0,     0],
        [  101,  1996,  9882,  2135,  9603, 13633,  1997,  1000,  1996,  2935,
          1997,  1996,  7635,  1000, 11544,  2003,  2061,  4121,  2008,  1037,
          5930,  1997,  2616,  3685, 23613,  6235,  2522,  1011,  3213,  1013,
          2472,  2848,  4027,  1005,  1055,  4423,  4432,  1997,  1046,  1012,
          1054,  1012,  1054,  1012, 23602,  1005,  1055,  2690,  1011,  3011,
          1012,   102],
        [  101,  4621,  2021,  2205,  1011,  8915, 23267, 16012, 24330,   102,
             0,     0,     0,     0,     0,     0,     0,     0,     

In [None]:
# Forward pass without gradient tracking: this is inference only.
with torch.no_grad():
    outputs = model(**inputs)

In [None]:
# Inspect what the encoder returned: available fields, the shape of the
# hidden states, and the full output object.
print(f"Keys in output: {outputs.keys()}")
print(f"Shape of last_hidden_state: {outputs.last_hidden_state.shape}")
print(f"Output: {outputs}")

Keys in output: odict_keys(['last_hidden_state'])
Shape of last_hidden_state: torch.Size([3, 52, 768])
Output: BaseModelOutput(last_hidden_state=tensor([[[-0.0332, -0.0168,  0.0194,  ...,  0.0476,  0.5834,  0.3036],
         [-0.0235, -0.0555, -0.3638,  ...,  0.1877,  0.5781, -0.1577],
         [-0.0516, -0.1014, -0.1511,  ...,  0.1503,  0.2649, -0.1575],
         ...,
         [ 0.3688, -0.1147,  0.8428,  ..., -0.0708, -0.0178, -0.2516],
         [ 0.0654, -0.0206,  0.1889,  ...,  0.1159,  0.2323, -0.2404],
         [ 0.0373, -0.0104,  0.1203,  ...,  0.1049,  0.2852, -0.3035]],

        [[-0.2062, -0.0490, -0.4036,  ..., -0.1186,  0.6141,  0.3919],
         [-0.4361, -0.1647, -0.3533,  ...,  0.1086,  0.9478, -0.0272],
         [-0.1164,  0.1690,  0.2698,  ..., -0.1971,  0.4372,  0.2527],
         ...,
         [-0.2341,  0.4810, -0.2634,  ..., -0.3397,  0.2567,  0.1274],
         [ 0.7139,  0.0574, -0.3260,  ...,  0.2041, -0.3800, -0.3343],
         [ 0.5649,  0.2806, -0.0295,  ...,  

## Exercise 1.3.1: Use DistilBERT as a feature extractor

In [None]:
def extract_features(tokenizer, model, texts, batch_size=16):
    """Embed ``texts`` with ``model`` and return one [CLS] vector per text.

    Args:
        tokenizer: Callable that turns a list of strings into a batch of
            tensors. It is called with ``padding=True, truncation=True,
            return_tensors="pt"`` and its result must support ``.to(device)``.
        model: Encoder whose output exposes ``last_hidden_state``. The caller
            is expected to have placed it on the desired device and put it in
            eval mode; this function changes neither.
        texts: Sliceable sequence of strings.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        ``np.ndarray`` with one row per input text, built by stacking the
        position-0 hidden state of every sequence.

    Raises:
        ValueError: If ``texts`` is empty (``np.vstack`` has nothing to stack).
    """
    # Send inputs to wherever the model already lives instead of relying on a
    # notebook-level `device` variable that may not match the model.
    model_device = getattr(model, "device", None)
    if model_device is None:
        model_device = next(model.parameters()).device

    features = []
    for start in tqdm(range(0, len(texts), batch_size)):
        # list(...) turns any sequence-like slice (tuple, dataset column, ...)
        # into the plain list of strings the tokenizer expects.
        batch = list(texts[start:start + batch_size])
        inputs = tokenizer(
            batch, padding=True, truncation=True, return_tensors="pt"
        ).to(model_device)

        with torch.no_grad():
            outputs = model(**inputs)

        cls_embeddings = outputs.last_hidden_state[:, 0, :]  # CLS token
        features.append(cls_embeddings.cpu().numpy())

    return np.vstack(features)

In [None]:
# Put the encoder on the selected device and in eval mode before embedding.
model.to(device)
model.eval()

# One feature matrix per split, in train / validation / test order ...
X_train, X_val, X_test = (
    extract_features(tokenizer, model, dataset[split]["text"])
    for split in ("train", "validation", "test")
)

# ... and the matching label vectors.
y_train, y_val, y_test = (
    np.array(dataset[split]["label"])
    for split in ("train", "validation", "test")
)

100%|██████████| 534/534 [00:05<00:00, 92.62it/s] 
100%|██████████| 67/67 [00:00<00:00, 103.09it/s]
100%|██████████| 67/67 [00:00<00:00, 101.31it/s]


In [None]:
# Sanity check: features and labels of each split must have matching row counts.
for array in (X_train, y_train, X_val, y_val, X_test, y_test):
    print(array.shape)

(8530, 768)
(8530,)
(1066, 768)
(1066,)
(1066, 768)
(1066,)


In [None]:
# Peek at the feature vector of the first training example.
print(X_train[0])

[-3.31733041e-02 -1.68090370e-02  1.94118079e-02 -2.57178526e-02
 -1.37966782e-01 -3.96170467e-01  3.82996678e-01  5.11756539e-01
  2.30819006e-02 -5.55352978e-02 -6.31647855e-02 -1.36815831e-01
 -5.17974272e-02  4.98288572e-01  2.31825978e-01  2.37955913e-01
 -3.11688304e-01  2.47248575e-01  2.27939710e-01  4.64462340e-02
 -1.53963149e-01 -1.51284650e-01  1.73891783e-01 -7.23566562e-02
  5.87145127e-02 -1.85213745e-01 -3.66291292e-02 -6.72617331e-02
  8.16390291e-02  2.59853721e-01  2.37147082e-02  7.89611340e-02
 -5.16882658e-01 -2.95907408e-01  4.53972146e-02 -1.15343206e-01
  9.63999331e-02 -9.29997936e-02  1.08496033e-01  2.89733291e-01
  2.30172440e-01  2.28502959e-01  9.57188532e-02  7.57015571e-02
 -1.87787175e-01 -2.29609564e-01 -2.59354758e+00  1.40535071e-01
 -9.00674686e-02 -2.95129865e-01  4.71378088e-01  1.54841200e-01
 -1.94468066e-01  2.03654677e-01  4.33259577e-01  1.77997231e-01
 -3.48946601e-01  3.01722527e-01 -5.00008501e-02 -3.08810174e-02
  6.40418231e-02  1.68399

## Exercises 1.3.2 and 1.3.3: Train a classical classifier and evaluate its performance

### SVM training

In [None]:
# Linear-kernel SVM trained on the extracted DistilBERT features.
svc = SVC(kernel='linear')
svc.fit(X_train, y_train)

In [None]:
# SVM predictions for the validation and test features.
val_preds_svc, test_preds_svc = (
    svc.predict(split_features) for split_features in (X_val, X_test)
)

In [None]:
# Per-class precision / recall / F1 of the SVM on the validation split.
print("\nValidation Results:", classification_report(y_val, val_preds_svc), sep="\n")


Validation Results:
              precision    recall  f1-score   support

           0       0.80      0.85      0.82       533
           1       0.84      0.79      0.81       533

    accuracy                           0.82      1066
   macro avg       0.82      0.82      0.82      1066
weighted avg       0.82      0.82      0.82      1066



In [None]:
# Per-class precision / recall / F1 of the SVM on the test split.
print("\nTest Results:", classification_report(y_test, test_preds_svc), sep="\n")


Test Results:
              precision    recall  f1-score   support

           0       0.80      0.82      0.81       533
           1       0.82      0.79      0.80       533

    accuracy                           0.81      1066
   macro avg       0.81      0.81      0.81      1066
weighted avg       0.81      0.81      0.81      1066



### Logistic regression

In [None]:
# Logistic regression on the same features; the iteration cap is set to 1000.
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)

In [None]:
# Logistic-regression predictions for the validation and test features.
val_preds_logreg, test_preds_logreg = (
    logreg.predict(split_features) for split_features in (X_val, X_test)
)

In [None]:
# Per-class metrics of logistic regression on the validation split.
print(
    "Logistic Regression - Validation Results:",
    classification_report(y_val, val_preds_logreg),
    sep="\n",
)

Logistic Regression - Validation Results:
              precision    recall  f1-score   support

           0       0.81      0.86      0.83       533
           1       0.85      0.80      0.83       533

    accuracy                           0.83      1066
   macro avg       0.83      0.83      0.83      1066
weighted avg       0.83      0.83      0.83      1066



In [None]:
# Per-class metrics of logistic regression on the test split.
print(
    "Logistic Regression - Test Results:",
    classification_report(y_test, test_preds_logreg),
    sep="\n",
)

Logistic Regression - Test Results:
              precision    recall  f1-score   support

           0       0.79      0.81      0.80       533
           1       0.81      0.79      0.80       533

    accuracy                           0.80      1066
   macro avg       0.80      0.80      0.80      1066
weighted avg       0.80      0.80      0.80      1066



### K-Nearest Neighbor

In [None]:
# k-nearest-neighbours classifier (k = 5) on the same features.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

In [None]:
# k-NN predictions for the validation and test features.
val_preds_knn, test_preds_knn = (
    knn.predict(split_features) for split_features in (X_val, X_test)
)

In [None]:
# Per-class metrics of k-NN on the validation split.
print(
    "K-Nearest Neighbor - Validation Results:",
    classification_report(y_val, val_preds_knn),
    sep="\n",
)

K-Nearest Neighbor - Validation Results:
              precision    recall  f1-score   support

           0       0.74      0.74      0.74       533
           1       0.74      0.74      0.74       533

    accuracy                           0.74      1066
   macro avg       0.74      0.74      0.74      1066
weighted avg       0.74      0.74      0.74      1066



In [None]:
# Per-class metrics of k-NN on the test split.
print(
    "K-Nearest Neighbor - Test Results:",
    classification_report(y_test, test_preds_knn),
    sep="\n",
)

K-Nearest Neighbor - Test Results:
              precision    recall  f1-score   support

           0       0.74      0.72      0.73       533
           1       0.73      0.74      0.73       533

    accuracy                           0.73      1066
   macro avg       0.73      0.73      0.73      1066
weighted avg       0.73      0.73      0.73      1066



## Exercise 2.1: Token Preprocessing

In [None]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` column of a batch of examples.

    Sequences are truncated but deliberately NOT padded here: the training
    cells batch this data with ``DataCollatorWithPadding``, which pads each
    batch to its own longest sequence. Padding every example up front to the
    maximum length would make that collator pointless and every batch as wide
    as the longest allowed input.

    Args:
        examples: Mapping with a ``"text"`` entry holding a list of strings
            (the batch format ``Dataset.map(..., batched=True)`` passes in).

    Returns:
        Whatever the notebook-level ``tokenizer`` returns for that batch.
    """
    return tokenizer(examples["text"], truncation=True)

In [None]:
# Tokenize every split in batches; the new columns are added next to text/label.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/8530 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [None]:
# The tokenized object keeps the same three splits.
print(tokenized_datasets.keys())

dict_keys(['train', 'validation', 'test'])


In [None]:
# Column names of the training split after tokenization.
print(tokenized_datasets["train"].features.keys())

dict_keys(['text', 'label', 'input_ids', 'attention_mask'])


In [None]:
# Full feature schema (column types) of the training split.
print(tokenized_datasets["train"].features)

{'text': Value(dtype='string', id=None), 'label': ClassLabel(names=['neg', 'pos'], id=None), 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None), 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}


In [None]:
# NOTE(review): repeats the column-name print from two cells earlier.
print(tokenized_datasets["train"].features.keys())

dict_keys(['text', 'label', 'input_ids', 'attention_mask'])


In [None]:
# Show the first tokenized training example (text, label, ids, attention mask).
print(tokenized_datasets["train"][0])

{'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .', 'label': 1, 'input_ids': [101, 1996, 2600, 2003, 16036, 2000, 2022, 1996, 7398, 2301, 1005, 1055, 2047, 1000, 16608, 1000, 1998, 2008, 2002, 1005, 1055, 2183, 2000, 2191, 1037, 17624, 2130, 3618, 2084, 7779, 29058, 8625, 13327, 1010, 3744, 1011, 18856, 19513, 3158, 5477, 4168, 2030, 7112, 16562, 2140, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

## Exercise 2.2: Prepare the base DistilBERT model for fine-tuning on a sequence-classification task

In [None]:
# Have the datasets hand back PyTorch tensors when rows are accessed.
tokenized_datasets.set_format("torch")

In [None]:
# Same checkpoint as the tokenizer loaded in Exercise 1.2; reloaded here so
# this section can be read on its own.
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [None]:
# DistilBERT with a two-class classification head on top. This rebinds `model`,
# which previously held the bare encoder.
model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased", num_labels=2)

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Print the module tree to see the layer names (used later to pick layers).
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


## Exercise 2.3: Fine-tuning DistilBERT

In [None]:
def compute_metrics(eval_pred):
    """Turn a ``(logits, labels)`` pair into accuracy, F1, precision and recall.

    The predicted class is the arg-max over the last axis of the logits.
    Precision, recall and F1 are computed with ``average="binary"``.

    Args:
        eval_pred: Two-item iterable ``(logits, labels)``.

    Returns:
        Dict with the keys ``"accuracy"``, ``"f1"``, ``"precision"`` and
        ``"recall"``.
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=-1)

    # The fourth returned element (support) is not needed here.
    prf = precision_recall_fscore_support(gold, predicted, average="binary")
    prec, rec, f_score = prf[0], prf[1], prf[2]

    metrics = {}
    metrics["accuracy"] = accuracy_score(gold, predicted)
    metrics["f1"] = f_score
    metrics["precision"] = prec
    metrics["recall"] = rec
    return metrics

In [None]:
def finetune(epochs, datasets, tokenizer, data_collator, compute_metrics):
  """Fine-tune the notebook-level ``model`` and return the fitted ``Trainer``.

  Evaluation, logging and checkpointing all happen once per epoch; after
  training, the checkpoint with the best validation accuracy is loaded back.

  Args:
      epochs: Number of training epochs.
      datasets: Mapping with ``"train"`` and ``"validation"`` entries holding
          the tokenized splits.
      tokenizer: Tokenizer handed to the ``Trainer``.
      data_collator: Callable that assembles individual examples into a batch.
      compute_metrics: Callable turning ``(logits, labels)`` into a metrics
          dict; it must report ``"accuracy"``, which selects the best model.

  Returns:
      The ``Trainer`` instance after ``train()`` has completed.

  Note:
      The model is NOT a parameter: this function trains the global ``model``
      in place.
  """
  training_args = TrainingArguments(
      output_dir="./results",
      eval_strategy="epoch",
      logging_strategy="epoch",
      save_strategy="epoch",
      num_train_epochs=epochs,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=64,
      load_best_model_at_end=True,
      metric_for_best_model="accuracy",
      logging_dir="./logs",
      report_to="none"
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      # Use the splits that were passed in, not a same-named notebook global.
      train_dataset=datasets["train"],
      eval_dataset=datasets["validation"],
      # `processing_class` replaces the deprecated `tokenizer` argument.
      processing_class=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )

  trainer.train()

  return trainer

In [None]:
# Collator that pads the examples of each batch with the tokenizer.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

### Evaluate performance before fine-tuning

In [None]:
# Validation split only, used for the baseline measurement before training.
dataset_val = load_dataset("rotten_tomatoes", split="validation")

In [None]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of a batch to one fixed length.

    Every sequence is truncated and padded to the maximum length, so all rows
    end up equally long and can be batched without a padding collator.
    """
    batch_texts = examples["text"]
    encoded = tokenizer(batch_texts, padding="max_length", truncation=True)
    return encoded

In [None]:
tokenized_dataset = dataset_val.map(tokenize_function, batched=True)
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# Run the baseline on the selected device (GPU when available) and in eval
# mode, so dropout is off and the forward passes are not stuck on the CPU.
model.to(device)
model.eval()

all_preds, all_labels = [], []

with torch.no_grad():
    for batch in torch.utils.data.DataLoader(tokenized_dataset, batch_size=32):
        # Only the tensors the model consumes are moved to the device; the
        # labels stay where they are and are only used for scoring.
        inputs = {key: batch[key].to(device) for key in ("input_ids", "attention_mask")}
        outputs = model(**inputs)
        preds = torch.argmax(outputs.logits, dim=-1)
        all_preds.extend(preds.tolist())
        all_labels.extend(batch["label"].tolist())


Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [None]:
acc = accuracy_score(all_labels, all_preds)
print(f"Accuracy without fine-tuning: {acc:.4f}")
# The classification head is randomly initialised at this point (see the
# warning printed when the model was loaded), so the encoder alone cannot
# produce meaningful predictions yet.

Accuracy without fine-tuning: 0.5000


### Inefficient fine-tuning

In [None]:
# Full fine-tuning: no parameter has been frozen, so all weights are updated.
epochs = 3
ft = finetune(epochs, tokenized_datasets, tokenizer, data_collator, compute_metrics)

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.074,0.876057,0.835835,0.829601,0.862348,0.79925
2,0.0488,0.900803,0.831144,0.832402,0.826248,0.838649
3,0.021,1.095286,0.839587,0.84093,0.833948,0.84803


In [None]:
# Metrics of the trained model on the validation split.
print("Validation results:")
print(ft.evaluate(tokenized_datasets["validation"]))

Validation results:


{'eval_loss': 1.0952861309051514, 'eval_accuracy': 0.8395872420262664, 'eval_f1': 0.8409302325581396, 'eval_precision': 0.8339483394833949, 'eval_recall': 0.8480300187617261, 'eval_runtime': 7.29, 'eval_samples_per_second': 146.227, 'eval_steps_per_second': 2.332, 'epoch': 3.0}


In [None]:
# Metrics of the trained model on the held-out test split.
print("Test results:")
print(ft.evaluate(tokenized_datasets["test"]))

Test results:


{'eval_loss': 1.1140999794006348, 'eval_accuracy': 0.8292682926829268, 'eval_f1': 0.8283018867924529, 'eval_precision': 0.8330170777988615, 'eval_recall': 0.8236397748592871, 'eval_runtime': 7.3074, 'eval_samples_per_second': 145.88, 'eval_steps_per_second': 2.326, 'epoch': 3.0}


## Exercise 3.1: Efficient Fine-tuning for Sentiment Analysis

### Freeze Most of the Model (turns out it is inefficient)

In [None]:
# Start from a fresh pretrained checkpoint. Without this reload, `model` would
# still carry the weights from the full fine-tuning run above and this
# experiment would not measure partial fine-tuning on its own.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", num_labels=2
)

# Freeze everything except the last transformer block and the classification
# head. "classifier" is matched as a substring, so "pre_classifier" stays
# trainable as well.
trainable_markers = ("transformer.layer.5", "classifier")
for name, param in model.named_parameters():
    if not any(marker in name for marker in trainable_markers):
        param.requires_grad = False

In [None]:
# Same format call as in Exercise 2.2, repeated so this section runs on its own.
tokenized_datasets.set_format("torch")

In [None]:
# Same padding collator as before, recreated for this section.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
# Train again; only parameters with requires_grad=True receive updates.
epochs = 3
ft = finetune(epochs, tokenized_datasets, tokenizer, data_collator, compute_metrics)

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2786,0.424415,0.817073,0.834886,0.760802,0.924953
2,0.1544,0.650763,0.847092,0.845204,0.855769,0.834897
3,0.051,0.801396,0.85272,0.855032,0.841818,0.868668


In [None]:
# Validation metrics of the partially frozen model.
print("Validation results:")
print(ft.evaluate(tokenized_datasets["validation"]))

Validation results:


{'eval_loss': 0.8013959527015686, 'eval_accuracy': 0.8527204502814258, 'eval_f1': 0.8550323176361958, 'eval_precision': 0.8418181818181818, 'eval_recall': 0.8686679174484052, 'eval_runtime': 7.5053, 'eval_samples_per_second': 142.034, 'eval_steps_per_second': 2.265, 'epoch': 3.0}


In [None]:
# Test metrics of the partially frozen model.
print("Test results:")
print(ft.evaluate(tokenized_datasets["test"]))

Test results:


{'eval_loss': 0.8453226089477539, 'eval_accuracy': 0.8452157598499062, 'eval_f1': 0.8462255358807083, 'eval_precision': 0.8407407407407408, 'eval_recall': 0.851782363977486, 'eval_runtime': 7.6288, 'eval_samples_per_second': 139.733, 'eval_steps_per_second': 2.228, 'epoch': 3.0}


### Use less memory

In [None]:
def finetune(model, epochs, datasets, tokenizer, data_collator, compute_metrics):
  """Fine-tune ``model`` with mixed precision and return the fitted ``Trainer``.

  Evaluation, logging and checkpointing all happen once per epoch; after
  training, the checkpoint with the best validation accuracy is loaded back.

  Args:
      model: Model to train (updated in place by the ``Trainer``).
      epochs: Number of training epochs.
      datasets: Mapping with ``"train"`` and ``"validation"`` entries holding
          the tokenized splits.
      tokenizer: Tokenizer handed to the ``Trainer``.
      data_collator: Callable that assembles individual examples into a batch.
      compute_metrics: Callable turning ``(logits, labels)`` into a metrics
          dict; it must report ``"accuracy"``, which selects the best model.

  Returns:
      The ``Trainer`` instance after ``train()`` has completed.
  """
  training_args = TrainingArguments(
      output_dir="./results",
      eval_strategy="epoch",
      logging_strategy="epoch",
      save_strategy="epoch",
      num_train_epochs=epochs,
      per_device_train_batch_size=16,
      per_device_eval_batch_size=64,
      load_best_model_at_end=True,
      metric_for_best_model="accuracy",
      logging_dir="./logs",
      report_to="none",
      # Mixed precision needs a GPU; fall back to full precision on CPU-only
      # machines instead of failing when the arguments are built.
      fp16=torch.cuda.is_available()
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      # Use the splits that were passed in, not a same-named notebook global.
      train_dataset=datasets["train"],
      eval_dataset=datasets["validation"],
      # `processing_class` replaces the deprecated `tokenizer` argument.
      processing_class=tokenizer,
      data_collator=data_collator,
      compute_metrics=compute_metrics,
  )

  trainer.train()

  return trainer

In [None]:
# Reload the pretrained checkpoint so this run starts from fresh weights with
# a newly initialised classification head.
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Same format call as in the earlier sections, repeated for this one.
tokenized_datasets.set_format("torch")

In [None]:
# Same padding collator as before, recreated for this section.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
# Train the freshly loaded model with the redefined finetune(), which takes
# the model as its first argument.
epochs = 3
ft = finetune(model, epochs, tokenized_datasets, tokenizer, data_collator, compute_metrics)

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0794,0.987037,0.822702,0.839422,0.767081,0.926829
2,0.0398,0.866324,0.844278,0.842803,0.85086,0.834897
3,0.0103,1.04618,0.850844,0.854529,0.833929,0.876173


In [None]:
# Validation metrics of this run.
print("Validation results:")
print(ft.evaluate(tokenized_datasets["validation"]))

Validation results:


{'eval_loss': 1.0461803674697876, 'eval_accuracy': 0.850844277673546, 'eval_f1': 0.8545288197621226, 'eval_precision': 0.8339285714285715, 'eval_recall': 0.8761726078799249, 'eval_runtime': 2.3607, 'eval_samples_per_second': 451.563, 'eval_steps_per_second': 7.201, 'epoch': 3.0}


In [None]:
# Test metrics of this run.
print("Test results:")
print(ft.evaluate(tokenized_datasets["test"]))

Test results:


{'eval_loss': 1.136006474494934, 'eval_accuracy': 0.8395872420262664, 'eval_f1': 0.8423963133640553, 'eval_precision': 0.8278985507246377, 'eval_recall': 0.8574108818011257, 'eval_runtime': 2.3711, 'eval_samples_per_second': 449.586, 'eval_steps_per_second': 7.17, 'epoch': 3.0}


### Parameter-Efficient Fine-Tuning (PEFT)

In [None]:
# Fresh pretrained checkpoint to wrap with LoRA adapters.
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

# LoRA settings: adapters of rank 8 (scaling alpha 16, dropout 0.1) are
# attached to the q_lin and v_lin projections listed in the printed model;
# bias terms are left out of the adaptation, and the task type is sequence
# classification.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_lin", "v_lin"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_CLS
)

# Wrap the base model; `model` now refers to the PEFT-wrapped version.
model = get_peft_model(model, lora_config)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Same format call as in the earlier sections, repeated for this one.
tokenized_datasets.set_format("torch")

In [None]:
# Same padding collator as before, recreated for this section.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
epochs = 3
# finetune() was redefined in the previous section with `model` as its first
# parameter. The old five-argument call fails on a fresh top-to-bottom run, so
# the LoRA-wrapped model is passed explicitly.
ft = finetune(model, epochs, tokenized_datasets, tokenizer, data_collator, compute_metrics)

  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4845,0.394277,0.821764,0.820416,0.826667,0.814259
2,0.4016,0.376595,0.832083,0.829035,0.844358,0.814259
3,0.3846,0.374142,0.829268,0.824324,0.848907,0.801126


In [None]:
# Validation metrics of the LoRA-adapted model.
print("Validation results:")
print(ft.evaluate(tokenized_datasets["validation"]))

Validation results:


{'eval_loss': 0.3765951693058014, 'eval_accuracy': 0.8320825515947468, 'eval_f1': 0.8290353390639924, 'eval_precision': 0.8443579766536965, 'eval_recall': 0.8142589118198874, 'eval_runtime': 8.3129, 'eval_samples_per_second': 128.234, 'eval_steps_per_second': 2.045, 'epoch': 3.0}


In [None]:
# Test metrics of the LoRA-adapted model.
print("Test results:")
print(ft.evaluate(tokenized_datasets["test"]))

Test results:


{'eval_loss': 0.4090079665184021, 'eval_accuracy': 0.8208255159474672, 'eval_f1': 0.8196411709159585, 'eval_precision': 0.8250950570342205, 'eval_recall': 0.8142589118198874, 'eval_runtime': 8.5281, 'eval_samples_per_second': 124.998, 'eval_steps_per_second': 1.993, 'epoch': 3.0}


### Model fine-tuned on SST-2

In [None]:
# Tokenizer of the DistilBERT checkpoint that was already fine-tuned on SST-2.
# This rebinds the notebook-level `tokenizer`.
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

In [None]:
# Ready-made sentiment classifier: the SST-2 fine-tuned DistilBERT checkpoint.
model = AutoModelForSequenceClassification.from_pretrained("distilbert/distilbert-base-uncased-finetuned-sst-2-english")

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [None]:
# NOTE(review): this rebinds `dataset` (previously the full set of splits) to
# the validation split only; cells above that index it by split name would
# not work if re-run after this one.
dataset = load_dataset("rotten_tomatoes", split="validation")

README.md: 0.00B [00:00, ?B/s]

train.parquet:   0%|          | 0.00/699k [00:00<?, ?B/s]

validation.parquet:   0%|          | 0.00/90.0k [00:00<?, ?B/s]

test.parquet:   0%|          | 0.00/92.2k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8530 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1066 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [None]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of a batch to one fixed length.

    Uses whichever ``tokenizer`` is bound at notebook level when called. Every
    sequence is truncated and padded to the maximum length, so all rows end up
    equally long and can be batched without a padding collator.
    """
    batch_texts = examples["text"]
    encoded = tokenizer(batch_texts, padding="max_length", truncation=True)
    return encoded

In [None]:
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# Run inference on the selected device (GPU when available) and in eval mode,
# so dropout is off and the forward passes are not stuck on the CPU.
model.to(device)
model.eval()

all_preds, all_labels = [], []

with torch.no_grad():
    for batch in torch.utils.data.DataLoader(tokenized_dataset, batch_size=32):
        # Only the tensors the model consumes are moved to the device; the
        # labels stay where they are and are only used for scoring.
        inputs = {key: batch[key].to(device) for key in ("input_ids", "attention_mask")}
        outputs = model(**inputs)
        preds = torch.argmax(outputs.logits, dim=-1)
        all_preds.extend(preds.tolist())
        all_labels.extend(batch["label"].tolist())


Map:   0%|          | 0/1066 [00:00<?, ? examples/s]

In [None]:
# Accuracy of the SST-2 checkpoint on the Rotten Tomatoes validation split;
# the model was not trained on this dataset anywhere in this notebook.
acc = accuracy_score(all_labels, all_preds)
print(f"Accuracy: {acc:.4f}")

Accuracy: 0.9053
