In [51]:
from datasets import load_dataset, DatasetDict, Dataset
from transformers import (AutoTokenizer, AutoConfig,
    AutoModelForSequenceClassification, Trainer, TrainingArguments,
    DataCollatorWithPadding)
from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
import torch
import numpy as np
import evaluate

In [52]:
model_checkpoint = "distilbert-base-uncased"

id2label = {0: 'Negative', 1: 'Positive'}
label2id = {'Negative': 0, 'Positive': 1}

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels = 2, id2label=id2label, label2id=label2id)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [53]:
dataset = load_dataset("shawhin/imdb-truncated")
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1000
    })
})


In [54]:
# create tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)

# create tokenize function
def tokenize_function(examples):
    text = examples["text"]

    # tokenize and truncate the text if it is too long
    tokenizer.truncation_side = 'left'
    tokenized_inputs = tokenizer(text, return_tensors='np', truncation=True, max_length=512)

    return tokenized_inputs

# add pad token if None exists
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tokenizer))

# tokenize the training and validation sets
tokenized_dataset = dataset.map(tokenize_function, batched=True)
print(tokenized_dataset)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'attention_mask'],
        num_rows: 1000
    })
})


In [55]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [56]:
accuracy = evaluate.load("accuracy")
precision = evaluate.load("precision")
recall = evaluate.load("recall")
f1 = evaluate.load("f1")

# define evaluation function to pass into trainer later
def compute_metrics(p):
    predictions, labels = p
    # Use argmax for other metrics that require class predictions
    predictions = np.argmax(predictions, axis=1)
    return {
        "accuracy": accuracy.compute(predictions=predictions, references=labels),
        "precision": precision.compute(predictions=predictions, references=labels),
        "recall": recall.compute(predictions=predictions, references=labels),
        "f1": f1.compute(predictions=predictions, references=labels)
    }

In [57]:
# Untrained model predictions/performance

text_list = ["It was good.", "Not a fan, don't recommend.",
             "Better than the first one.", "This is not worth watching even once.",
             "This one is a pass."]

print("Untrained model predictions:")
print("---------------------------")

# Get the device of the model
device = next(model.parameters()).device

for text in text_list:
    # tokenize the text and move to the same device as the model
    inputs = tokenizer.encode(text, return_tensors='pt').to(device)
    # compute logits
    logits =  model(inputs).logits
    # convert logits to labels
    predictions = torch.argmax(logits)

    print(text + " - " + id2label[predictions.tolist()])

Untrained model predictions:
---------------------------
It was good. - Positive
Not a fan, don't recommend. - Positive
Better than the first one. - Positive
This is not worth watching even once. - Positive
This one is a pass. - Positive


In [32]:
peft_config = LoraConfig(task_type='SEQ_CLS', # sequence classification
                         r = 4, # intrinsic rank of trainable weight matrix
                         lora_alpha=32,  # this is like learning rate
                         lora_dropout=0.01, # probability of dropout
                         target_modules=['q_lin'])  # we apply lora to query layer

In [33]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 628,994 || all params: 67,584,004 || trainable%: 0.9307


In [34]:
# hyperparameters
lr = 1e-4
batch_size = 8
num_epochs = 10

# define training arguments
training_args = TrainingArguments(output_dir=model_checkpoint + '-lora-text-classification',
                learning_rate=lr,
                per_device_train_batch_size=batch_size,
                per_device_eval_batch_size=batch_size,
                num_train_epochs=num_epochs,
                weight_decay=0.01,
                eval_strategy='epoch',
                save_strategy='epoch',
                load_best_model_at_end=True,
                report_to="none")

In [35]:
# create trainer object
trainer = Trainer(model=model, args=training_args, data_collator=data_collator,
                  train_dataset=tokenized_dataset['train'], eval_dataset=tokenized_dataset['validation'],
                  tokenizer = tokenizer, compute_metrics=compute_metrics)

trainer.train()

  trainer = Trainer(model=model, args=training_args, data_collator=data_collator,
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.412499,{'accuracy': 0.852},{'precision': 0.8154121863799283},{'recall': 0.91},{'f1': 0.8601134215500945}
2,No log,0.262335,{'accuracy': 0.89},{'precision': 0.922077922077922},{'recall': 0.852},{'f1': 0.8856548856548857}
3,No log,0.258106,{'accuracy': 0.894},{'precision': 0.9191489361702128},{'recall': 0.864},{'f1': 0.8907216494845361}
4,0.378400,0.257079,{'accuracy': 0.904},{'precision': 0.9190871369294605},{'recall': 0.886},{'f1': 0.9022403258655805}
5,0.378400,0.279992,{'accuracy': 0.9},{'precision': 0.9310344827586207},{'recall': 0.864},{'f1': 0.8962655601659751}
6,0.378400,0.268206,{'accuracy': 0.91},{'precision': 0.9235537190082644},{'recall': 0.894},{'f1': 0.9085365853658537}
7,0.378400,0.27788,{'accuracy': 0.91},{'precision': 0.9270833333333334},{'recall': 0.89},{'f1': 0.9081632653061225}
8,0.226700,0.279186,{'accuracy': 0.911},{'precision': 0.9168356997971603},{'recall': 0.904},{'f1': 0.9103726082578046}
9,0.226700,0.285114,{'accuracy': 0.912},{'precision': 0.9204081632653062},{'recall': 0.902},{'f1': 0.9111111111111111}
10,0.226700,0.286654,{'accuracy': 0.911},{'precision': 0.9237113402061856},{'recall': 0.896},{'f1': 0.9096446700507614}


TrainOutput(global_step=1250, training_loss=0.2812756469726562, metrics={'train_runtime': 481.5066, 'train_samples_per_second': 20.768, 'train_steps_per_second': 2.596, 'total_flos': 1253694805157184.0, 'train_loss': 0.2812756469726562, 'epoch': 10.0})

In [43]:
print("Trained model predictions:")
print("-------------------------")

device = next(model.parameters()).device

for text in text_list:
    # tokenize the text
    inputs = tokenizer.encode(text, return_tensors='pt').to(device)
    # compute logits
    logits =  model(inputs).logits
    # convert logits to labels
    predictions = torch.argmax(logits, 1)

    print(text + " - " + id2label[predictions.tolist()[0]])

Trained model predictions:
-------------------------
It was good. - Positive
Not a fan, don't recommend. - Negative
Better than the first one. - Positive
This is not worth watching even once. - Negative
This one is a pass. - Positive
