In [40]:
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    BertTokenizer,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
)
from peft import get_peft_config, PeftModel, PeftConfig, get_peft_model, LoraConfig, TaskType, LoraConfig
import evaluate
import torch
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import wandb
from datetime import datetime as dt

print("Setup complete!")

Setup complete!


In [42]:
# set up wandb api key
%env WANDB_API_KEY=495a5b4509ce4b1ae15055b8f810dc296d4fa6fa
# login to wandb
wandb.login()
# set wandb project
%env WANDB_PROJECT=lora-emotion-classification

env: WANDB_API_KEY=495a5b4509ce4b1ae15055b8f810dc296d4fa6fa
env: WANDB_PROJECT="lora-emotion-classification"


In [43]:
# constants
model_name = "bert-base-uncased"
batch_size = 2
epochs = 1
sample_size = 50

In [44]:
# Loading dataset and basic stats
emotion_dataset = load_dataset("dair-ai/emotion")
print(emotion_dataset)

label2id = {text: num for num, text in enumerate(emotion_dataset["train"].features["label"].names)}
id2label = {num: text for num, text in enumerate(emotion_dataset["train"].features["label"].names)}
print(label2id)
print(id2label)

No config specified, defaulting to: emotion/split
Found cached dataset emotion (/home/dejang/.cache/huggingface/datasets/dair-ai___emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)
100%|██████████| 3/3 [00:00<00:00, 383.63it/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})
{'sadness': 0, 'joy': 1, 'love': 2, 'anger': 3, 'fear': 4, 'surprise': 5}
{0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}





In [45]:
print(emotion_dataset["train"].features["label"])
print(emotion_dataset["train"].features["text"])
print(emotion_dataset["train"][0])

ClassLabel(names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], id=None)
Value(dtype='string', id=None)
{'text': 'i didnt feel humiliated', 'label': 0}


In [46]:
# Decrease the size of the dataset for faster training (local on cpu)
emotion_dataset["train"] = emotion_dataset["train"].select(range(sample_size))
emotion_dataset["validation"] = emotion_dataset["validation"].select(range(sample_size))
emotion_dataset["test"] = emotion_dataset["test"].select(range(sample_size))
print(emotion_dataset)

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 50
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 50
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 50
    })
})


In [47]:
# load tokenizer
tokenizer = BertTokenizer.from_pretrained(model_name)

# take one example from train dataset and tokenize it
example = emotion_dataset["train"][0]
print(example)
print(tokenizer(example["text"]))

# take batch of examples and tokenize them
batch = tokenizer(emotion_dataset["train"]["text"][:5], padding="max_length", truncation=True, return_tensors="pt")
print(batch)
for ids in batch["input_ids"]:
    print(len(ids))

{'text': 'i didnt feel humiliated', 'label': 0}
{'input_ids': [101, 1045, 2134, 2102, 2514, 26608, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}
{'input_ids': tensor([[  101,  1045,  2134,  ...,     0,     0,     0],
        [  101,  1045,  2064,  ...,     0,     0,     0],
        [  101, 10047,  9775,  ...,     0,     0,     0],
        [  101,  1045,  2572,  ...,     0,     0,     0],
        [  101,  1045,  2572,  ...,     0,     0,     0]]), 'token_type_ids': tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}
512
512
512
512
512


In [48]:
# method for tokenizing examples
def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True)

# tokenize dataset
tokenized_emotions = emotion_dataset.map(tokenize, batched=True, batch_size=batch_size)
tokenized_emotions = tokenized_emotions.remove_columns(["text"])
tokenized_emotions = tokenized_emotions.rename_column("label", "labels")
print(tokenized_emotions)

Loading cached processed dataset at /home/dejang/.cache/huggingface/datasets/dair-ai___emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd/cache-dac030563fd04f5d.arrow
Loading cached processed dataset at /home/dejang/.cache/huggingface/datasets/dair-ai___emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd/cache-4363ea96b2fceb2c.arrow
Loading cached processed dataset at /home/dejang/.cache/huggingface/datasets/dair-ai___emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd/cache-21126a1284f3544b.arrow


DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 50
    })
    validation: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 50
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 50
    })
})


In [49]:
example = tokenized_emotions['train'][0]
print(tokenizer.decode(example["input_ids"]))
print(example["labels"])

[CLS] i didnt feel humiliated [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD

In [50]:
# create model
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=6)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [51]:
inputs = tokenizer(emotion_dataset["train"]["text"][0], padding="max_length", truncation=True, return_tensors="pt")
outputs = model(**inputs)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.1236,  0.2148, -0.2504, -0.2534,  0.1549, -0.0872]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [52]:
# initialize data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="max_length", return_tensors="pt")

In [53]:
# define metrics
metric_acc = evaluate.load("accuracy")
metric_precision = evaluate.load("precision", average="weighted")
metric_recall = evaluate.load("recall", average="weighted")
metric_f1 = evaluate.load("f1", average="weighted")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": metric_acc.compute(predictions=predictions, references=labels),
        "precision": metric_precision.compute(predictions=predictions, references=labels, average="weighted", zero_division=0), 
        "recall": metric_recall.compute(predictions=predictions, references=labels),
        "f1": metric_f1.compute(predictions=predictions, references=labels),
    }

In [54]:

# define metrics
# def compute_metrics(eval_preds):
#     metrics = evaluate.combine([
#         evaluate.load("accuracy"),
#         evaluate.load("precision", average="weighted")
#     #     evaluate.load("recall", average="weighted"),
#     #     evaluate.load("f1", average="weighted")
#     ])
#     logits, labels = eval_preds
#     predictions = np.argmax(logits, axis=-1)
#     return metrics.compute(predictions=predictions, references=labels)

# def compute_metrics(eval_preds):
#     metric = evaluate.load("glue", "mrpc")
#     logits, labels = eval_preds
#     predictions = np.argmax(logits, axis=-1)
#     return metric.compute(predictions=predictions, references=labels)

# def compute_metrics(pred):
#     labels = pred.label_ids
#     predictions = pred.predictions.argmax(-1)
#     accuracy = accuracy_score(labels, predictions)
#     f1 = f1_score(labels, predictions, average="weighted")
#     return {"accuracy": accuracy, "f1": f1}


training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    weight_decay=0.01,
    # logging_strategy = "steps",
    # logging_steps = 10,
    evaluation_strategy = "epoch",
    report_to="wandb",  # enable logging to W&B
    run_name=dt.now().strftime("%Y-%m-%d_%H-%M-%S"),  # name of the W&B run (optional)
    save_strategy = "epoch",
    data_seed = 42
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_emotions["train"],
    eval_dataset=tokenized_emotions["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics = compute_metrics
)

In [55]:
trainer.train()




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                

[A[A                                         
 36%|███▌      | 9/25 [3:01:28<00:45,  2.84s/it]
[A
[A

{'eval_loss': 1.560585618019104, 'eval_accuracy': 0.32, 'eval_runtime': 30.0115, 'eval_samples_per_second': 1.666, 'eval_steps_per_second': 0.833, 'epoch': 1.0}


                                                
100%|██████████| 25/25 [02:23<00:00,  5.73s/it]]

{'train_runtime': 148.2449, 'train_samples_per_second': 0.337, 'train_steps_per_second': 0.169, 'train_loss': 1.6040301513671875, 'epoch': 1.0}





TrainOutput(global_step=25, training_loss=1.6040301513671875, metrics={'train_runtime': 148.2449, 'train_samples_per_second': 0.337, 'train_steps_per_second': 0.169, 'train_loss': 1.6040301513671875, 'epoch': 1.0})

In [39]:
wandb.finish()

0,1
eval/accuracy,▁
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁
train/global_step,▁▁
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁

0,1
eval/accuracy,0.3
eval/loss,1.62465
eval/runtime,29.7177
eval/samples_per_second,1.683
eval/steps_per_second,0.841
train/epoch,1.0
train/global_step,25.0
train/total_flos,13156025241600.0
train/train_loss,1.68367
train/train_runtime,145.932


In [14]:
import pandas as pd
pd.DataFrame(trainer.state.log_history)

Unnamed: 0,loss,learning_rate,epoch,step,eval_loss,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss
0,1.7407,1.2e-05,0.4,10,,,,,,,,,
1,1.7239,4e-06,0.8,20,,,,,,,,,
2,,,1.0,25,1.7153,28.8028,1.736,0.868,,,,,
3,,,1.0,25,,,,,129.7367,0.385,0.193,13156030000000.0,1.717139


In [15]:
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, inference_mode=False, r=8, lora_alpha=16, lora_dropout=0.1, bias="all"
)
# Train lora model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 407052 || all params: 109786380 || trainable%: 0.3707673028293674


In [16]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_emotions["train"],
    eval_dataset=tokenized_emotions["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
)
trainer.train()

 36%|███▌      | 9/25 [00:25<00:45,  2.84s/it]

KeyboardInterrupt: 

In [None]:
! pwd

/content


In [None]:
!zip -r /content/results.zip /content/results

  adding: content/results/ (stored 0%)
  adding: content/results/runs/ (stored 0%)
  adding: content/results/runs/Aug26_18-58-31_aff4bb91f6d2/ (stored 0%)
  adding: content/results/runs/Aug26_18-58-31_aff4bb91f6d2/events.out.tfevents.1693076381.aff4bb91f6d2.880.1 (deflated 58%)
  adding: content/results/runs/Aug26_18-58-31_aff4bb91f6d2/events.out.tfevents.1693076320.aff4bb91f6d2.880.0 (deflated 58%)
  adding: content/results/checkpoint-13/ (stored 0%)
  adding: content/results/checkpoint-13/trainer_state.json (deflated 54%)
  adding: content/results/checkpoint-13/adapter_config.json (deflated 44%)
  adding: content/results/checkpoint-13/scheduler.pt (deflated 50%)
  adding: content/results/checkpoint-13/training_args.bin (deflated 48%)
  adding: content/results/checkpoint-13/special_tokens_map.json (deflated 42%)
  adding: content/results/checkpoint-13/vocab.txt (deflated 53%)
  adding: content/results/checkpoint-13/pytorch_model.bin (deflated 7%)
  adding: content/results/checkpoint-1

In [None]:
! ls -lh

total 782M
drwxr-xr-x 5 root root 4.0K Aug 26 18:59 results
-rw-r--r-- 1 root root 782M Aug 26 19:01 results.zip
drwxr-xr-x 1 root root 4.0K Aug 24 21:25 sample_data


In [None]:
from google.colab import files
files.download("/content/results.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
%load_ext tensorboard
%tensorboard --logdir /content/results/runs

<IPython.core.display.Javascript object>

In [None]:
! fuser -k 6006/tcp

6006/tcp:             2123
