In [12]:
'''
Credits
--------

"Fine-Tuning BERT for text classification with LoRA" by Karkar Nizar - Used to verify and compare PEFT configuration when previous datasets were causing issues (Available at: https://medium.com/@karkar.nizar/fine-tuning-bert-for-text-classification-with-lora-f12af7fa95e4)
compute_metrics() - taken from an earlier instructional lesson
Dataset "Ukrainian Formality Dataset (translated)" available at: https://huggingface.co/datasets/ukr-detect/ukr-formality-dataset-translated-gyafc

'''
import copy

import numpy as np

from datasets import load_dataset
from loguru import logger
from peft import (
	AutoPeftModelForSequenceClassification,
	get_peft_model,
	LoraConfig,
	TaskType,
)
from transformers import (
	AutoTokenizer,
	# DistilBertForSequenceClassification,
	BertForSequenceClassification,
	Trainer,
	TrainingArguments,
)
logger.info('packages imported')

[32m2024-06-11 12:37:26.404[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m27[0m - [1mpackages imported[0m


In [3]:

# --- Checkpoint --------------------------------------------------------------
# Alternative left over from earlier experiments: 'distilbert-base-uncased'.
model_name = 'bert-base-cased'

# --- Dataset -----------------------------------------------------------------
# Alternatives left over from earlier experiments: 'dair-ai/emotion',
# 'roman_urdu_hate_speech', 'ctoraman/gender-hate-speech', 'imdb'.
dataset_name = 'ukr-detect/ukr-formality-dataset-translated-gyafc'
tokeniser_key = 'text'                  # column whose strings are fed to the tokeniser
train_key, test_key = 'train', 'test'   # names of the splits that get sampled

# --- Output directories ------------------------------------------------------
# `initial_save_name` is the PEFT trainer's output_dir; `final_save_name` is
# where the trained adapter is written with save_pretrained().
initial_save_name, final_save_name = './data/initial', './data/final'

# --- Run size ----------------------------------------------------------------
select_size = 1000      # rows drawn from each sampled split
num_train_epochs = 10

logger.info('settings created')


[32m2024-06-09 11:43:58.134[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1msettings created[0m


In [4]:
# Load the dataset identified by `dataset_name`.
dataset = load_dataset(path=dataset_name)

# Tokeniser belonging to the same checkpoint (`model_name`) that the
# classifier is later loaded from.
tokeniser = AutoTokenizer.from_pretrained(model_name)

def tokenise(examples):
	"""Tokenise one batch of examples.

	Looks up the raw values under ``examples[tokeniser_key]`` and hands them
	to the module-level ``tokeniser`` with ``padding='max_length'`` and
	``truncation=True``. Whatever the tokeniser returns is returned unchanged.
	"""
	texts = examples[tokeniser_key]
	encoding_options = {'padding': 'max_length', 'truncation': True}
	return tokeniser(texts, **encoding_options)

# Tokenise the whole dataset; `tokenised_ds` keeps the tokenised version of
# every split.
# NOTE(review): the captured output shows this mapping ~209k training rows
# (about 1m49s) although only `select_size` rows per split are used below.
# Sampling before mapping should pick the same rows for far less work, but it
# would stop `tokenised_ds` from holding the full dataset -- confirm nothing
# else needs it before changing the order.
tokenised_ds = dataset.map(tokenise, batched=True)

# Fixed shuffle seed so the same rows are drawn on every run.
sample_seed = 202405241534

sampled_ds = {}

for split in (train_key, test_key):
	split_ds = tokenised_ds[split]
	# Clamp the sample size: asking `select` for more rows than the split
	# contains fails with an out-of-range index, so a small split is used whole.
	sample_size = min(select_size, len(split_ds))
	sampled_ds[split] = (
		split_ds
		.shuffle(seed=sample_seed)
		.select(range(sample_size))
		# The dataset calls its target column 'labels'; it is renamed to
		# 'label' here, as in the original code.
		.rename_column('labels', 'label')
	)

# train_ds = train_ds.rename_column('Text', 'text')
# train_ds = train_ds.rename_column('Label', 'label')

# test_ds = test_ds.rename_column('Text', 'text')
# test_ds = test_ds.rename_column('Label', 'label')
logger.info('tokenised dataset loaded')

Map: 100%|██████████| 209124/209124 [01:49<00:00, 1906.08 examples/s]
Map: 100%|██████████| 10272/10272 [00:03<00:00, 3098.55 examples/s]
Map: 100%|██████████| 4853/4853 [00:01<00:00, 3240.62 examples/s]
[32m2024-06-09 11:46:40.142[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m25[0m - [1mtokenised dataset loaded[0m


In [5]:

### Loading and Evaluating a Foundation Model
## Loading the model
# Once you have selected a model, load it in your notebook.
# Class index -> human-readable name.
# NOTE(review): assumes the dataset encodes informal as 0 and formal as 1 --
# confirm against the dataset card.
id2label = {0: 'INFORMAL', 1: 'FORMAL'}
# Reverse mapping derived from the one above so the two cannot drift apart.
label2id = {name: index for index, name in id2label.items()}

# Pretrained encoder with a two-class classification head on top.
base_model = BertForSequenceClassification.from_pretrained(
	model_name,
	num_labels=len(id2label),
	id2label=id2label,
	label2id=label2id,
)
logger.debug(base_model)
logger.info('base model instantiated')
# base_model = DistilBertForSequenceClassification.from_pretrained(
# 	model_name,
# 	num_labels=2,
# )

# TODO: Copied from earlier lesson, potentially upgrade to something original
def compute_metrics(eval_pred):
    """Compute accuracy for a ``(predictions, labels)`` pair.

    ``eval_pred`` is unpacked into per-class scores and reference labels.
    The predicted class of each row is the arg-max over axis 1, and the
    result is ``{'accuracy': <fraction of rows where prediction == label>}``.
    """
    scores, references = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    matches = predicted_classes == references
    return {'accuracy': matches.mean()}

# Arguments for the baseline Trainer: evaluate and checkpoint once per epoch
# and reload the best checkpoint when training ends.
training_args = TrainingArguments(
	output_dir='./data/project1/initial',
	num_train_epochs=num_train_epochs,
	evaluation_strategy='epoch',
	save_strategy='epoch',
	load_best_model_at_end=True,
)

# NOTE: It was unclear whether the exercise intended a PEFT-wrapped version of
# the base model to be evaluated at this point. A plain Trainer around
# `base_model` is created instead, so that the base model can be evaluated
# without any training.
trainer = Trainer(
	model=base_model,
	args=training_args,
	train_dataset=sampled_ds[train_key],
	eval_dataset=sampled_ds[test_key],
	compute_metrics=compute_metrics,
)
logger.info('initial trainer instantiated')


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-06-09 11:46:50.548[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [34m[1mBertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_

In [6]:

## Evaluating the model
# Perform an initial evaluation of the model on your chosen sequence classification task. This step will require that you also load an appropriate tokenizer and dataset.
# Run the baseline Trainer's evaluation loop on its eval dataset; the returned
# metrics dict is kept for comparison with the fine-tuned model.
initial_evaluation = trainer.evaluate()

# Same three log records as before, emitted in the same order.
for message in (
	'initial trainer evaluation complete',
	'The base model was evaluated with the following metrics:',
	initial_evaluation,
):
	logger.info(message)


[32m2024-06-09 14:36:14.554[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1minitial trainer evaluation complete[0m
[32m2024-06-09 14:36:14.555[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mThe base model was evaluated with the following metrics:[0m
[32m2024-06-09 14:36:14.556[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1m{'eval_loss': 0.6910282373428345, 'eval_accuracy': 0.55, 'eval_runtime': 10143.1314, 'eval_samples_per_second': 0.099, 'eval_steps_per_second': 0.012}[0m


In [7]:

### Performing Parameter-Efficient Fine-Tuning
## Creating a PEFT config
# Create a PEFT config with appropriate hyperparameters for your chosen model.
# lora_config = LoraConfig()

# LoRA settings for a sequence-classification head. Rank and alpha are both 1,
# i.e. the smallest possible adapter (the recorded run reports 38,402
# trainable parameters).
lora_config = LoraConfig(
	task_type=TaskType.SEQ_CLS,
	r=1,
	lora_alpha=1,
	lora_dropout=0.1,
)

logger.info('peft config created')

## Creating a PEFT model
# Using the PEFT config and foundation model, create a PEFT model.
# get_peft_model() modifies the model it is given in place. The baseline
# `trainer` still holds `base_model`, so wrapping it directly would mean that
# (a) re-running the baseline evaluation after training scores the tuned
# weights, and (b) re-running this cell wraps an already-wrapped model.
# Wrapping a deep copy keeps `base_model` pristine and this cell idempotent.
model = get_peft_model(copy.deepcopy(base_model), lora_config)
model.print_trainable_parameters()
logger.info('lora model instantiated')

# Training arguments for the LoRA run: evaluate and checkpoint once per epoch,
# write checkpoints under `initial_save_name`, and reload the best checkpoint
# when training finishes.
peft_training_args = TrainingArguments(
	output_dir=initial_save_name,
	num_train_epochs=num_train_epochs,
	evaluation_strategy='epoch',
	save_strategy='epoch',
	load_best_model_at_end=True,
)

# Trainer around the PEFT-wrapped model, using the sampled train/test splits
# and the shared accuracy metric.
peft_trainer = Trainer(
	model=model,
	args=peft_training_args,
	train_dataset=sampled_ds[train_key],
	eval_dataset=sampled_ds[test_key],
	compute_metrics=compute_metrics,
)
logger.info('peft trainer instantiated')

## Training the model
# Using the PEFT model and dataset, run a training loop with at least one epoch.
peft_trainer.train()
logger.info('peft trainer training complete')


[32m2024-06-09 22:43:31.014[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mpeft config created[0m
[32m2024-06-09 22:43:31.102[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mlora model instantiated[0m
[32m2024-06-09 22:43:31.147[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m36[0m - [1mpeft trainer instantiated[0m


trainable params: 38,402 || all params: 108,350,212 || trainable%: 0.0354


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.685821,0.551
2,No log,0.681085,0.608
3,No log,0.66913,0.617
4,0.687400,0.654443,0.623
5,0.687400,0.647185,0.621
6,0.687400,0.643139,0.619
7,0.687400,0.640377,0.619
8,0.642200,0.641857,0.619
9,0.642200,0.64295,0.617
10,0.642200,0.641928,0.618


[32m2024-06-11 11:48:47.184[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m41[0m - [1mpeft trainer training complete[0m


In [13]:

## Saving the trained model
# Depending on your training loop configuration, your PEFT model may have already been saved. If not, use save_pretrained to save your progress.
logger.info('beginning save...')
# Write the trained PEFT model to `final_save_name`.
model.save_pretrained(final_save_name)
logger.info('save complete.')

### Performing Inference with a PEFT Model
## Loading the model
# Using the appropriate PEFT model class, load your trained model.
logger.info(f'loading saved model {final_save_name}...')
# The reload rebuilds the base model from the original checkpoint (see the
# "newly initialized" warning it prints), so the INFORMAL/FORMAL label names
# configured on the training model are not carried over by default. They are
# passed through explicitly so the reloaded model reports the same labels.
trained_config = model.config
final_model = AutoPeftModelForSequenceClassification.from_pretrained(
	final_save_name,
	num_labels=trained_config.num_labels,
	id2label=trained_config.id2label,
	label2id=trained_config.label2id,
)
logger.info('...model loaded')
logger.info(final_model)


[32m2024-06-11 12:37:33.724[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m3[0m - [1mbeginning save...[0m
[32m2024-06-11 12:37:34.118[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1msave complete.[0m
[32m2024-06-11 12:37:34.119[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mloading saved model temp...[0m
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[32m2024-06-11 12:37:36.280[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m12[0m - [1m...model loaded[0m
[32m2024-06-11 12:37:36.281[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mPeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): Be

In [14]:

## Evaluating the model
# Repeat the previous evaluation process, this time using the PEFT model. Compare the results to the results from the original foundation model.
# Evaluation harness for the reloaded PEFT model. It reuses the PEFT training
# arguments, the sampled splits and the accuracy metric, so the numbers are
# comparable with the baseline evaluation.
final_trainer = Trainer(
	model=final_model,
	args=peft_training_args,
	train_dataset=sampled_ds[train_key],
	eval_dataset=sampled_ds[test_key],
	compute_metrics=compute_metrics,
)
logger.info('final trainer instantiated')

# Run the evaluation loop and keep the metrics dict.
final_evaluation = final_trainer.evaluate()
logger.info('evaluation complete :)')

logger.info('The final model was evaluated with the following metrics:')
logger.info(final_evaluation)


[32m2024-06-11 12:37:41.256[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mfinal trainer instantiated[0m


[32m2024-06-11 13:12:20.553[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m13[0m - [1mevaluation complete :)[0m
[32m2024-06-11 13:12:20.555[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [1mThe final model was evaluated with the following metrics:[0m
[32m2024-06-11 13:12:20.556[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m16[0m - [1m{'eval_loss': 0.6403766870498657, 'eval_accuracy': 0.619, 'eval_runtime': 2079.2669, 'eval_samples_per_second': 0.481, 'eval_steps_per_second': 0.06}[0m
