
# Parameter-Efficient Finetuning of a Language Model
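This notebook demonstrates parameter-efficient finetuning of a BERT sequence-classification model on a dataset of programming problem descriptions. It relies on `pandas`, `datasets`, and `transformers`, and loads the tokenizer with `BertTokenizer` rather than `AutoTokenizer`. Only the classification head is trained; the pre-trained encoder is kept frozen.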


In [1]:

# Install necessary libraries
!pip install transformers[torch]
!pip install datasets
!pip install seqeval


zsh:1: no matches found: transformers[torch]


In [None]:

# Import necessary libraries
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset



## Data Loading and Preprocessing
We'll start by loading the data and preparing it for model input.


In [None]:

# Read the tab-separated dataset into a DataFrame
data_path = '/mnt/data/output.tsv'
data = pd.read_csv(data_path, delimiter='\t')

# Preview the leading rows as a quick sanity check of the parsed columns
data.head(n=5)



## Tokenization
Set up a tokenizer using the `transformers` library and apply it to the dataset's `Text` column.


In [None]:

# Load the tokenizer that belongs to the 'bert-base-uncased' checkpoint
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize_function(examples):
    """Tokenize the 'Text' field of a batch of examples.

    Args:
        examples: Batch supplied by `Dataset.map`; its 'Text' entry is passed
            to the tokenizer.

    Returns:
        The tokenizer output for the batch, with truncation enabled and
        padding set to "max_length".
    """
    texts = examples['Text']
    return tokenizer(texts, truncation=True, padding="max_length")


# Wrap the DataFrame in a `datasets.Dataset` and tokenize it batch by batch
dataset = Dataset.from_pandas(data).map(tokenize_function, batched=True)



## Model Selection and Finetuning Setup
Load a pre-trained model and set it up for finetuning.


In [None]:

# Instantiate BERT with a sequence-classification head configured for one label
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=1,
)

# Turn off gradients for every parameter under `model.bert`; parameters outside
# that submodule (the classification head) are left untouched
for encoder_param in model.bert.parameters():
    encoder_param.requires_grad = False



## Training
Set up the training parameters and start training.


In [None]:

# Hyperparameters and output locations for the finetuning run
training_args = TrainingArguments(
    # Where checkpoints and logs are written, and how often to log
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,
    # Optimisation schedule
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    # Per-device batch sizes for training and evaluation
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
)

# Build the trainer. The evaluation set is deliberately the same data as the
# training set (demonstration only), so evaluation results do not measure
# generalisation.
# NOTE(review): presumably `dataset` carries a label column for the loss —
# confirm against the TSV schema.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=dataset,
    train_dataset=dataset,
)

# Run finetuning
trainer.train()



## Evaluation
Evaluate the model performance.


In [None]:

# Run evaluation on the trainer's eval dataset and display what it returns
eval_metrics = trainer.evaluate()
eval_metrics
