In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import DataLoader, TensorDataset
from datasets import load_dataset
import torch
import torch.nn as nn

In [2]:
from datasets import load_dataset
dataset = load_dataset("imdb")
print(dataset)
imdb_test = dataset['test']
imdb_train = dataset['train']

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})


In [3]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
checkpoint_path = "/path"
model.load_state_dict(torch.load(checkpoint_path))
model.classifier = nn.Linear(model.config.hidden_size, 2)

FileNotFoundError: [Errno 2] No such file or directory: 'path/to/your/checkpoint'

In [5]:
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, r=8, lora_alpha=16, lora_dropout=0.05
)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [6]:
from peft import get_peft_model
model = get_peft_model(model, lora_config)

In [9]:
def count_parameters(model):
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total_params, trainable_params

total_params, trainable_params = count_parameters(model)
trainable_percentage = trainable_params / total_params * 100

print(f"trainable params: {trainable_params} || all params: {total_params} || trainable%: {trainable_percentage:.10f}")

trainable params: 296450 || all params: 109780228 || trainable%: 0.2700395193


In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

optimizer = AdamW(model.parameters(), lr=1e-5)



In [7]:
imdb_train_encodings = tokenizer(list(imdb_train['text']), truncation=True, padding=True,max_length=512)
imdb_train_labels = torch.tensor(list(imdb_train['label']))
imdb_train_dataset = TensorDataset(torch.tensor(imdb_train_encodings['input_ids']),
                                  torch.tensor(imdb_train_encodings['attention_mask']),
                                  imdb_train_labels)
imdb_train_loader = DataLoader(imdb_train_dataset, batch_size=16, shuffle=True)

imdb_test_encodings = tokenizer(list(imdb_test['text']), truncation=True, padding=True,max_length=512)
imdb_test_labels = torch.tensor(list(imdb_test['label']))
imdb_test_dataset = TensorDataset(torch.tensor(imdb_test_encodings['input_ids']),
                                  torch.tensor(imdb_test_encodings['attention_mask']),
                                  imdb_test_labels)
imdb_test_loader = DataLoader(imdb_test_dataset, batch_size=16, shuffle=False)

train_losses = []
train_accuracies = []
test_accuracies = []

for epoch in range(10):
    model.train()
    train_loss = 0
    train_correct = 0
    for batch in imdb_train_loader:
        optimizer.zero_grad()
        input_ids, attention_mask, labels = tuple(t.to(device) for t in batch)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        train_loss += loss.item()
        train_correct += (logits.argmax(dim=1) == labels).float().sum().item()
        loss.backward()
        optimizer.step()
    train_loss /= len(imdb_train_loader)
    train_accuracy = train_correct / len(imdb_train)
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)

    model.eval()
    with torch.no_grad():
        test_correct = 0
        for batch in imdb_test_loader:
            input_ids, attention_mask, labels = tuple(t.to(device) for t in batch)
            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            test_correct += (logits.argmax(dim=1) == labels).float().sum().item()
        test_accuracy = test_correct / len(imdb_test)
        test_accuracies.append(test_accuracy)

    print(f"Epoch {epoch + 1}: train_loss={train_loss:.4f}, train_accuracy={train_accuracy:.4f}, test_accuracy={test_accuracy:.4f}")

KeyboardInterrupt: 

In [None]:

import matplotlib.pyplot as plt

plt.plot(train_losses, label='train loss')
plt.plot(train_accuracies, label='train accuracy')
plt.plot(test_accuracies, label='test accuracy')
plt.legend()
plt.show()