In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType

# Number of target classes; sizes the classification head created below.
NUM_LABELS = 4

# One name for the checkpoint so tokenizer and model cannot drift apart.
BASE_CHECKPOINT = "google-bert/bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(BASE_CHECKPOINT)

# The classification head is not part of the checkpoint and is freshly
# initialised (see the load-time warning), so the model must be fine-tuned
# before its predictions are meaningful.
model = AutoModelForSequenceClassification.from_pretrained(
    BASE_CHECKPOINT, num_labels=NUM_LABELS
)


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Modules that receive LoRA adapters.
attention_projections = ["query", "key", "value"]  # BERT attention

# LoRA hyper-parameters for the sequence-classification task.
lora_config = LoraConfig(
    target_modules=attention_projections,
    task_type=TaskType.SEQ_CLS,
    lora_dropout=0.1,
    lora_alpha=32,
    r=16,
)

# Wrap the base model with the adapters, then report how many parameters
# remain trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
from datasets import load_dataset

dataset = load_dataset("sh0416/ag_news")


def preprocess(examples):
    """Tokenize a batch of rows into fixed-length model inputs.

    Args:
        examples: A batch mapping column names to lists of values; only the
            ``"title"`` column is read.

    Returns:
        The tokenizer output for the batch, truncated and padded to exactly
        128 tokens per row.
    """
    return tokenizer(
        examples["title"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )


dataset = dataset.map(preprocess, batched=True)
dataset = dataset.rename_column("label", "labels")

# Shift the labels down by one so they are zero-based class indices.
# Batched so the function runs once per chunk of rows instead of once per
# example, which is much faster on a dataset of this size.
dataset = dataset.map(
    lambda batch: {"labels": [label - 1 for label in batch["labels"]]},
    batched=True,
)

# Fail fast if the shift did not produce valid indices for the classifier
# head (for example, if the raw labels were already zero-based).
for split_name, split in dataset.items():
    found_labels = set(split.unique("labels"))
    assert found_labels <= set(range(NUM_LABELS)), (
        f"unexpected labels in split {split_name!r}: {sorted(found_labels)}"
    )

dataset.set_format("torch")


Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

In [None]:
# Peek at the first 1000 training labels; slice the rows first so only
# those rows are read.
dataset["train"][:1000]["labels"]

tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

In [None]:
import torch
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./bert_agnews",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=50,
    save_steps=500,
    # Mixed precision requires a CUDA device; fall back to full precision on
    # CPU so the cell still runs there, like the evaluation cells do.
    fp16=torch.cuda.is_available(),
    report_to="none",
)

# NOTE(review): an eval dataset is supplied but no evaluation strategy is
# configured, so only the training loss is logged during training.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    # `tokenizer=` is deprecated on Trainer in recent transformers releases;
    # `processing_class=` is its replacement and is still used for padding.
    processing_class=tokenizer,
)

trainer.train()


  trainer = Trainer(


Step,Training Loss
50,1.4075
100,1.3807
150,1.3766
200,1.3642
250,1.3427
300,1.3096
350,1.2358
400,1.1149
450,0.9806
500,0.9177


TrainOutput(global_step=22500, training_loss=0.48333968319363063, metrics={'train_runtime': 1961.6853, 'train_samples_per_second': 183.516, 'train_steps_per_second': 11.47, 'total_flos': 2.39258824704e+16, 'train_loss': 0.48333968319363063, 'epoch': 3.0})

In [None]:
import torch

text = "Israel bombed Palestine"

# Training leaves the model in train mode; switch to eval so dropout (in the
# base model and in the LoRA layers) is disabled and the prediction is
# deterministic.
model.eval()

# Truncate to the same maximum length used when tokenizing the training data.
inputs = tokenizer(
    text,
    return_tensors="pt",
    truncation=True,
    max_length=128,
).to(model.device)

with torch.no_grad():
    outputs = model(**inputs)

# Index of the highest-scoring class for the single input.
pred = outputs.logits.argmax(dim=-1).item()
print("Predicted label:", pred)


Predicted label: 0


In [None]:
from sklearn.metrics import classification_report
import torch
from torch.utils.data import DataLoader

# Held-out split used for the final report; order is kept fixed.
test_dataset = dataset['test']
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=16)

# Use the GPU when one is present and put the model there in eval mode.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
model.to(device)

all_preds, all_labels = [], []

# Gradients are never needed during evaluation, so disable them for the
# whole pass rather than per batch.
with torch.no_grad():
    for batch in test_dataloader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = logits.argmax(dim=1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds, digits=4))


              precision    recall  f1-score   support

           0     0.8984    0.8605    0.8790      1900
           1     0.8943    0.9311    0.9123      1900
           2     0.8184    0.8158    0.8171      1900
           3     0.8381    0.8416    0.8398      1900

    accuracy                         0.8622      7600
   macro avg     0.8623    0.8622    0.8621      7600
weighted avg     0.8623    0.8622    0.8621      7600



In [None]:
# Write the fine-tuned model and its tokenizer into the same directory so
# they can be reloaded or archived together.
save_dir = "news_classifier_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)


('news_classifier_model/tokenizer_config.json',
 'news_classifier_model/special_tokens_map.json',
 'news_classifier_model/vocab.txt',
 'news_classifier_model/added_tokens.json',
 'news_classifier_model/tokenizer.json')

In [22]:
import os
# Bundle the saved model directory into a zip archive.
# NOTE(review): the recorded output says "updating:", so an archive with this
# name already existed and was updated in place rather than rebuilt.
!zip -r news_classifier_model.zip news_classifier_model
# List the directory to confirm the archive is there.
# NOTE(review): '/content' is a hard-coded absolute path; presumably the
# notebook's working directory on the hosted runtime — confirm.
os.listdir('/content')

updating: news_classifier_model/ (stored 0%)
updating: news_classifier_model/adapter_model.safetensors (deflated 7%)
updating: news_classifier_model/special_tokens_map.json (deflated 42%)
updating: news_classifier_model/adapter_config.json (deflated 57%)
updating: news_classifier_model/tokenizer_config.json (deflated 75%)
updating: news_classifier_model/README.md (deflated 66%)
updating: news_classifier_model/vocab.txt (deflated 53%)
updating: news_classifier_model/tokenizer.json (deflated 71%)


['.config',
 'huggingface_tokenizers_cache',
 'news_classifier_model',
 'news_classifier_model.zip',
 'bert_agnews',
 'unsloth_compiled_cache',
 'outputs',
 'sample_data']

In [None]:
# NOTE(review): same archive command as the previous cell; this cell looks
# redundant and could likely be removed — confirm.
!zip -r news_classifier_model.zip news_classifier_model

In [17]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# The saved directory contains a LoRA adapter (adapter_config.json and
# adapter_model.safetensors), not a full checkpoint. Rebuild the model the way
# PEFT documents it: load the base model first, then attach the saved adapter.
# Loading the directory directly through AutoModelForSequenceClassification
# reports a newly initialised classifier head and may leave it untrained.
adapter_dir = "news_classifier_model"
peft_config = PeftConfig.from_pretrained(adapter_dir)

base_model = AutoModelForSequenceClassification.from_pretrained(
    peft_config.base_model_name_or_path,  # base checkpoint recorded at save time
    num_labels=4,
)
model2 = PeftModel.from_pretrained(base_model, adapter_dir)
model2.eval()  # inference only: disable dropout

tokenizer2 = AutoTokenizer.from_pretrained(adapter_dir)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
from sklearn.metrics import classification_report
import torch
from torch.utils.data import DataLoader

# Held-out split used to check the reloaded model.
test_dataset = dataset['test']

test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Evaluate the *reloaded* model. This cell previously moved and called
# `model` (the in-memory fine-tuned model), so its report only repeated the
# earlier evaluation and never exercised `model2`.
model2.to(device)
model2.eval()

all_preds = []
all_labels = []

for batch in test_dataloader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)

    with torch.no_grad():
        outputs = model2(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=1)

    all_preds.extend(preds.cpu().numpy())
    all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds, digits=4))


              precision    recall  f1-score   support

           0     0.8984    0.8605    0.8790      1900
           1     0.8943    0.9311    0.9123      1900
           2     0.8184    0.8158    0.8171      1900
           3     0.8381    0.8416    0.8398      1900

    accuracy                         0.8622      7600
   macro avg     0.8623    0.8622    0.8621      7600
weighted avg     0.8623    0.8622    0.8621      7600



In [13]:
import os
# Inspect the filesystem root of the runtime (environment exploration only;
# no other visible cell uses this result).
os.listdir('/')

['sys',
 'media',
 'libx32',
 'lib32',
 'boot',
 'bin',
 'tmp',
 'home',
 'opt',
 'srv',
 'root',
 'lib',
 'var',
 'usr',
 'run',
 'proc',
 'mnt',
 'sbin',
 'lib64',
 'dev',
 'etc',
 'content',
 'kaggle',
 '.dockerenv',
 'tools',
 'datalab',
 'python-apt',
 'python-apt.tar.xz',
 'NGC-DL-CONTAINER-LICENSE',
 'cuda-keyring_1.1-1_all.deb']