In [16]:
import evaluate
import torch
from accelerate import Accelerator
from datasets import load_dataset
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, get_scheduler

def training_function(
    checkpoint="bert-base-uncased",
    num_epochs=3,
    batch_size=8,
    learning_rate=3e-5,
):
    """Fine-tune a sequence-classification model on GLUE MRPC and print metrics.

    The function is self-contained so it can be handed to
    ``accelerate.notebook_launcher``: it loads and tokenizes the dataset,
    builds the dataloaders, trains with a linear learning-rate schedule,
    evaluates on the validation split, and prints accuracy and F1.

    Args:
        checkpoint: Pretrained checkpoint name used for both the tokenizer
            and the model.
        num_epochs: Number of passes over the training dataloader.
        batch_size: ``batch_size`` passed to the training and validation
            ``DataLoader`` objects.
        learning_rate: Learning rate passed to ``AdamW``.

    Returns:
        None. Metrics are reported through ``accelerator.print``.
    """
    # --- Preprocessing: load the sentence pairs and tokenize them together ---
    raw_datasets = load_dataset("glue", "mrpc")
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    def tokenize_function(example):
        # No padding here: the data collator pads each batch when it is built.
        return tokenizer(example["sentence1"], example["sentence2"], truncation=True)

    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # --- Post-processing: keep only what is fed to model(**batch) ---
    tokenized_datasets = tokenized_datasets.remove_columns(["sentence1", "sentence2", "idx"])
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch")

    # --- Dataloaders ---
    train_dataloader = DataLoader(
        tokenized_datasets["train"],
        shuffle=True,
        batch_size=batch_size,
        collate_fn=data_collator,
    )
    eval_dataloader = DataLoader(
        tokenized_datasets["validation"],
        batch_size=batch_size,
        collate_fn=data_collator,
    )

    # --- Model, optimizer and Accelerate wrapping ---
    accelerator = Accelerator()
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    train_dl, eval_dl, model, optimizer = accelerator.prepare(
        train_dataloader, eval_dataloader, model, optimizer
    )

    # The schedule length is computed from the *prepared* dataloader, whose
    # length is what the training loop below actually iterates over.
    num_training_steps = num_epochs * len(train_dl)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    # Show a single progress bar per machine instead of one per process.
    progress_bar = tqdm(
        range(num_training_steps),
        disable=not accelerator.is_local_main_process,
    )

    # --- Training loop ---
    model.train()
    for epoch in range(num_epochs):
        for batch in train_dl:
            outputs = model(**batch)
            loss = outputs.loss
            # accelerator.backward replaces loss.backward() under Accelerate.
            accelerator.backward(loss)

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
    progress_bar.close()

    # --- Evaluation on the validation split ---
    metric = evaluate.load("glue", "mrpc")
    model.eval()

    for batch in eval_dl:
        with torch.no_grad():
            outputs = model(**batch)

        predictions = torch.argmax(outputs.logits, dim=-1)

        # gather_for_metrics collects results from every process and drops the
        # samples Accelerate duplicated to even out the last batch, so the
        # metric is not skewed when running on more than one process.
        predictions, references = accelerator.gather_for_metrics(
            (predictions, batch["labels"])
        )
        metric.add_batch(predictions=predictions, references=references)

    eval_metric = metric.compute()

    accelerator.print(f"Accuracy: {eval_metric['accuracy']:.4f}, "f"F1: {eval_metric['f1']:.4f}")

    
   

    
    

In [17]:
from accelerate import notebook_launcher

# Run the training function through Accelerate's notebook launcher,
# using a single process.
notebook_launcher(
    training_function,
    num_processes=1,
)

Launching training on one GPU.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/1377 [00:00<?, ?it/s]

Accuracy: 0.8603, F1: 0.8998


In [3]:
!pip install evaluate


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6
