# HuggingFace Accelerate
Accelerate to biblioteka od HuggingFace, będąca prostym API pozwalającym na łatwe przyśpieszenie kodu dzięki m.in. mieszanej precyzji oraz przetwarzaniu na wielu GPU/TPU.

In [None]:
!pip3 install transformers[sentencepiece]==4.18.0
!pip3 install datasets==1.15.1
!pip3 install huggingface_hub>=0.1.0,<1.0.0
!pip install accelerate

Wykorzystamy wcześniejszy przykład z porównywaniem pytań na bazie qqp z glue, który rozbudujemy o wykorzystywanie Accelerate.
Na początku przeprowadzamy całą inicjalizację i tokenizację w taki sam sposób.

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding, TrainingArguments, AutoModelForSequenceClassification, Trainer, AdamW, get_scheduler
from torch.utils.data import DataLoader

def tokenize(sample):
    """Tokenize the two questions of a sample as a single sentence pair.

    Reads ``sample['question1']`` and ``sample['question2']`` and passes them
    to the module-level ``tokenizer`` with ``truncation=True``; returns
    whatever the tokenizer returns.
    """
    question_pair = (sample['question1'], sample['question2'])
    return tokenizer(*question_pair, truncation=True)

# Tokenizer and classification model are loaded from the same checkpoint.
checkpoint = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

raw_dataset = load_dataset("glue", "qqp")

# Keep only shard 0 out of 100 for every split to make the example quick.
raw_dataset.update({
    name: raw_dataset[name].shard(num_shards=100, index=0)
    for name in ('train', 'test', 'validation')
})

# Tokenize, drop the raw text / index columns, and rename the target column
# to "labels"; then have the datasets return torch tensors.
tokenized_datasets = (
    raw_dataset.map(tokenize)
    .remove_columns(['question1', 'question2', 'idx'])
    .rename_column("label", "labels")
)
tokenized_datasets.set_format("torch")

# The collator is handed the tokenizer and used as collate_fn for both loaders.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

train_dataloader = DataLoader(
    tokenized_datasets["train"],
    shuffle=True,
    batch_size=8,
    collate_fn=data_collator,
)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"],
    batch_size=8,
    collate_fn=data_collator,
)

optimizer = AdamW(model.parameters(), lr=5e-5)

# Linear schedule without warmup over all optimisation steps.
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

Teraz zaczynają się dopiero zmiany związane z Accelerate:
Usuwamy wszelkie wywołania *.to(device)* (ewentualnie *.cuda()*), jako że przenoszeniem na urządzenie zajmie się Accelerate.\
W funkcji musimy stworzyć obiekt klasy Accelerator, a następnie przekazać do jego metody prepare() wszystkie obiekty związane z treningiem.
Dodatkowo, zamiast *loss.backward()*, używamy *accelerator.backward(loss)*

In [None]:
from tqdm.auto import tqdm
from accelerate import Accelerator

# Training loop adapted to Accelerate. Compared with a plain PyTorch loop there
# are three changes: an Accelerator is created, the training objects go
# through prepare(), and the backward pass is run through the accelerator.
accelerator = Accelerator() # change 1: create the Accelerator object
model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
  model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
) # change 2: hand every training-related object to prepare() and use the returned ones

# One progress-bar tick per optimisation step.
progress_bar = tqdm(range(num_training_steps))

model.train()
for epoch in range(num_epochs):
    for batch in train_dataloader:
        # No manual device transfer of the batch here.
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss) # change 3: used instead of loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)

Jednak z powodu specyfiki działania Accelerate, aby zadziałało ono w formie notatnika, całość musimy zamknąć w funkcji, którą następnie uruchomimy przekazując ją do *notebook_launcher*.

In [None]:
from tqdm.auto import tqdm
from accelerate import Accelerator
from tqdm.auto import tqdm
from accelerate import Accelerator
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding, TrainingArguments, AutoModelForSequenceClassification, Trainer, AdamW, get_scheduler
from torch.utils.data import DataLoader

def tokenize(sample):
    """Tokenize ``question1`` / ``question2`` of a sample as one sentence pair.

    Note: ``tokenizer`` is looked up as a module-level name, so it must exist
    globally when this function is called.
    """
    first_question = sample['question1']
    second_question = sample['question2']
    return tokenizer(first_question, second_question, truncation=True)
  
def train():
    """Fine-tune ``bert-base-uncased`` on a 1/100 shard of GLUE QQP with Accelerate.

    The function is self-contained (model, data, optimizer, scheduler and the
    training loop are all created here) so that it can be passed to
    ``notebook_launcher``. It takes no arguments and returns nothing.
    """
    checkpoint = 'bert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

    def tokenize_pair(sample):
        # Closes over the tokenizer created above. The module-level tokenize()
        # looks up a *global* `tokenizer`, which this function never defines,
        # so using it here would fail (or silently use a stale object).
        return tokenizer(sample['question1'], sample['question2'], truncation=True)

    raw_dataset = load_dataset("glue", "qqp")

    # Keep only shard 0 out of 100 for every split to make the example quick.
    raw_dataset['train'] = raw_dataset['train'].shard(num_shards=100, index=0)
    raw_dataset['test'] = raw_dataset['test'].shard(num_shards=100, index=0)
    raw_dataset['validation'] = raw_dataset['validation'].shard(num_shards=100, index=0)

    tokenized_datasets = raw_dataset.map(tokenize_pair)
    tokenized_datasets = tokenized_datasets.remove_columns(['question1', 'question2', 'idx'])
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
    tokenized_datasets.set_format("torch")

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    train_dataloader = DataLoader(
        tokenized_datasets["train"], shuffle=True, batch_size=8, collate_fn=data_collator
    )
    eval_dataloader = DataLoader(
        tokenized_datasets["validation"], batch_size=8, collate_fn=data_collator
    )

    optimizer = AdamW(model.parameters(), lr=5e-5)

    # The scheduler length is computed on the dataloader *before* prepare(),
    # exactly as in the original code.
    num_epochs = 3
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_scheduler(
        "linear",
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )

    accelerator = Accelerator()
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # Size the bar from the prepared dataloader, i.e. from the number of
    # iterations this process will really run, and show it only on the local
    # main process so several processes do not each print their own bar.
    progress_bar = tqdm(
        range(num_epochs * len(train_dataloader)),
        disable=not accelerator.is_local_main_process,
    )

    model.train()
    for epoch in range(num_epochs):
        for batch in train_dataloader:
            outputs = model(**batch)
            loss = outputs.loss
            # Backward pass goes through the accelerator instead of loss.backward().
            accelerator.backward(loss)

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)

In [None]:
from accelerate import notebook_launcher

# Run the self-contained train() function through Accelerate's notebook launcher.
notebook_launcher(train)