# Fine-tuning com MLflow

Este notebook demonstra como treinar um modelo simples enquanto registra parâmetros e métricas no MLflow.

In [None]:
!pip install -q transformers datasets mlflow

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import mlflow

# Load the IMDB training split and keep a 1% sample for a quick demo run.
# NOTE(review): the IMDB train split appears to be stored grouped by label, so a
# leading slice such as 'train[:1%]' would likely contain a single class — confirm.
# Shuffling with a fixed seed before selecting keeps the sample reproducible and
# the same size as the original 1% slice.
full_train = load_dataset('imdb', split='train')
dataset = full_train.shuffle(seed=42).select(range(len(full_train) // 100))

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')


def tokenize_batch(batch):
    """Tokenize the 'text' column of a batch, truncating and padding to the model's max length."""
    return tokenizer(batch['text'], truncation=True, padding='max_length')


# batched=True hands the tokenizer a list of texts per call instead of one row at a time.
tokenized = dataset.map(tokenize_batch, batched=True)

In [None]:
# Train for one epoch on a tiny subset and record parameters and evaluation
# metrics in a single MLflow run.
# Assumes an MLflow tracking server is reachable at this address.
mlflow.set_tracking_uri('http://localhost:5000')
with mlflow.start_run():
    mlflow.log_param('model', 'bert-base-uncased')
    mlflow.log_param('dataset_size', len(tokenized))
    model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
    args = TrainingArguments('out', num_train_epochs=1, per_device_train_batch_size=8)

    # Disjoint slices: rows 0-31 are used for training, rows 32-63 for evaluation.
    # Trainer.evaluate() needs an evaluation dataset; without one it raises ValueError.
    train_subset = tokenized.select(range(32))
    eval_subset = tokenized.select(range(32, 64))

    # NOTE(review): recent transformers releases deprecate `tokenizer=` in favour of
    # `processing_class=` — confirm against the installed version.
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_subset,
        eval_dataset=eval_subset,
        tokenizer=tokenizer,
    )
    trainer.train()

    # evaluate() returns a dict of metric name -> value; cast to float for MLflow.
    metrics = trainer.evaluate()
    mlflow.log_metrics({k: float(v) for k, v in metrics.items()})