```python
import logging
import math

import torch
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from transformers import RobertaConfig, RobertaForSequenceClassification
from accelerate import Accelerator

# The UPPERCASE hyperparameters and the train_dl / eval_dl dataloaders
# are defined elsewhere in my script.
logger = logging.getLogger(__name__)
accelerator = Accelerator()

roberta_config = RobertaConfig(
    hidden_size=HIDDEN_SIZE,
    num_attention_heads=NUM_ATTENTION_HEADS,
    num_hidden_layers=NUM_HIDDEN_LAYERS,
    intermediate_size=INTERMEDIATE_SIZE,
    hidden_dropout_prob=HIDDEN_DROPOUT_PROB,
    max_position_embeddings=MAX_POSITION_EMBEDDINGS,
    type_vocab_size=TYPE_VOCAB_SIZE,
    initializer_range=INITIALIZER_RANGE,
    num_labels=NUM_LABELS,
)

# Initialize the model from scratch
model = RobertaForSequenceClassification(roberta_config)
logger.info(model.config)

# Optimizer with weight decay applied to everything except biases and LayerNorm
optimizer = torch.optim.AdamW(
    [
        {
            "params": [p for n, p in model.named_parameters() if "bias" not in n and "LayerNorm" not in n],
            "weight_decay": WEIGHT_DECAY,
        },
        {
            "params": [p for n, p in model.named_parameters() if "bias" in n or "LayerNorm" in n],
            "weight_decay": 0.0,
        },
    ],
    lr=LEARNING_RATE,
    eps=ADAM_EPS,
)

# Alter this as needed for class weights.
class_weights = torch.tensor([1.0, 12.0], dtype=torch.float)
loss_fn = torch.nn.CrossEntropyLoss(weight=class_weights)

# Prepare with Accelerator (loss_fn is prepared too so the class weights land on the right device)
model, optimizer, train_dl, eval_dl, loss_fn = accelerator.prepare(
    model, optimizer, train_dl, eval_dl, loss_fn
)

# Learning rate scheduler
total_training_steps = math.ceil(len(train_dl) * EPOCHS / GRADIENT_ACCUMULATION_STEPS)
lr_scheduler = CosineAnnealingWarmRestarts(
    optimizer,
    T_0=1,
    T_mult=2,
    eta_min=1e-6,  # Minimum learning rate
)

model.train()
completed_steps = 0
for epoch in range(EPOCHS):
    for step, batch in enumerate(train_dl):
        # Forward pass
        outputs = model(**batch)
        # loss = outputs.loss
        loss = loss_fn(outputs.logits, batch["labels"])
        # Normalize loss for gradient accumulation
        loss = loss / GRADIENT_ACCUMULATION_STEPS
        # Backward pass
        accelerator.backward(loss)
        if (step + 1) % GRADIENT_ACCUMULATION_STEPS == 0 or step == len(train_dl) - 1:
            accelerator.clip_grad_norm_(model.parameters(), max_norm=MAX_NORM)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            completed_steps += 1
            if completed_steps % LOG_STEPS == 0:
                progress = f"Epoch: {epoch}, Step: {completed_steps}/{total_training_steps}, Loss: {loss.item() * GRADIENT_ACCUMULATION_STEPS}"
                accelerator.print(progress)
```
I have some code like the above, which lets me configure different hyperparameters along with a custom loss function (shown here with a weighted CrossEntropyLoss), AdamW, and one of torch's LR schedulers. I was wondering whether there is something similar to Trainer.hyperparameter_search for custom components like this.

Of course, I understand it's a bit tricky to handle custom components, but I just wanted to know whether it's even possible, or whether I would need to manually configure several runs to arrive at the ideal values instead (roughly along the lines of the sketch below).
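For what it's worth, here is roughly what I imagine the manual route would look like if I wrapped the loop above in an Optuna objective. This is only a sketch: `train_and_evaluate` is a hypothetical helper that would rebuild the model, optimizer, and loss with the sampled values and run the training loop, and the parameter names and ranges are just examples.

```python
import optuna

def objective(trial):
    # Hypothetical search space; names and ranges are illustrative only.
    lr = trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1)
    pos_weight = trial.suggest_float("positive_class_weight", 5.0, 20.0)

    # train_and_evaluate is a placeholder: it would rebuild the model,
    # optimizer, and weighted loss with these values, run the training
    # loop from above, and return an eval metric (e.g. loss on eval_dl).
    eval_loss = train_and_evaluate(lr=lr, weight_decay=weight_decay, pos_weight=pos_weight)
    return eval_loss

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20)
print(study.best_params)
```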
Thanks!