# Tutorial 5: Searching for Optimal Transformer Architectures for Sequence Classification (NAS)

In [None]:
# --- Model / data selection -------------------------------------------------
# The tokenizer is loaded from the same pretrained checkpoint as the model
# configuration, so both names point at the same identifier.
checkpoint = "bert-base-uncased"
tokenizer_checkpoint = checkpoint
dataset_name = "imdb"

# --- Search budget ----------------------------------------------------------
# Total number of trials the experiment may run.
NUM_TRIALS = 10
# Number of trials allowed to run at the same time.
TRIAL_CONCURRENCY = 3

# NOTE(review): the two values below are not referenced anywhere in the
# visible part of this notebook -- confirm whether later cells consume them.
EPOCHS_PER_TRIAL = 5
NUM_LATENCY_EVALUATION_ITERATIONS = 10

In [None]:
from transformers.models.bert.configuration_bert import BertConfig
from chop.actions.search.search_space import NasBertSpace

# Start from the configuration of the pretrained checkpoint and override the
# fields that define the search space.
cf = BertConfig.from_pretrained(checkpoint)
# Select the "eager" attention implementation.
# NOTE(review): presumably required so the NAS space can swap the attention
# module per layer -- confirm against NasBertSpace.
cf._attn_implementation = "eager"

# Full model parameters
# Use a 3-layer encoder instead of the checkpoint's default depth.
cf.num_hidden_layers = 3
# Candidate hidden sizes for the whole model.
cf.space_hidden_size = [128, 256, 512, 768, 1024]

# Per layer
# Each `space_*` attribute lists the candidate values for one design choice.
# NOTE(review): presumably NasBertSpace reads these attributes and samples one
# value per layer -- confirm against its implementation.
cf.space_self_attention_implementation = ["attention", "linear", "feedthrough"]
cf.space_self_attention_layer_norm = ["layer_norm", "identity"]
cf.space_output_layer_norm = ["layer_norm", "identity"]
cf.space_intermediate_size = [192, 384, 768, 1536, 3072]
cf.space_num_attention_heads = [2, 4, 8, 16]

# Build the search space object from the annotated configuration.
space = NasBertSpace(cf)

In [None]:
import nni
from chop.tools import get_tokenized_dataset, get_trainer

# Load the dataset named by `dataset_name`, tokenized with the tokenizer of
# `tokenizer_checkpoint`. `return_tokenizer=True` makes the helper hand back
# the tokenizer as well, which `fit` below passes on to the trainer.
dataset, tokenizer = get_tokenized_dataset(
    dataset=dataset_name,
    checkpoint=tokenizer_checkpoint,
    return_tokenizer=True,
)


def fit(model):
    """Train and evaluate one candidate model and report its score to NNI.

    This function is wrapped in a ``FunctionalEvaluator``. NNI does not use
    the return value of such a function; the score must be sent explicitly
    with ``nni.report_final_result``, otherwise the exploration strategy
    never receives a metric for the trial.

    Args:
        model: The candidate model sampled from the search space.

    Raises:
        KeyError: If the evaluation results contain no ``"eval_accuracy"``
            entry.
    """
    trainer = get_trainer(
        model=model,
        checkpoint=checkpoint,
        tokenized_dataset=dataset,
        tokenizer=tokenizer,
        evaluate_metric="accuracy",
    )

    # NOTE(review): no epoch count is passed here, so the training length is
    # whatever `get_trainer` defaults to; EPOCHS_PER_TRIAL is not used --
    # confirm whether it should be forwarded.
    trainer.train()

    # Evaluate accuracy
    eval_results = trainer.evaluate()

    # Report the trial's final metric so the strategy can rank this
    # architecture. The key assumes a HuggingFace-style trainer, which
    # prefixes evaluation metrics with "eval_".
    nni.report_final_result(eval_results["eval_accuracy"])

In [None]:
from nni.nas.evaluator import FunctionalEvaluator

# Wrap `fit` so NNI can call it with each candidate model.
evaluator = FunctionalEvaluator(fit)

In [None]:
import nni.nas.strategy as strategy

# Exploration strategy that decides which architecture to try next; here the
# TPE strategy shipped in `nni.nas.strategy`, with its default settings.
strat = strategy.TPE()

In [None]:
from nni.nas.experiment import NasExperimentConfig

# Start from NNI's default configuration for this space / evaluator / strategy
# combination, then set the search budget and GPU usage.
experiment_config = NasExperimentConfig.default(space, evaluator, strat)
experiment_config.max_trial_number = NUM_TRIALS  # run at most NUM_TRIALS trials in total
experiment_config.trial_concurrency = TRIAL_CONCURRENCY  # run up to TRIAL_CONCURRENCY trials at the same time
experiment_config.trial_gpu_number = 1  # use 1 GPU for each trial
# NOTE(review): presumably allows trials to be scheduled on GPUs that already
# have other processes running -- confirm against the NNI training-service docs.
experiment_config.training_service.use_active_gpu = True

In [None]:
import socket


def find_free_port():
    """Return a TCP port number that was unused at the time of the call.

    A temporary IPv4 TCP socket is bound to port 0, which lets the operating
    system choose a port; that port number is returned after the socket has
    been closed. Because the socket is closed before the caller uses the
    port, another process could claim it in the meantime.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", 0))
        # For AF_INET sockets the bound address is a (host, port) pair.
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()


# Ask the OS for a currently unused port instead of hard-coding one.
free_port = find_free_port()

# Show the chosen port so it can be looked up later.
print(free_port)

from nni.nas.experiment import NasExperiment

# Assemble the experiment from the search space, evaluator, strategy and
# configuration defined in the cells above, then launch it on the chosen port.
# NOTE(review): presumably this is the port of NNI's web UI / REST server --
# confirm against the NNI experiment documentation.
experiment = NasExperiment(space, evaluator, strat, config=experiment_config)
experiment.start(port=free_port)