In [None]:
# default_exp utils

In [None]:
#hide
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# utils

> Various utility functions used by the blurr package.

In [None]:
#export
import sys, inspect
from enum import Enum

import torch
from transformers import *
from fastai.text.all import *

logging.set_verbosity_error()

In [None]:
#hide
import pdb

from nbdev.showdoc import *
from fastcore.test import *

from fastai import __version__ as fa_version
from torch import __version__ as pt_version
from transformers import __version__ as hft_version

print(f'Using pytorch {pt_version}')
print(f'Using fastai {fa_version}')
print(f'Using transformers {hft_version}')

Using pytorch 1.7.1
Using fastai 2.1.8
Using transformers 4.0.1


In [None]:
# #cuda
# torch.cuda.set_device(1)
# print(f'Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}')

In [None]:
#export
def str_to_class(classname):
    "Resolve `classname` to the object of that name in this module's namespace"
    # everything pulled in by the module-level imports is an attribute of this module object
    this_module = sys.modules[__name__]
    return getattr(this_module, classname)

In [None]:
#export
class Singleton:
    """Class decorator that makes the decorated class a singleton.

    The decorated name is rebound to a `Singleton` object. Calling it builds the wrapped class once
    (with whatever arguments that first call received) and returns that same cached instance on
    every later call; arguments passed to later calls are ignored.
    """
    def __init__(self,cls):
        # `_cls` is the wrapped class, `_instance` stays `None` until the first call
        self._cls, self._instance = cls, None

    def __call__(self, *args, **kwargs):
        # identity check on purpose: `== None` would dispatch to the wrapped class's own `__eq__`,
        # and a permissive `__eq__` would cause a new instance to be built on every call
        if self._instance is None: self._instance = self._cls(*args, **kwargs)
        return self._instance

`Singleton` functions as a Python decorator.  Use it above any class to turn that class into a singleton (see [here](https://python-3-patterns-idioms-test.readthedocs.io/en/latest/Singleton.html) for more info on the singleton pattern).

In [None]:
@Singleton
class TestSingleton: pass

a = TestSingleton()
b = TestSingleton()
test_eq(a,b)
# equality alone does not prove the singleton contract: both names must refer to the very same object
test_is(a,b)

## ModelHelper

In [None]:
#export
@Singleton
class ModelHelper():
    """Queryable catalog of the huggingface `transformers` classes visible in this module's namespace.

    On construction, every class whose `__module__` starts with `transformers.` is collected into a
    DataFrame (`self._df`), one row per class, with columns derived from its module path
    (`functional_area`, `arch`) and, for modeling classes, from its class name (`model_task`).
    The query methods filter that DataFrame and resolve class names back to classes via `str_to_class`.
    """
    
    def __init__(self):
        # get hf classes (tokenizers, configs, models, etc...)
        transformer_classes = inspect.getmembers(sys.modules[__name__], 
                                                 lambda member: inspect.isclass(member)
                                                 and member.__module__.startswith('transformers.'))
        
        # build a df that we can query against to get various transformers objects/info
        self._df = pd.DataFrame(transformer_classes, columns=['class_name', 'class_location'])
        
        # add the module each class is included in
        self._df['module'] = self._df.class_location.apply(lambda v: v.__module__)
        
        # remove class_location (don't need it anymore)
        self._df = self._df.drop(labels=['class_location'], axis=1)
        
        # break up the module into separate cols (module_part_0, module_part_1, ...)
        module_parts_df = self._df.module.str.split(".", n = -1, expand = True) 
        for i in range(len(module_parts_df.columns)):
            self._df[f'module_part_{i}'] = module_parts_df[i]

        # using module part 3, break up the functional area and arch into separate cols; only the first
        # underscore splits, e.g. `modeling_transfo_xl` -> (`modeling`, `transfo_xl`)
        module_part_3_df = self._df.module_part_3.str.split("_", n = 1, expand = True) 
        self._df[['functional_area', 'arch']] = module_part_3_df
        
        # if functional area = modeling, pull out the task it is built for: split the class name on the
        # first 'For' and keep what follows (e.g. `AlbertForMaskedLM` -> `MaskedLM`)
        model_type_df = self._df[(self._df.functional_area == 'modeling')].class_name.str.split('For', n=1, expand=True)
        
        # the 'For' prefix is put back here and stripped again just below, so the net value is the
        # text after the first 'For'; class names without 'For' keep a null task
        model_type_df[1] = np.where(model_type_df[1].notnull(), 
                                    'For' + model_type_df[1].astype(str), 
                                    model_type_df[1])
        
        # index-aligned assignment: rows that are not modeling classes get NaN
        self._df['model_task'] = model_type_df[1]
        self._df['model_task'] = self._df['model_task'].str.replace('For', '', n=1, case=True, regex=False)
        
        # same again for class names containing 'With'; names without 'With' fall back to the
        # 'For'-derived task computed above
        model_type_df = self._df[(self._df.functional_area == 'modeling')].class_name.str.split('With', n=1, expand=True)
        model_type_df[1] = np.where(model_type_df[1].notnull(), 
                                    'With' + model_type_df[1].astype(str), 
                                    self._df[(self._df.functional_area == 'modeling')].model_task)
        
        self._df['model_task'] = model_type_df[1]
        self._df['model_task'] = self._df['model_task'].str.replace('With', '', n=1, case=True, regex=False)
        
        # look at what we're going to remove (use to verify we're just getting rid of stuff we want to)
        # self._df[~self._df['functional_area'].isin(['modeling', 'configuration', 'tokenization'])]
        
        # only need these 3 functional areas for our querying purposes
        self._df = self._df[self._df['functional_area'].isin(['modeling', 'configuration', 'tokenization'])]
        
    def get_architectures(self): 
        """Used to get all the architectures supported by your `Transformers` install.

        Returns a sorted list of the distinct, non-null `arch` values, excluding `'utils'`.
        """
        # `notna()` already drops both `None` and `NaN`, so no separate `!= None` comparison is needed
        has_arch = self._df.arch.notna() & (self._df.arch != 'utils')
        return sorted(self._df[has_arch].arch.unique().tolist())
    
    def get_config(self, arch): 
        """Used to locate the configuration class for a given architecture.

        Returns the class itself (not its name). Raises `IndexError` if no configuration class is
        recorded for `arch`.
        """
        config = self._df[(self._df.functional_area == 'configuration') & 
                          (self._df.arch == arch)].class_name.values[0]
        
        return str_to_class(config)
    
    def get_tokenizers(self, arch): 
        """Used to get the available huggingface tokenizers for a given architecture. Note: There may be 
        multiple tokenizers and so this returns a list (of classes, ordered by class name).
        """
        toks = sorted(self._df[(self._df.functional_area == 'tokenization') & 
                               (self._df.arch == arch)].class_name.values)
        
        return [str_to_class(tok_name) for tok_name in toks]
    
    def get_tasks(self, arch=None): 
        """Get the type of tasks for which there is a custom model for (*optional: by architecture*). 
        There are a number of customized models built for specific tasks like token classification, 
        question/answering, LM, etc....

        Returns a sorted list of task names (strings).
        """
        query = ['model_task.notna()']
        if (arch): query.append(f'arch == "{arch}"')

        # the python engine is requested explicitly because the expression calls a method (`.notna()`)
        return sorted(self._df.query(' & '.join(query), engine='python').model_task.unique().tolist())
    
    def get_models(self, arch=None, task=None):
        """The transformer models available for use (optional: by architecture | task).

        Returns a list of model classes ordered by class name.
        """
        query = ['functional_area == "modeling"']
        if (arch): query.append(f'arch == "{arch}"')
        if (task): query.append(f'model_task == "{task}"')

        models = sorted(self._df.query(' & '.join(query)).class_name.tolist())
        return [str_to_class(model_name) for model_name in models] 
    
    def get_classes_for_model(self, model_name_or_cls):
        """Get the config class, tokenizer classes, and model class for a given model name / class.

        Returns a `(config, tokenizers, model)` tuple where `tokenizers` is a list. Raises `IndexError`
        if the model name is not in the catalog.
        """
        model_name = model_name_or_cls if isinstance(model_name_or_cls, str) else model_name_or_cls.__name__

        meta = self._df[self._df.class_name == model_name]
        arch = meta.arch.values[0]
        tokenizers = self.get_tokenizers(arch)
        config = self.get_config(arch)

        return (config, tokenizers, str_to_class(model_name))
    
    def get_model_architecture(self, model_name_or_enum):
        """Get the architecture for a given model name / enum (anything that is not a `str` is
        expected to expose the class name via `.name`).
        """
        model_name = model_name_or_enum if isinstance(model_name_or_enum, str) else model_name_or_enum.name
        return self._df[self._df.class_name == model_name].arch.values[0]
    
    def get_hf_objects(self, pretrained_model_name_or_path, task=None,
                       config=None, tokenizer_cls=None, model_cls=None, 
                       config_kwargs=None, tokenizer_kwargs=None, model_kwargs=None, cache_dir=None):
        """Returns the architecture (str), config (obj), tokenizer (obj), and model (obj) given at minimum a
        `pre-trained model name or path`. Specify a `task` (an enum member such as `HF_TASKS_AUTO.MaskedLM`,
        or its name as a string) to ensure the right "AutoModelFor<task>" is used to create the model.
        
        Optionally, you can pass a config (obj), tokenizer (class), and/or model (class) (along with any 
        related kwargs for each) to get as specific as you want w/r/t what huggingface objects are returned.
        An explicit `model_cls` takes precedence over `task`.

        The result is always ordered `(arch, config, tokenizer, model)`.
        """
        # `None` defaults instead of shared mutable `{}` defaults; each call gets its own empty dict
        config_kwargs = {} if config_kwargs is None else config_kwargs
        tokenizer_kwargs = {} if tokenizer_kwargs is None else tokenizer_kwargs
        model_kwargs = {} if model_kwargs is None else model_kwargs
        
        # config
        if (config is None):
            config = AutoConfig.from_pretrained(pretrained_model_name_or_path, cache_dir=cache_dir, **config_kwargs)
            
        # tokenizer
        if (tokenizer_cls is None): tokenizer_cls = AutoTokenizer
        tokenizer = tokenizer_cls.from_pretrained(pretrained_model_name_or_path, 
                                                  cache_dir=cache_dir, 
                                                  **tokenizer_kwargs)
            
        # model
        if (model_cls is None):
            if (task is None):
                model_cls = AutoModel
            else:
                # accept either an enum member or the plain task name
                task_name = task if isinstance(task, str) else task.name
                # first (by class name) `auto` modeling class registered for this task
                model_cls = self.get_models(arch="auto", task=task_name)[0]
            
        model = model_cls.from_pretrained(pretrained_model_name_or_path, 
                                          config=config, 
                                          cache_dir=cache_dir, 
                                          **model_kwargs)
            
        # arch: looked up from the concrete class of the loaded model (an Auto* class returns an
        # architecture-specific model instance)
        arch = self.get_model_architecture(type(model).__name__)
        
        return (arch, config, tokenizer, model)

`ModelHelper` is a `Singleton` (there exists only one instance, and the same instance is returned upon subsequent instantiation requests).  You can get at it via the `BLURR_MODEL_HELPER` constant below.

In [None]:
mh = ModelHelper()
# mh2 = ModelHelper()
# test_eq(mh, mh2)

In [None]:
#hide
display_df(mh._df.head())

print(list(mh._df.model_task.unique()))
print('')
print(list(mh._df.functional_area.unique()))
print('')
print(list(mh._df.module_part_2.unique()))
print('')
print(list(mh._df.module_part_3.unique()))

Unnamed: 0,class_name,module,module_part_0,module_part_1,module_part_2,module_part_3,functional_area,arch,model_task
3,AdaptiveEmbedding,transformers.models.transfo_xl.modeling_transfo_xl,transformers,models,transfo_xl,modeling_transfo_xl,modeling,transfo_xl,
4,AlbertConfig,transformers.models.albert.configuration_albert,transformers,models,albert,configuration_albert,configuration,albert,
5,AlbertForMaskedLM,transformers.models.albert.modeling_albert,transformers,models,albert,modeling_albert,modeling,albert,MaskedLM
6,AlbertForMultipleChoice,transformers.models.albert.modeling_albert,transformers,models,albert,modeling_albert,modeling,albert,MultipleChoice
7,AlbertForPreTraining,transformers.models.albert.modeling_albert,transformers,models,albert,modeling_albert,modeling,albert,PreTraining


[None, nan, 'MaskedLM', 'MultipleChoice', 'PreTraining', 'QuestionAnswering', 'SequenceClassification', 'TokenClassification', 'CausalLM', 'NextSentencePrediction', 'Seq2SeqLM', 'LMHead', 'ConditionalGeneration', 'QuestionAnsweringSimple', 'LMHeadModel', 'Classification', 'Generation']

['modeling', 'configuration', 'tokenization']

['transfo_xl', 'albert', 'auto', 'bart', 'bert', 'bert_generation', 'bert_japanese', 'bertweet', 'blenderbot', 'ctrl', 'camembert', 'dpr', 'deberta', 'distilbert', 'electra', 'encoder_decoder', 'fsmt', 'flaubert', 'funnel', 'gpt2', 'herbert', 'layoutlm', 'longformer', 'lxmert', 'mbart', 'mmbt', 'mt5', 'marian', 'mobilebert', 'openai', 'pegasus', 'phobert', 'prophetnet', 'rag', 'reformer', 'retribert', 'roberta', 'squeezebert', 't5', 'xlm', 'xlm_roberta', 'xlnet', 'xlm_prophetnet']

['modeling_transfo_xl', 'configuration_albert', 'modeling_albert', 'tokenization_albert', 'tokenization_albert_fast', 'configuration_auto', 'modeling_auto', 'tokenization_auto', 

### Provide global helper constant

Users of this library can simply use `BLURR_MODEL_HELPER` to access all the `ModelHelper` capabilities without having to fetch an instance themselves.

In [None]:
#export
BLURR_MODEL_HELPER = ModelHelper()

Here's how you can get at the core huggingface objects you need to work with ...

... the ***task***

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_tasks)

<h4 id="ModelHelper.get_tasks" class="doc_header"><code>ModelHelper.get_tasks</code><a href="__main__.py#L75" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_tasks</code>(**`arch`**=*`None`*)

Get the type of tasks for which there is a custom model for (*optional: by architecture*). 
There are a number of customized models built for specific tasks like token classification, 
question/answering, LM, etc....

In [None]:
print(BLURR_MODEL_HELPER.get_tasks())
print('')
print(BLURR_MODEL_HELPER.get_tasks('bart'))

['CausalLM', 'Classification', 'ConditionalGeneration', 'Generation', 'LMHead', 'LMHeadModel', 'MaskedLM', 'MultipleChoice', 'NextSentencePrediction', 'PreTraining', 'QuestionAnswering', 'QuestionAnsweringSimple', 'Seq2SeqLM', 'SequenceClassification', 'TokenClassification']

['ConditionalGeneration', 'QuestionAnswering', 'SequenceClassification']


... the ***architecture***

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_architectures)

<h4 id="ModelHelper.get_architectures" class="doc_header"><code>ModelHelper.get_architectures</code><a href="__main__.py#L53" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_architectures</code>()

Used to get all the architectures supported by your `Transformers` install

In [None]:
print(BLURR_MODEL_HELPER.get_architectures())

['albert', 'albert_fast', 'auto', 'bart', 'bart_fast', 'bert', 'bert_fast', 'bert_generation', 'bert_japanese', 'bertweet', 'blenderbot', 'camembert', 'camembert_fast', 'ctrl', 'deberta', 'distilbert', 'distilbert_fast', 'dpr', 'dpr_fast', 'electra', 'electra_fast', 'encoder_decoder', 'flaubert', 'fsmt', 'funnel', 'funnel_fast', 'gpt2', 'gpt2_fast', 'herbert', 'herbert_fast', 'layoutlm', 'layoutlm_fast', 'longformer', 'longformer_fast', 'lxmert', 'lxmert_fast', 'marian', 'mbart', 'mbart_fast', 'mmbt', 'mobilebert', 'mobilebert_fast', 'mt5', 'openai', 'openai_fast', 'pegasus', 'pegasus_fast', 'phobert', 'prophetnet', 'rag', 'reformer', 'reformer_fast', 'retribert', 'retribert_fast', 'roberta', 'roberta_fast', 'squeezebert', 'squeezebert_fast', 't5', 't5_fast', 'tf_albert', 'tf_auto', 'tf_bart', 'tf_bert', 'tf_blenderbot', 'tf_camembert', 'tf_ctrl', 'tf_distilbert', 'tf_dpr', 'tf_electra', 'tf_flaubert', 'tf_funnel', 'tf_gpt2', 'tf_longformer', 'tf_lxmert', 'tf_marian', 'tf_mbart', 'tf_m

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_model_architecture)

<h4 id="ModelHelper.get_model_architecture" class="doc_header"><code>ModelHelper.get_model_architecture</code><a href="__main__.py#L104" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_model_architecture</code>(**`model_name_or_enum`**)

Get the architecture for a given model name / enum

In [None]:
print(BLURR_MODEL_HELPER.get_model_architecture('RobertaForSequenceClassification'))

roberta


... the ***config*** class for that particular architecture

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_config)

<h4 id="ModelHelper.get_config" class="doc_header"><code>ModelHelper.get_config</code><a href="__main__.py#L59" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_config</code>(**`arch`**)

Used the locate the name of the configuration class for a given architecture

In [None]:
print(BLURR_MODEL_HELPER.get_config('bert'))

<class 'transformers.models.bert.configuration_bert.BertConfig'>


... the available ***tokenizers*** for that architecture

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_tokenizers)

<h4 id="ModelHelper.get_tokenizers" class="doc_header"><code>ModelHelper.get_tokenizers</code><a href="__main__.py#L66" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_tokenizers</code>(**`arch`**)

Used to get the available huggingface tokenizers for a given architecture. Note: There may be 
multiple tokenizers and so this returns a list.

In [None]:
print(BLURR_MODEL_HELPER.get_tokenizers('electra'))

[<class 'transformers.models.electra.tokenization_electra.ElectraTokenizer'>]


... and lastly the ***models*** (optionally for a given task and/or architecture)

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_models)

<h4 id="ModelHelper.get_models" class="doc_header"><code>ModelHelper.get_models</code><a href="__main__.py#L85" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_models</code>(**`arch`**=*`None`*, **`task`**=*`None`*)

The transformer models available for use (optional: by architecture | task)

In [None]:
print(L(BLURR_MODEL_HELPER.get_models())[:5])

[<class 'transformers.models.transfo_xl.modeling_transfo_xl.AdaptiveEmbedding'>, <class 'transformers.models.albert.modeling_albert.AlbertForMaskedLM'>, <class 'transformers.models.albert.modeling_albert.AlbertForMultipleChoice'>, <class 'transformers.models.albert.modeling_albert.AlbertForPreTraining'>, <class 'transformers.models.albert.modeling_albert.AlbertForQuestionAnswering'>]


In [None]:
print(BLURR_MODEL_HELPER.get_models(arch='bert')[:5])

[<class 'transformers.models.bert.modeling_bert.BertForMaskedLM'>, <class 'transformers.models.bert.modeling_bert.BertForMultipleChoice'>, <class 'transformers.models.bert.modeling_bert.BertForNextSentencePrediction'>, <class 'transformers.models.bert.modeling_bert.BertForPreTraining'>, <class 'transformers.models.bert.modeling_bert.BertForQuestionAnswering'>]


In [None]:
print(BLURR_MODEL_HELPER.get_models(task='TokenClassification')[:5])

[<class 'transformers.models.albert.modeling_albert.AlbertForTokenClassification'>, <class 'transformers.models.auto.modeling_auto.AutoModelForTokenClassification'>, <class 'transformers.models.bert.modeling_bert.BertForTokenClassification'>, <class 'transformers.models.camembert.modeling_camembert.CamembertForTokenClassification'>, <class 'transformers.models.distilbert.modeling_distilbert.DistilBertForTokenClassification'>]


In [None]:
print(BLURR_MODEL_HELPER.get_models(arch='bert', task='TokenClassification'))

[<class 'transformers.models.bert.modeling_bert.BertForTokenClassification'>]


Here we define some helpful enums to make it easier to get at the *architecture and task* you're looking for.

In [None]:
#export
# one enum member per architecture name reported by the helper; the names arrive sorted, so the
# auto-assigned member values (starting at 1) follow alphabetical order
HF_ARCHITECTURES = Enum('HF_ARCHITECTURES', BLURR_MODEL_HELPER.get_architectures())

In [None]:
print(L(HF_ARCHITECTURES)[:5])

[<HF_ARCHITECTURES.albert: 1>, <HF_ARCHITECTURES.albert_fast: 2>, <HF_ARCHITECTURES.auto: 3>, <HF_ARCHITECTURES.bart: 4>, <HF_ARCHITECTURES.bart_fast: 5>]


In [None]:
#export
# every task name found across all architectures
HF_TASKS_ALL = Enum('HF_TASKS_ALL', BLURR_MODEL_HELPER.get_tasks())
# only the task names found on classes of the `auto` architecture
HF_TASKS_AUTO = Enum('HF_TASKS_AUTO', BLURR_MODEL_HELPER.get_tasks('auto'))

In [None]:
print('--- all tasks ---')
print(L(HF_TASKS_ALL))
print('\n--- auto only ---')
print(L(HF_TASKS_AUTO))

--- all tasks ---
[<HF_TASKS_ALL.CausalLM: 1>, <HF_TASKS_ALL.Classification: 2>, <HF_TASKS_ALL.ConditionalGeneration: 3>, <HF_TASKS_ALL.Generation: 4>, <HF_TASKS_ALL.LMHead: 5>, <HF_TASKS_ALL.LMHeadModel: 6>, <HF_TASKS_ALL.MaskedLM: 7>, <HF_TASKS_ALL.MultipleChoice: 8>, <HF_TASKS_ALL.NextSentencePrediction: 9>, <HF_TASKS_ALL.PreTraining: 10>, <HF_TASKS_ALL.QuestionAnswering: 11>, <HF_TASKS_ALL.QuestionAnsweringSimple: 12>, <HF_TASKS_ALL.Seq2SeqLM: 13>, <HF_TASKS_ALL.SequenceClassification: 14>, <HF_TASKS_ALL.TokenClassification: 15>]

--- auto only ---
[<HF_TASKS_AUTO.CausalLM: 1>, <HF_TASKS_AUTO.LMHead: 2>, <HF_TASKS_AUTO.MaskedLM: 3>, <HF_TASKS_AUTO.MultipleChoice: 4>, <HF_TASKS_AUTO.NextSentencePrediction: 5>, <HF_TASKS_AUTO.PreTraining: 6>, <HF_TASKS_AUTO.QuestionAnswering: 7>, <HF_TASKS_AUTO.Seq2SeqLM: 8>, <HF_TASKS_AUTO.SequenceClassification: 9>, <HF_TASKS_AUTO.TokenClassification: 10>]


In [None]:
HF_TASKS_ALL.Classification

<HF_TASKS_ALL.Classification: 2>

`BLURR_MODEL_HELPER.get_classes_for_model` can be used to get the config, tokenizer, and model *classes* you want

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_classes_for_model)

<h4 id="ModelHelper.get_classes_for_model" class="doc_header"><code>ModelHelper.get_classes_for_model</code><a href="__main__.py#L94" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_classes_for_model</code>(**`model_name_or_cls`**)

Get tokenizers, config, and model for a given model name / class

In [None]:
config, tokenizers, model = BLURR_MODEL_HELPER.get_classes_for_model('RobertaForSequenceClassification')

print(config)
print(tokenizers[0])
print(model)

<class 'transformers.models.roberta.configuration_roberta.RobertaConfig'>
<class 'transformers.models.roberta.tokenization_roberta.RobertaTokenizer'>
<class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'>


In [None]:
config, tokenizers, model = BLURR_MODEL_HELPER.get_classes_for_model(DistilBertModel)

print(config)
print(tokenizers[0])
print(model)

<class 'transformers.models.distilbert.configuration_distilbert.DistilBertConfig'>
<class 'transformers.models.distilbert.tokenization_distilbert.DistilBertTokenizer'>
<class 'transformers.models.distilbert.modeling_distilbert.DistilBertModel'>


### Methods for loading pre-trained (config, tokenizer, model) huggingface objects

In [None]:
# document the method via the shared instance; `ModelHelper(ModelHelper)` passed the wrapper as a
# constructor argument and only worked because an instance was already cached
show_doc(BLURR_MODEL_HELPER.get_hf_objects)

<h4 id="ModelHelper.get_hf_objects" class="doc_header"><code>ModelHelper.get_hf_objects</code><a href="__main__.py#L109" class="source_link" style="float:right">[source]</a></h4>

> <code>ModelHelper.get_hf_objects</code>(**`pretrained_model_name_or_path`**, **`task`**=*`None`*, **`config`**=*`None`*, **`tokenizer_cls`**=*`None`*, **`model_cls`**=*`None`*, **`config_kwargs`**=*`{}`*, **`tokenizer_kwargs`**=*`{}`*, **`model_kwargs`**=*`{}`*, **`cache_dir`**=*`None`*)

Returns the architecture (str), config (obj), tokenizer (obj), and model (obj) given at minimum a
`pre-trained model name or path`. Specify a `task` to ensure the right "AutoModelFor<task>" is used to
create the model.

Optionally, you can pass a config (obj), tokenizer (class), and/or model (class) (along with any 
related kwargs for each) to get as specific as you want w/r/t what huggingface objects are returned.

In [None]:
arch, config, tokenizer, model = BLURR_MODEL_HELPER.get_hf_objects("bert-base-cased-finetuned-mrpc",
                                                                   task=HF_TASKS_AUTO.MaskedLM)

print(arch)
print(type(config))
print(type(tokenizer))
print(type(model))

bert
<class 'transformers.models.bert.configuration_bert.BertConfig'>
<class 'transformers.models.bert.tokenization_bert_fast.BertTokenizerFast'>
<class 'transformers.models.bert.modeling_bert.BertForMaskedLM'>


In [None]:
# `get_hf_objects` returns (arch, config, tokenizer, model) -- unpack in that order so that
# `config` and `tokenizer` are not swapped
arch, config, tokenizer, model = BLURR_MODEL_HELPER.get_hf_objects("fmikaelian/flaubert-base-uncased-squad",
                                                                   task=HF_TASKS_AUTO.QuestionAnswering)

print(arch)
print(type(config))
print(type(tokenizer))
print(type(model))

flaubert
<class 'transformers.models.flaubert.tokenization_flaubert.FlaubertTokenizer'>
<class 'transformers.models.flaubert.configuration_flaubert.FlaubertConfig'>
<class 'transformers.models.flaubert.modeling_flaubert.FlaubertForQuestionAnsweringSimple'>


In [None]:
# `get_hf_objects` returns (arch, config, tokenizer, model) -- unpack in that order so that
# `config` and `tokenizer` are not swapped
arch, config, tokenizer, model = BLURR_MODEL_HELPER.get_hf_objects("bert-base-cased-finetuned-mrpc",
                                                                   config=None,
                                                                   tokenizer_cls=BertTokenizer, 
                                                                   model_cls=BertForNextSentencePrediction)
print(arch)
print(type(config))
print(type(tokenizer))
print(type(model))

bert
<class 'transformers.models.bert.tokenization_bert.BertTokenizer'>
<class 'transformers.models.bert.configuration_bert.BertConfig'>
<class 'transformers.models.bert.modeling_bert.BertForNextSentencePrediction'>


## Cleanup

In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_utils.ipynb.
Converted 01_data-core.ipynb.
Converted 01a_data-token-classification.ipynb.
Converted 01b_data-question-answering.ipynb.
Converted 01za_data-text2text-core.ipynb.
Converted 01zb_data-text2text-language-modeling.ipynb.
Converted 01zc_data-text2text-summarization.ipynb.
Converted 02_modeling-core.ipynb.
Converted 02a_modeling-token-classification.ipynb.
Converted 02b_modeling-question-answering.ipynb.
Converted 02za_modeling-text2text-core.ipynb.
Converted 02zb_modeling-text2text-language-modeling.ipynb.
Converted 02zc_modeling-text2text-summarization.ipynb.
Converted 99a_examples-multilabel.ipynb.
Converted index.ipynb.
