In [None]:
import os, random
import numpy as np
import torch
from transformers import set_seed as hf_set_seed

SEED = 42

# NOTE: Python reads PYTHONHASHSEED once, at interpreter start-up. Assigning it
# here does NOT change str/bytes hashing inside this already-running kernel; the
# value is only inherited by processes launched from it. To get stable hashing
# in the kernel itself, export the variable before starting Jupyter.
os.environ["PYTHONHASHSEED"] = str(SEED)

# cuBLAS workspace configuration needed for deterministic CUDA matmuls
# (":4096:8" is the other accepted value and may suit some GPUs better).
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"

# Turn off the cuDNN auto-tuner: benchmarking may pick a different (possibly
# non-deterministic) algorithm from run to run.
torch.backends.cudnn.benchmark = False

# Request strictly deterministic algorithms (this may slow training down).
# Only the "API is missing" case falls back to the legacy cuDNN flag; any other
# failure is allowed to surface instead of being silently swallowed.
if hasattr(torch, "use_deterministic_algorithms"):
    torch.use_deterministic_algorithms(True)
else:
    # Older PyTorch releases without torch.use_deterministic_algorithms.
    torch.backends.cudnn.deterministic = True

# Seed every random number generator in use, in a fixed order:
# Python's `random`, NumPy, PyTorch (CPU), PyTorch (all CUDA devices), and
# finally the Hugging Face helper (seeds random/numpy/torch; newer versions
# also accept a `deterministic` flag).
for seed_fn in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
    hf_set_seed,
):
    seed_fn(SEED)


In [None]:
from deeppavlov import train_model, build_model 
from deeppavlov.core.commands.utils import parse_config

PROJECT_DIR = '..'
MODEL_NAME = 'model'

# Paths derived from the two constants above; each is reused several times below.
models_dir = PROJECT_DIR + '/models/'
model_path = models_dir + MODEL_NAME
tag_dict_path = models_dir + 'tag.dict'

# Load the stock config by name, then grab aliases for the sections edited below.
# The aliases point at the same nested objects, so edits land in model_config.
model_config = parse_config('ner_collection3_bert')
reader_cfg = model_config['dataset_reader']
metadata_cfg = model_config['metadata']
pipe_cfg = model_config['chainer']['pipe']
train_cfg = model_config['train']

# Show the stock data path before overriding it
# (original note: the dataset that the model was trained on).
print(reader_cfg['data_path'])

# Point the reader at the local dataset directory and turn the `iobes` option off.
reader_cfg.update({
    'data_path': PROJECT_DIR + '/datasets/conll/',
    'iobes': False,
})

# Remove the download list from the config metadata and record the model path.
del metadata_cfg['download']
metadata_cfg['variables']['MODEL_PATH'] = model_path

# Pipeline components are addressed by position in the stock config.
# NOTE(review): indices 0/1/2 are presumably the preprocessor, the tag
# vocabulary and the model itself - verify against the config if it changes.
pipe_cfg[0].update({
    'max_seq_length': 128,
    'max_subword_length': 20,
})
pipe_cfg[1].update({
    'save_path': tag_dict_path,
    'load_path': tag_dict_path,
})
pipe_cfg[2].update({
    'save_path': model_path,
    'load_path': model_path,
    'learning_rate_drop_patience': 25,
})

# Replace the iterator section wholesale with a seeded, shuffling iterator.
model_config['dataset_iterator'] = dict(
    class_name="data_learning_iterator",
    seed=SEED,
    shuffle=True,
)

# Training-loop settings: batch size, and log/validate every 10 batches.
train_cfg.update({
    'batch_size': 550,
    'log_every_n_batches': 10,
    'val_every_n_batches': 10,
})


In [None]:
# Display the fully customized config so it can be inspected before training.
model_config

In [None]:

# Train using the customized config and keep the value returned by train_model.
# NOTE(review): `metadata.download` was deleted from the config earlier in this
# notebook, so `download=True` may have nothing left to fetch - confirm intent.
ner_model = train_model(model_config, download=True)