In [1]:
%cd ../

D:\WorkFolder\nlp_thes_augm


In [2]:
from nlp_thes_augm.utils.wordnet import RuWordNet

wordnet = RuWordNet(r'D:\WorkFolder\data\models\RuWordNet')

In [303]:
import pymorphy2
from functools import lru_cache
import string 
import re

sep_reg = re.compile(r"[\w'\-\–]+|[$.,!?;“”\"\(\):»]")
morph_analyzer = pymorphy2.MorphAnalyzer()

@lru_cache(maxsize=1000000)
def get_morph(word):
    return morph_analyzer.parse(word)[0].normal_form

def lemmatize_word(word):
    if word.isdigit() or len(word) < 3:
        return word.lower()
    
    return get_morph(word).lower()

class Token:
    def __init__(self, word, token, num, shift, word_len):
        self.word = word
        self.token = token
        self.num = num
        self.shift = shift
        self.word_len = word_len
        

class ConceptDetector:
    def __init__(self, wordnet):
        self.wordnet = wordnet
        self.senses = {}
        self.issensecontinuation = set()
        self._transform_senses()

    def _transform_senses(self):
        for sense in self.wordnet.senses:
            splited_sense = sense.split('_')
            sense_len = len(splited_sense)
            for i in range(1, sense_len):
                self.issensecontinuation.add('_'.join(splited_sense[:i]))

            if sense_len not in self.senses:
                self.senses[sense_len] = set()

            self.senses[sense_len].add(sense)

    @staticmethod
    def _tokenize_text(text):
        text_splitted = re.findall(sep_reg, text)
        print(text_splitted)
        shift = 0
        result = []
        for i, w in enumerate(text_splitted):
            w_norm = lemmatize_word(w)
            if text[shift] == ' ':
                shift += 1
            w_shift = shift
            shift = shift + len(w)
            result.append(Token(w, w_norm, i, w_shift, len(w)))

        return result
    
    @staticmethod
    def _filter_tokens(tokens):
        print([t.token for t in tokens])
        return [token for token in tokens if token.token.isalpha() and not token.token.isupper()]

    def inject_tokens(self, tokens, text):
        shift = 0
        text_injected = ''
        for token_seq in tokens:
            first_token = token_seq[0]
            last_token = token_seq[-1]
            
            sense = '_'.join([t.token for t in token_seq])
            synsets = ','.join(self.wordnet.sense2synid[sense])
            info = f'{text[first_token.shift:last_token.shift + last_token.word_len]}|{sense}|{synsets}'
            
            text_injected += text[shift:first_token.shift] + '{{' + info + '}}'
            
            shift = last_token.shift + last_token.word_len
            
        text_injected += text[shift:]
        return text_injected
    
    @staticmethod
    def _sanity_check(tokens, text):
        for token in tokens:
            if token.word != text[token.shift:token.shift + token.word_len]:
                raise Exception(f'ERROR: {token.word} != {text[token.shift:token.shift + token.word_len]}')
                
    def detect(self, text):
        tokens = self._filter_tokens(self._tokenize_text(text))
        print([t.token for t in tokens])
        self._sanity_check(tokens, text)
        concepts_token_list = []

        start_w_i = 0
        while start_w_i < len(tokens):
            last_ok_tokens = [tokens[start_w_i]]
            for end_w_i in range(start_w_i + 1, len(tokens) + 1):
                if end_w_i - start_w_i > 1 and tokens[end_w_i-1].num - tokens[end_w_i-2].num != 1:
                    break
                word = '_'.join([t.token for t in tokens[start_w_i:end_w_i]])
                if word in self.senses[end_w_i - start_w_i]:
                    if end_w_i - start_w_i > 1:
                        last_ok_tokens = tokens[start_w_i:end_w_i]

                if word not in self.issensecontinuation:
                    break
            if '_'.join([t.token for t in last_ok_tokens]) in self.senses[len(last_ok_tokens)]:
                concepts_token_list.append(last_ok_tokens)
            start_w_i += len(last_ok_tokens)
        
        return self.inject_tokens(concepts_token_list, text), concepts_token_list, text

In [None]:
import fasttext
import fasttext.util
ft = fasttext.load_model('cc.ru.300.bin')

In [264]:


import numpy as np

class WordnetAugment:
    def __init__(self, wordnet):
        self.wordnet = wordnet
        self.sep_reg = sep_reg
        
    def augment(self, concepts_token_list, text, concept_count=4, only_deterministic=False):
        if only_deterministic:
            concepts_token_list = self._filter_only_deterministic(concepts_token_list)
            
        selected_concepts = self._select_concepts(concepts_token_list, concept_count)
        
        text_splitted = re.findall(self.sep_reg, text)
        
        text_augmented = ''
        shift = 0
        
        for concept_tokens in selected_concepts:
            synid = self._select_concept(concept_tokens, text_splitted, 10, 3)

            sense = self._select_sense(synid).replace('_', ' ')
            
            first_token = concept_tokens[0]
            last_token = concept_tokens[-1]
            
            #print(first_token.word, first_token.num)
            
            text_augmented += text[shift:first_token.shift] + f'{sense}'
            shift = last_token.shift + last_token.word_len
            
        text_augmented += text[shift:]
        return text_augmented
            
    def _select_concepts(self, concepts_token_list, concept_count):
        idx = np.random.choice(range(len(concepts_token_list)), min(concept_count, len(concepts_token_list)), replace=False).tolist()
        selected_concepts = np.array(concepts_token_list)[idx].tolist()
        return sorted(selected_concepts, key=lambda x: x[0].num)
    
    def _select_concept(self, concept_tokens, text_splitted, window, topk):
        sense = '_'.join(t.token for t in concept_tokens)
        synsets_ids = wordnet.sense2synid[sense]
        if len(synsets_ids) == 1:
            return synsets_ids[0]
        
        return np.random.choice(synsets_ids, 1).tolist()[0]
        #print(sense)
        
        token_left_num = concept_tokens[0].num
        token_right_num = concept_tokens[-1].num
        #span = ' '.join(text_splitted[max(0, token_left_num - window):min(len(text_splitted), token_right_num + window + 1)])
        left_context = text_splitted[max(0, token_left_num - window):token_left_num]
        right_context = text_splitted[token_right_num + 1:min(len(text_splitted), token_right_num + window + 1)]
        span = ' '.join(left_context + right_context)
        #print(span)
        span_vec = ft.get_sentence_vector(span)
        
        synset_list = []
        for synid in synsets_ids:
            synset = wordnet.synsets[synid]
            synset_words = [w.replace('_', ' ') for w in synset.synset_words]
            sim_list = sorted([self.sim(span_vec, ft.get_sentence_vector(w)) for w in synset_words])
            mean_of_top_words_vec = np.mean(sim_list[-topk:])
            synset_list.append((synid, synset.synset_name, mean_of_top_words_vec))
        synset_list = sorted(synset_list, key=lambda x: -x[2])
        #print(synset_list)
        return synset_list[0][0]
    
    def _select_sense(self, synset_id):
        senses = list(self.wordnet.synsets[synset_id].synset_words)
        idx = np.random.choice(range(len(senses)), 1).tolist()[0]
        
        return senses[idx]
    
    def _filter_only_deterministic(self, concepts_token_list):
        concepts_token_list_deterministic = []
        for concept_tokens in concepts_token_list:
            sense = '_'.join(t.token for t in concept_tokens)
            if len(wordnet.sense2synid[sense]) == 1:
                #print(sense)
                concepts_token_list_deterministic.append(concept_tokens)
        return concepts_token_list_deterministic
    
    @staticmethod
    def sim(v1, v2):
        return np.dot(v1, v2) / np.linalg.norm(v1) / np.linalg.norm(v2)
    


In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

MODEL_NAME = r'D:\WorkFolder\data\t5_augm\rut5_base_restorer_1m'
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model.cuda();
model.eval();

def calc_score(scores):
    scores = torch.cat(scores).detach().cpu()
    scores = Softmax(dim=1)(scores)
    return scores.max(axis=1).values.mean()

def generate(text):
    text = "thes_augm: " + text
    x = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
    max_size = int(x.input_ids.shape[1] * 1.5 + 10)
    out = model.generate(**x, max_length=max_size, return_dict_in_generate=True, output_scores=True)
    return tokenizer.decode(out['sequences'][0], skip_special_tokens=True), float(calc_score(out['scores']))

In [160]:
sep_reg = re.compile(r"[\w'\-\–]+|[.,!?;“”]")
re.findall(sep_reg, 'Америки.хуе-мое а хуе–мое и – оно м - он')

['Америки', '.', 'хуе-мое', 'а', 'хуе–мое', 'и', '–', 'оно', 'м', '-', 'он']

In [167]:
wordnet.sense2synid['моль']

['9773-N', '114980-N', '150096-N']

In [170]:
morph_analyzer.parse('моли')

[Parse(word='моли', tag=OpencorporaTag('VERB,impf,tran sing,impr,excl'), normal_form='молить', score=0.25, methods_stack=((DictionaryAnalyzer(), 'моли', 634, 11),)),
 Parse(word='моли', tag=OpencorporaTag('NOUN,inan,masc plur,nomn'), normal_form='моль', score=0.125, methods_stack=((DictionaryAnalyzer(), 'моли', 92, 6),)),
 Parse(word='моли', tag=OpencorporaTag('NOUN,inan,masc plur,accs'), normal_form='моль', score=0.125, methods_stack=((DictionaryAnalyzer(), 'моли', 92, 9),)),
 Parse(word='моли', tag=OpencorporaTag('NOUN,anim,femn sing,gent'), normal_form='моль', score=0.125, methods_stack=((DictionaryAnalyzer(), 'моли', 397, 1),)),
 Parse(word='моли', tag=OpencorporaTag('NOUN,anim,femn sing,datv'), normal_form='моль', score=0.125, methods_stack=((DictionaryAnalyzer(), 'моли', 397, 2),)),
 Parse(word='моли', tag=OpencorporaTag('NOUN,anim,femn sing,loct'), normal_form='моль', score=0.125, methods_stack=((DictionaryAnalyzer(), 'моли', 397, 5),)),
 Parse(word='моли', tag=OpencorporaTag('N

In [173]:
concept_detector = ConceptDetector(wordnet)
injected_text, concepts_token_list, text = concept_detector.detect(text)
injected_text

['Недавно', 'другими', 'специалистами', 'был', 'обнаружен', 'новый', 'вид', 'моли', ',', 'которую', 'назвали', 'в', 'честь', 'Дональда', 'Трампа', 'за', 'то', ',', 'что', 'чешуйки', 'на', 'её', 'голове', 'несколько', 'напоминают', 'причёску', 'недавно', 'вступившего', 'в', 'должность', 'президента', 'Соединённых', 'штатов', 'Америки', '.']
['недавно', 'другой', 'специалист', 'быть', 'обнаружить', 'новый', 'вид', 'молить', ',', 'который', 'назвать', 'в', 'честь', 'дональд', 'трамп', 'за', 'то', ',', 'что', 'чешуйка', 'на', 'её', 'голова', 'несколько', 'напоминать', 'причёска', 'недавно', 'вступить', 'в', 'должность', 'президент', 'соединить', 'штат', 'америка', '.']
['недавно', 'другой', 'специалист', 'быть', 'обнаружить', 'новый', 'вид', 'молить', 'который', 'назвать', 'в', 'честь', 'дональд', 'трамп', 'за', 'то', 'что', 'чешуйка', 'на', 'её', 'голова', 'несколько', 'напоминать', 'причёска', 'недавно', 'вступить', 'в', 'должность', 'президент', 'соединить', 'штат', 'америка']


'Недавно другими {{специалистами|специалист|2550-N,6577-N}} {{был|быть|106569-N,150230-V,115786-V,149821-V,106922-V,106478-V,106947-V}} {{обнаружен|обнаружить|117371-V,151561-V}} {{новый|новый|113447-A,146685-A,120024-A}} {{вид|вид|112006-N,107545-N,120138-N,123619-N,127597-N}} {{моли|молить|107345-V}}, которую {{назвали|назвать|129138-V,116042-V,114458-V,117793-V}} {{в честь|в_честь|117804-N}} Дональда Трампа за то, что {{чешуйки|чешуйка|135655-N}} на её {{голове|голова|143622-N,107732-N,154104-N,115479-N}} несколько {{напоминают|напоминать|106807-V,120161-V}} причёску недавно {{вступившего в должность|вступить_в_должность|141643-V}} {{президента|президент|110438-N,8066-N}} {{Соединённых штатов Америки|соединить_штат_америка|104707-N}}.'

In [200]:
concepts_token_list

[[<__main__.Token at 0x1f98af93250>],
 [<__main__.Token at 0x1f98af93280>],
 [<__main__.Token at 0x1f98af93370>],
 [<__main__.Token at 0x1f98af936d0>],
 [<__main__.Token at 0x1f98af93700>],
 [<__main__.Token at 0x1f98af933a0>],
 [<__main__.Token at 0x1f98af93b80>],
 [<__main__.Token at 0x1f98af93910>, <__main__.Token at 0x1f98af93040>],
 [<__main__.Token at 0x1f98c5c5c10>],
 [<__main__.Token at 0x1f98c5c5c40>],
 [<__main__.Token at 0x1f98c5c5f10>],
 [<__main__.Token at 0x1f98c5c5040>,
  <__main__.Token at 0x1f98c5c5d30>,
  <__main__.Token at 0x1f98c5c5820>],
 [<__main__.Token at 0x1f98c5c5a30>],
 [<__main__.Token at 0x1f98c5c5f70>,
  <__main__.Token at 0x1f98c5c5460>,
  <__main__.Token at 0x1f98c5c5e50>]]

In [187]:
augmented_text = WordnetAugment(wordnet).augment(concepts_token_list, text, 1000)

специалист
Недавно другими был обнаружен новый вид моли , которую назвали в честь
[('2550-N', 'работник', 0.31633174), ('6577-N', 'эксперт', 0.21200687)]
специалистами 2
быть
Недавно другими специалистами обнаружен новый вид моли , которую назвали в честь Дональда
[('150230-V', 'пребывать в состоянии', 0.52913356), ('149821-V', 'представлять собой, являться', 0.43771145), ('106569-N', 'суть, существо, сущность, содержание', 0.36661097), ('115786-V', 'находиться, пребывать', 0.36013874), ('106478-V', 'произойти, случиться', 0.35277283), ('106947-V', 'существовать, быть, иметься', 0.34596488), ('106922-V', 'присутствие (пребывание)', 0.30161196)]
был 3
обнаружить
Недавно другими специалистами был новый вид моли , которую назвали в честь Дональда Трампа
[('151561-V', 'обнаружить свои чувства', 0.5026668), ('117371-V', 'заметить, обнаружить', 0.38578978)]
обнаружен 4
новый
Недавно другими специалистами был обнаружен вид моли , которую назвали в честь Дональда Трампа за
[('113447-A', 'новый

  selected_concepts = np.array(concepts_token_list)[idx].tolist()


In [None]:
1) выделение концептных сущностей в тексте
2) выбор концептов
3) разрешение многозначности
4) 

In [6]:
text = 'Участие в конкурсе приняли 4 кандидата, победу получила,бывший руководитель Президент, РФ горздрава Киева аварийное жилое помещение Алла Арешкович. Президент РФ.'
text = 'Они заменят подвижной состав на самой загруженной линии - Таганско-Краснопресненской.Название для нового типа поездов метро выбрали сами москвичи в ходе голосования в системе электронных референдумов “Активный гражданин”.'
text = 'Противоположный исход оценили онлайн букмекеры за 5,40 и на ничью можно ставить за 4,30.'
text = 'Ее выполнение позволит правительственным войскам САР установить контроль над сирийско-турецкой границей.'
injected_text, concepts_token_list, text = concept_detector.detect(text)
print(injected_text)

['Ее', 'выполнение', 'позволит', 'правительственным', 'войскам', 'САР', 'установить', 'контроль', 'над', 'сирийско-турецкой', 'границей.']
Ее {{выполнение|выполнение|106494-N,106479-N}} {{позволит|позволить|107175-V,107692-V}} {{правительственным|правительственный|2546-A}} {{войскам|войско|135192-N,772-N}} {{САР|сара|104695-N}} {{установить|установить|106699-V,106710-V,111944-V,125092-V}} {{контроль|контроль|3513-N}} над сирийско-турецкой границей.


In [188]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

gpt2 = GPT2LMHeadModel.from_pretrained('sberbank-ai/rugpt3small_based_on_gpt2')
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('sberbank-ai/rugpt3small_based_on_gpt2')

Downloading:   0%|          | 0.00/608 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/526M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.63M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.21M [00:00<?, ?B/s]

In [193]:
gpt2 =  gpt2.to(device)

In [196]:
import torch
from tqdm import tqdm

device = 'cuda'
def get_gpt2_ppl(text, gpt2, gpt2_tokenizer):
    max_length = gpt2.config.n_positions
    stride = 512
    encodings = gpt2_tokenizer(text, return_tensors='pt')
    nlls = []
    for i in range(0, encodings.input_ids.size(1), stride):
        begin_loc = max(i + stride - max_length, 0)
        end_loc = min(i + stride, encodings.input_ids.size(1))
        trg_len = end_loc - i    # may be different from stride on last loop
        input_ids = encodings.input_ids[:,begin_loc:end_loc].to(device)
        target_ids = input_ids.clone()
        target_ids[:,:-trg_len] = -100

        with torch.no_grad():
            outputs = gpt2(input_ids, labels=target_ids)
            neg_log_likelihood = outputs[0] * trg_len

        nlls.append(neg_log_likelihood)

    ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
    return float(ppl.detach().cpu())

get_gpt2_ppl(text, gpt2, gpt2_tokenizer)

35.74449157714844

In [8]:
import torch
from tqdm import tqdm

max_length = model.config.n_positions
stride = 512
encodings = gpt2_tokenizer(text, return_tensors='pt')
nlls = []
for i in tqdm(range(0, encodings.input_ids.size(1), stride)):
    begin_loc = max(i + stride - max_length, 0)
    end_loc = min(i + stride, encodings.input_ids.size(1))
    trg_len = end_loc - i    # may be different from stride on last loop
    input_ids = encodings.input_ids[:,begin_loc:end_loc].to(device)
    target_ids = input_ids.clone()
    target_ids[:,:-trg_len] = -100

    with torch.no_grad():
        outputs = gpt2(input_ids, labels=target_ids)
        neg_log_likelihood = outputs[0] * trg_len

    nlls.append(neg_log_likelihood)

ppl = torch.exp(torch.stack(nlls).sum() / end_loc)

In [9]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

MODEL_NAME = r'D:\WorkFolder\data\t5_augm\rut5_base_restorer_1m'
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model.cuda();
model.eval();



In [128]:
def calc_score(scores):
    scores = torch.cat(scores).detach().cpu()
    scores = Softmax(dim=1)(scores)
    return scores.max(axis=1).values.mean()

def generate(text):
    text = "thes_augm: " + text
    x = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
    max_size = int(x.input_ids.shape[1] * 1.5 + 10)
    out = model.generate(**x, max_length=max_size, return_dict_in_generate=True, output_scores=True)
    return tokenizer.decode(out['sequences'][0], skip_special_tokens=True), float(calc_score(out['scores']))

In [127]:
generate(text)

('Ее осуществление дает разрешение правительственным войскам Сирии установить контроль над сирийско-турецкой границей.',
 tensor(0.9965))

In [24]:
x = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
model.generate(**x, max_length=100, output_scores=True)

tensor([[    0,  1483,   324,  5133,  1733,  4285,  1597,   259, 13971, 12107,
           259, 27033,  3276,   688, 12828,   456, 13113, 11207,  2271, 13528,
          1004,  3781,   264,  7341,  3608,  2245,  8810,  1006,   260,     1]],
       device='cuda:0')

In [28]:
x

{'input_ids': tensor([[ 1483,   324,  5133,  1733,  4285,  1597,   259, 13971, 12107,   259,
         27033,  3276,   688, 12828,   456, 13113, 11207,  2271, 13528,  1004,
          3781,   264,  7341,  3608,  2245,  8810,  1006,   260,     1]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1]], device='cuda:0')}

In [31]:
outputs = model.generate(input_ids=x['input_ids'])
outputs

tensor([[    0,  1483,   324,  5133,  1733,  4285,  1597,   259, 13971, 12107,
           259, 27033,  3276,   688, 12828,   456, 13113, 11207,  2271, 13528]],
       device='cuda:0')

In [11]:
import pandas as pd

test = pd.read_csv(r'D:\WorkFolder\data\augmentation_data_news2017\0_sentences_test.csv')

In [302]:
text = test['text'].sample(1).tolist()[0]
print('ORIGINAL TEXT', text)

injected_text, concepts_token_list, text = concept_detector.detect(text)

print(injected_text)
results = []
for i in range(50):
    augmented_text = WordnetAugment(wordnet).augment(concepts_token_list, text, 5)
    #print('AUGMENTED', augmented_text)
    restored, score = generate(augmented_text)
    ft_score = WordnetAugment.sim(ft.get_sentence_vector(text), ft.get_sentence_vector(restored))
    gpt2_score = get_gpt2_ppl(restored, gpt2, gpt2_tokenizer)
    results.append([augmented_text, restored, gpt2_score])
    #print('RESTORED', restored, score, ft_score, gpt2_score)
    #print('---'*10)


ORIGINAL TEXT Сегодня в 13 часов дня правоохранители получили информацию о стрельбе на улице Приморской.
['Сегодня', 'в', '13', 'часов', 'дня', 'правоохранители', 'получили', 'информацию', 'о', 'стрельбе', 'на', 'улице', 'Приморской', '.']
['сегодня', 'в', '13', 'час', 'день', 'правоохранитель', 'получить', 'информация', 'о', 'стрельба', 'на', 'улица', 'приморский', '.']
['сегодня', 'в', 'час', 'день', 'правоохранитель', 'получить', 'информация', 'о', 'стрельба', 'на', 'улица', 'приморский']
Сегодня в 13 {{часов|час|106739-N}} {{дня|день|114433-N,106734-N,106738-N}} {{правоохранители|правоохранитель|6527-N}} {{получили информацию|получить_информация|142340-V}} о {{стрельбе|стрельба|2465-N}} на {{улице|улица|137867-N,109194-N,149490-N}} {{Приморской|приморский|105295-A}}.


  selected_concepts = np.array(concepts_token_list)[idx].tolist()


In [297]:
text

'То, что мы увидим, уже не вызовет такой резонанс в обществе, как декларации народных депутатов.'

In [298]:
injected_text

'То, что мы {{увидим|увидеть|117371-V,119037-V,142174-V,111002-V}}, уже не {{вызовет|вызвать|110474-V,120015-V,137404-V}} такой {{резонанс|резонанс|146569-N,128902-N,137979-N}} в {{обществе|общество|106667-N,138577-N,141170-N,140818-N}}, как {{декларации|декларация|115923-N,140585-N}} {{народных депутатов|народный_депутат|2839-N}}.'

In [304]:
results = sorted(results, key=lambda x: x[2])
results

[['Сегодня в 13 часов день работник орган внутренний дело получать информация о стрельбе на улица приморский.',
  'Сегодня в 13 часов дня работники органов внутренних дел получали информацию о стрельбе на улице Приморской.',
  45.62751007080078],
 ['Сегодня в 13 часов дневный время сотрудник правоохранительный орган собирать данный о стрельба из оружие на улица Приморской.',
  'Сегодня в 13 часов дневного времени сотрудники правоохранительных органов собирали данные о стрельбе из оружия на улице Приморской.',
  55.97041320800781],
 ['Сегодня в 13 часок дня правоохранители собирать информация о стрельба из оружие на улица приморский.',
  'Сегодня в 13 часов дня правоохранители собирали информацию о стрельбе из оружия на улице Приморской.',
  57.41008758544922],
 ['Сегодня в 13 часовой промежуток день сотрудник орган внутренний дело получили информацию о стрельбе на улица приморский.',
  'Сегодня в 13 часовой промежуток дня сотрудники органов внутренних дел получили информацию о стрельбе

In [305]:
print(text)
print('---'*10)
for res in results[:10]:
    print(res[1] + ' | GPT2 ppl: ' + str(res[2]))
    print('---'*10)

Сегодня в 13 часов дня правоохранители получили информацию о стрельбе на улице Приморской.
------------------------------
Сегодня в 13 часов дня работники органов внутренних дел получали информацию о стрельбе на улице Приморской. | GPT2 ppl: 45.62751007080078
------------------------------
Сегодня в 13 часов дневного времени сотрудники правоохранительных органов собирали данные о стрельбе из оружия на улице Приморской. | GPT2 ppl: 55.97041320800781
------------------------------
Сегодня в 13 часов дня правоохранители собирали информацию о стрельбе из оружия на улице Приморской. | GPT2 ppl: 57.41008758544922
------------------------------
Сегодня в 13 часовой промежуток дня сотрудники органов внутренних дел получили информацию о стрельбе на улице Приморской. | GPT2 ppl: 57.97651672363281
------------------------------
Сегодня в 13 часов дня правоохранители добывали информацию о стрельбе на улице Приморской. | GPT2 ppl: 60.124359130859375
------------------------------
Сегодня в 13 часов

In [232]:
wordnet.synsets['2636-A'].synset_words

{'внутрироссийский',
 'всероссийский',
 'общероссийский',
 'расейский',
 'российский'}

In [80]:
res = model.generate(**x, return_dict_in_generate=True, output_scores=True)


In [85]:
text = "thes_augm: " + 'Ее осуществление давать разрешение правительственный войскам сирия установить контроль над сирийско-турецкой границей.'
x = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
res = model.generate(**x, return_dict_in_generate=True, output_scores=True, max_length=100)
tokenizer.decode(res['sequences'][0], skip_special_tokens=True)

'Ее осуществление дает разрешение правительственным войскам Сирии установить контроль над сирийско-турецкой границей.'

In [122]:
def calc_metric(scores):
    scores = torch.cat(scores).detach().cpu()
    scores = Softmax(dim=1)(scores)
    return scores.max(axis=1).values.mean()

In [123]:
res

GreedySearchEncoderDecoderOutput(sequences=tensor([[    0,  1483,   324,  7222,  4848,   259,  8599,  1151,  6204,   259,
         13971, 12107,   259, 27033,  3276,   259, 24643,   279,   456, 13113,
         11207,  2271, 13528,  1004,  3781,   264,  7341,  3608,  2245,  8810,
          1006,   260,     1]], device='cuda:0'), scores=(tensor([[-13.2943,   2.4545, -16.4007,  ..., -12.2882, -12.1882, -11.4383]],
       device='cuda:0'), tensor([[-21.7531,  -2.1604, -20.5204,  ..., -20.2950, -20.9032, -18.4980]],
       device='cuda:0'), tensor([[-26.7651,  -1.8619, -23.4041,  ..., -20.0265, -19.5755, -21.4833]],
       device='cuda:0'), tensor([[-15.1636,  -0.8706, -15.5689,  ..., -13.9910, -14.3391, -16.6264]],
       device='cuda:0'), tensor([[-14.8103,   3.3023, -15.4990,  ..., -15.4713, -16.3882, -15.4402]],
       device='cuda:0'), tensor([[-11.3478,   0.9582, -11.2576,  ..., -10.2010, -10.6760, -10.0458]],
       device='cuda:0'), tensor([[-16.7011,   0.9777, -15.9815,  ..., -14.1

In [56]:
from torch.nn import Softmax

m = Softmax(dim=1)
input = torch.randn(2, 3)
input, m(input)

(tensor([[-0.5309, -0.6506,  1.2176],
         [ 1.3373, -0.6554,  0.3732]]),
 tensor([[0.1310, 0.1162, 0.7528],
         [0.6589, 0.0898, 0.2513]]))

In [121]:
scores.max(axis=1).values.mean()

tensor(0.9956)

In [89]:
scores = torch.cat(res['scores']).detach().cpu()
scores

tensor([[-13.2943,   2.4545, -16.4007,  ..., -12.2882, -12.1882, -11.4383],
        [-21.7531,  -2.1604, -20.5204,  ..., -20.2950, -20.9032, -18.4980],
        [-26.7651,  -1.8619, -23.4041,  ..., -20.0265, -19.5755, -21.4833],
        ...,
        [-20.4700,   5.2008, -23.4456,  ..., -24.4979, -24.4573, -24.2864],
        [-20.1298,   0.1282, -22.6818,  ..., -22.0661, -22.6861, -20.8803],
        [-13.0130,  18.6775, -14.9290,  ..., -22.1747, -19.1455, -17.8160]])

In [101]:
scores = m(scores)
scores

tensor([[6.7711e-19, 4.6800e-12, 3.0308e-20,  ..., 1.8518e-18, 2.0465e-18,
         4.3321e-18],
        [2.9460e-22, 9.5115e-14, 1.0106e-21,  ..., 1.2662e-21, 6.8918e-22,
         7.6367e-21],
        [1.8366e-24, 1.2004e-13, 5.2928e-23,  ..., 1.5508e-21, 2.4346e-21,
         3.6131e-22],
        ...,
        [3.4353e-25, 4.8379e-14, 1.7526e-26,  ..., 6.1192e-27, 6.3727e-27,
         7.5598e-27],
        [4.6091e-17, 2.8945e-08, 3.5918e-18,  ..., 6.6481e-18, 3.5764e-18,
         2.1762e-17],
        [1.7259e-14, 1.0000e+00, 2.5403e-15,  ..., 1.8119e-18, 3.7471e-17,
         1.4160e-16]])

In [104]:
tokenizer.convert_ids_to_tokens(2)

'<unk>'

In [114]:
for probs in scores:
    token = np.argmax(probs)
    print(f'{tokenizer.convert_ids_to_tokens(int(token))}={float(probs[token]):.4f}')

▁Е=1.0000
е=0.9998
▁осуществ=1.0000
ление=0.9990
▁=0.8775
дает=0.9952
▁раз=0.9996
решение=0.9986
▁=0.9986
правительств=1.0000
енным=0.9978
▁=0.9999
войс=1.0000
кам=1.0000
▁=0.9965
Сири=0.9999
и=1.0000
▁у=0.9984
становить=1.0000
▁контроль=0.9994
▁над=1.0000
▁сир=0.9999
ий=1.0000
ско=1.0000
-=1.0000
тур=0.9989
ец=1.0000
кой=1.0000
▁границ=1.0000
ей=1.0000
.=1.0000
</s>=1.0000


In [98]:
m(scores)[3][4848]

tensor(0.9990)

In [51]:
np.argmax(res['scores'][0][0].detach().cpu().numpy())

1483

In [34]:
help(model.generate)

Help on method generate in module transformers.generation_utils:

generate(input_ids: Union[torch.LongTensor, NoneType] = None, max_length: Union[int, NoneType] = None, min_length: Union[int, NoneType] = None, do_sample: Union[bool, NoneType] = None, early_stopping: Union[bool, NoneType] = None, num_beams: Union[int, NoneType] = None, temperature: Union[float, NoneType] = None, top_k: Union[int, NoneType] = None, top_p: Union[float, NoneType] = None, repetition_penalty: Union[float, NoneType] = None, bad_words_ids: Union[Iterable[int], NoneType] = None, bos_token_id: Union[int, NoneType] = None, pad_token_id: Union[int, NoneType] = None, eos_token_id: Union[int, NoneType] = None, length_penalty: Union[float, NoneType] = None, no_repeat_ngram_size: Union[int, NoneType] = None, encoder_no_repeat_ngram_size: Union[int, NoneType] = None, num_return_sequences: Union[int, NoneType] = None, max_time: Union[float, NoneType] = None, max_new_tokens: Union[int, NoneType] = None, decoder_start_tok

In [None]:
import numpy as np
class WordnetAugment:
    def __init__(self, wordnet):
        self.wordnet = wordnet
        
    def augment(self, concepts_token_list, text, concept_count=4):
        selected_concepts = self._select_synsets(concepts_token_list, concept_count)
    
    def _select_concepts(self, concepts_token_list, concept_count):
        return np.random.choice(concepts_token_list, concept_count)
    
    def _select_concept(self, index, text):
        pass
    
    def _select_sense(self, synset_id):
        pass

In [363]:
import pandas as pd

df = pd.read_csv(r'D:\WorkFolder\data\augmentation_data_news2017\0_sentences_test.csv')

In [364]:
df.iloc[10]['text']

'Участие в конкурсе приняли 4 кандидата, победу получила бывший руководитель горздрава Киева Алла Арешкович.'

In [365]:
text = 'Участие в конкурсе приняли 4 кандидата, победу получила,бывший руководитель Президент, РФ горздрава Киева аварийное жилое помещение Алла Арешкович. Президент РФ.'
#text = 'Президент РФ'
res = tokenizer.tokenize(text)
res

'{{Участие|участие|3978-N,106856-N,115391-N}} в {{конкурсе|конкурс|6544-N}} {{приняли|принять|106884-V,141708-V,115487-V,124852-V,134035-V,146793-V,145869-V}} 4 {{кандидата|кандидат|23-N,121468-N}}, {{победу|победа|148624-N,132174-N,106958-N}} {{получила|получить|116303-V,133751-V,120784-V}},{{бывший|бывший|125496-A}} {{руководитель|руководитель|106606-N}} {{Президент|президент|110438-N,8066-N}}, {{РФ|рф|2636-N}} горздрава {{Киева|киев|102960-N}} {{аварийное жилое помещение|аварийный_жилой_помещение|5049-N}} Алла Арешкович. {{Президент РФ|президент_рф|290-N}}.'

In [None]:
class ConceptSelector:
    def __init__(self, wordnet):
        self.wordnet = wordnet
        
    def select_text(self, text):
        pass
    
    def _select_synset(self, index, text):
        pass
    
    def _select_sense(self, synset_id):
        pass

In [355]:
def inject_tokens(tokens, text):
    shift = 0
    text_injected = ''
    for token_seq in tokens:
        first_token = token_seq[0]
        last_token = token_seq[-1]
        text_injected += text[shift:first_token.shift]
        sense = '_'.join([t.token for t in token_seq])
        synsets = ','.join(wordnet.sense2synid[sense])
        info = f'{text[first_token.shift:last_token.shift + last_token.word_len]}|{synsets}'
        text_injected += '{{' + info + '}}'
        shift = last_token.shift + last_token.word_len
    text_injected += text[shift:]
    return text_injected
print(text)
print(inject_tokens(res, text))

Участие в конкурсе приняли 4 кандидата, победу получила,бывший руководитель Президент, РФ горздрава Киева аварийное жилое помещение Алла Арешкович. Президент РФ.


AttributeError: 'str' object has no attribute 'shift'

In [7]:
import fasttext.util
fasttext.util.download_model('ru', if_exists='ignore')  # English

Downloading https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.ru.300.bin.gz
 (0.55%) [>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]]>                                                  ]>                                                  ]                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]>                                                  ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (1.35%) [>                                                  ]                                                  ]]                                                  ]>                                                  ]>                                                  ]                                                  ]]>                                                  ]]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (2.18%) [=>                                                 ]>                                                  ]>                                                  ]]                                                  ]>                                                  ]>                                                  ]=>                                                 ]=>                                                 ]]=>                                                 ]]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (2.92%) [=>                                                 ]=>                                                 ]=>                                                 ]]>                                                 ]]=>                                                 ]]                                                 ]>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]=>                                                 ]>                                                 ]                                                 ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (3.64%) [=>                                                 ]=>                                                 ]]=>                                                 ]=>                                                 ]=>                                                 ]>                                                 ]=>                                                 ]=>                                                 ]]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (4.50%) [==>                                                ]=>                                                 ]>                                                ]==>                                                ]==>                                                ]==>                                                ]==>                                                ]                                                ]==>                                                ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (5.59%) [==>                                                ]==>                                                ]==>                                                ]                                                ]>                                                ]>                                                ]==>                                                ]==>                                                ]==>                                                ]>                                                ]                                                ]                                                ]==>                                                ]==>                                                ]                                                ]==>                                                ]]                                                ]                                                ]>                                                ]>                          

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (6.42%) [===>                                               ]                                                ]]===>                                               ]===>                                               ]>                                               ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (7.51%) [===>                                               ]]]===>                                               ]===>                                               ]===>                                               ]===>                                               ]===>                                               ]                                               ]===>                                               ]                                               ]]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (8.39%) [====>                                              ]]===>                                               ]                                               ]===>                                               ]===>                                               ]>                                              ]]====>                                              ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (9.17%) [====>                                              ]====>                                              ]====>                                              ]                                              ]                                              ]]]====>                                              ]====>                                              ]====>                                              ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (9.98%) [====>                                              ]====>                                              ]>                                              ]====>                                              ]====>                                              ]====>                                              ]====>                                              ]====>                                              ]====>                                              ]====>                                              ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (10.80%) [=====>                                             ]]=====>                                             ]                                             ]=====>                                             ]=====>                                             ]=====>                                             ]>                                             ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 (11.67%) [=====>                                             ]=====>                                             ]]]=====>                                             ]                                             ]>                                             ]=====>                                             ]>                                             ]>                                             ]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



'cc.ru.300.bin'

In [370]:
wordnet.synsets['3978-N'].synset_name

'долевое финансирование'

In [371]:
wordnet.synsets['106856-N'].synset_name

'участие, участвовать'

In [373]:
wordnet.synsets['115391-N'].synset_name

'сочувствие, участие'

In [142]:
max(tokenizer.senses.keys())

10

In [314]:
'аварийный_жилой_помещение' in wordnet.senses

True

In [70]:
text = 'Участие в конкурсе приняли 4 кандидата, победу получила,бывший руководитель горздрава Киева Алла Арешкович.'
tokenize_text(text)

[('участие', 0, 7),
 ('в', 8, 1),
 ('конкурс', 10, 8),
 ('принять', 19, 7),
 ('4', 27, 1),
 ('кандидат', 29, 9),
 (',', 38, 1),
 ('победа', 40, 6),
 ('получить', 47, 8),
 (',', 55, 1),
 ('бывший', 56, 6),
 ('руководитель', 63, 12),
 ('горздравый', 76, 9),
 ('киев', 86, 5),
 ('алла', 92, 4),
 ('арешкович', 97, 9),
 ('.', 106, 1)]

In [72]:
text[92:92+4]

'Алла'

In [74]:
[m for m in dir('.,') if m.startswith('is')]

['isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper']

In [300]:
tokenizer.senses

{3: {'квалификация_по_статья',
  'аварийный_жилой_помещение',
  'восхождение_к_вершина',
  'деятельность_по_лицензирование',
  'выступление_по_телевидение',
  'давать_на_чай',
  'цеплять_за_душа',
  'город_горячий_ключ',
  'ядро_в_процессор',
  'телефонный_система_связь',
  'стремление_к_деньга',
  'раскладывать_свой_вещь',
  'выцвести_на_солнце',
  'обнаружить_свой_чувство',
  'город_саратовский_область',
  'идти_на_увеличение',
  'отдавать_долг_отечество',
  'с_удовольствие_ждать',
  'высокий_по_положение',
  'производить_программный_обеспечение',
  'этикетка_на_товар',
  'важный_на_вид',
  'миграция_трудоспособный_население',
  'прорваться_через_препятствие',
  'высокопроизводительный_вычислительный_техника',
  'республика_марий_эл',
  'чтение_текст_вслух',
  'родство_через_брак',
  'вести_себя_плохо',
  'насильственный_захват_власть',
  'уральский_федеральный_округ',
  'больший_антильский_остров',
  'товар_для_малыш',
  'прием_на_работа',
  'отсидеть_свой_срок',
  'приведение_в_соо