# Preprocess train

In [29]:
import pandas as pd
from nltk.tokenize import word_tokenize

In [30]:
import torch
from transformers import AutoTokenizer, AutoModel
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2", device=device)
rubert_model = AutoModel.from_pretrained("cointegrated/rubert-tiny2").to(device)

In [31]:
ans = pd.read_csv('https://raw.githubusercontent.com/PhilBurub/NLPcourse_HSE/main/train_aspects.txt', header=None, delimiter='\t')

In [32]:
texts = pd.read_csv('https://raw.githubusercontent.com/PhilBurub/NLPcourse_HSE/main/train_reviews.txt', header=None, delimiter='\t', index_col=0)

In [33]:
def boudaries(text):
  out = []
  new_text = text
  cur = 0
  for word in tokenizer.convert_ids_to_tokens(tokenizer(text)['input_ids'])[1:-1]:
    new_word = word.lstrip('##')
    start = new_text.find(new_word)
    end = start + len(new_word)
    out.append((word, cur + start, cur + end, 'O'))
    cur += end
    new_text = new_text[end:]
  return pd.DataFrame(out)

In [34]:
with open('train.txt', 'w', encoding='utf-8') as f:
  pass

for review in ans[0].unique():
  anno = ans[ans[0] == review]
  out = boudaries(texts.loc[review][1])
  for _, row in anno.iterrows():
    start = True
    for idx, _ in out[(out[1] >= row[3]) & (out[2] <= row[4])].sort_values(1).iterrows():
      out.iloc[idx, 3] = f'B-{row[1]}' if start else f'I-{row[1]}'
      start = False

  with open('train.txt', 'a', encoding='utf-8') as f:
    f.write('\n'.join(map(lambda x: '\t'.join(x), out[[0, 3]].values)) + '\n\n')

# Initialize model



In [35]:
import random
import torch


def read_corpus(filepath):
    """ Read corpus from the given file path.
    Args:
        filepath: file path of the corpus
    Returns:
        sentences: a list of sentences, each sentence is a list of str
        tags: corresponding tags
    """
    sentences, tags = [], []
    sent, tag = ['<START>'], ['<START>']
    with open(filepath, 'r', encoding='utf8') as f:
        for line in f:
            if line == '\n':
                if len(sent) > 1:
                    sentences.append(sent + ['<END>'])
                    tags.append(tag + ['<END>'])
                sent, tag = ['<START>'], ['<START>']
            else:
                line = line.split()
                sent.append(line[0])
                tag.append(line[1])
    return sentences, tags


def generate_train_dev_dataset(filepath, sent_vocab, tag_vocab, train_proportion=0.8):
    """ Read corpus from given file path and split it into train and dev parts
    Args:
        filepath: file path
        sent_vocab: sentence vocab
        tag_vocab: tag vocab
        train_proportion: proportion of training data
    Returns:
        train_data: data for training, list of tuples, each containing a sentence and corresponding tag.
        dev_data: data for development, list of tuples, each containing a sentence and corresponding tag.
    """
    sentences, tags = read_corpus(filepath)
    tags = words2indices(tags, tag_vocab)
    data = list(zip(sentences, tags))
    random.shuffle(data)
    n_train = int(len(data) * train_proportion)
    train_data, dev_data = data[: n_train], data[n_train:]
    return train_data, dev_data


def batch_iter(data, batch_size=32, shuffle=True):
    """ Yield batch of (sent, tag), by the reversed order of source length.
    Args:
        data: list of tuples, each tuple contains a sentence and corresponding tag.
        batch_size: batch size
        shuffle: bool value, whether to random shuffle the data
    """
    data_size = len(data)
    indices = list(range(data_size))
    if shuffle:
        random.shuffle(indices)
    batch_num = (data_size + batch_size - 1) // batch_size
    for i in range(batch_num):
        batch = [data[idx] for idx in indices[i * batch_size: (i + 1) * batch_size]]
        batch = sorted(batch, key=lambda x: len(x[0]), reverse=True)
        sentences = [x[0] for x in batch]
        tags = [x[1] for x in batch]
        yield sentences, tags


def words2indices(origin, vocab):
    """ Transform a sentence or a list of sentences from str to int
    Args:
        origin: a sentence of type list[str], or a list of sentences of type list[list[str]]
        vocab: Vocab instance
    Returns:
        a sentence or a list of sentences represented with int
    """
    if isinstance(origin[0], list):
        result = [[vocab[w] for w in sent] for sent in origin]
    else:
        result = [vocab[w] for w in origin]
    return result


def indices2words(origin, vocab):
    """ Transform a sentence or a list of sentences from int to str
    Args:
        origin: a sentence of type list[int], or a list of sentences of type list[list[int]]
        vocab: Vocab instance
    Returns:
        a sentence or a list of sentences represented with str
    """
    if isinstance(origin[0], list):
        result = [[vocab.id2word(w) for w in sent] for sent in origin]
    else:
        result = [vocab.id2word(w) for w in origin]
    return result


def pad(data, padded_token, device):
    """ pad data so that each sentence has the same length as the longest sentence
    Args:
        data: list of sentences, List[List[word]]
        padded_token: padded token
        device: device to store data
    Returns:
        padded_data: padded data, a tensor of shape (max_len, b)
        lengths: lengths of batches, a list of length b.
    """
    lengths = [len(sent) for sent in data]
    max_len = lengths[0]
    padded_data = []
    for s in data:
        padded_data.append(s + [padded_token] * (max_len - len(s)))
    return torch.tensor(padded_data, device=device), lengths


def print_var(**kwargs):
    for k, v in kwargs.items():
        print(k, v)


def main():
    sentences, tags = read_corpus('train.txt')
    print(len(sentences), len(tags))


if __name__ == '__main__':
    main()

284 284


In [36]:
"""
Usage:
    vocab.py TRAIN SENT_VOCAB TAG_VOCAB [options]

Options:
    --max-size=<int>   maximum size of the dictionary [default: 5000]
    --freq-cutoff=<int>     frequency cutoff [default: 2]
"""
from itertools import chain
from collections import Counter
import json


class Vocab:
    def __init__(self, word2id, id2word):
        self.UNK = '<UNK>'
        self.PAD = '<PAD>'
        self.START = '<START>'
        self.END = '<END>'
        self.__word2id = word2id
        self.__id2word = id2word

    def get_word2id(self):
        return self.__word2id

    def get_id2word(self):
        return self.__id2word

    def __getitem__(self, item):
        if self.UNK in self.__word2id:
            return self.__word2id.get(item, self.__word2id[self.UNK])
        return self.__word2id[item]

    def __len__(self):
        return len(self.__word2id)

    def id2word(self, idx):
        return self.__id2word[idx]

    @staticmethod
    def build(data, max_dict_size, freq_cutoff, is_tags):
        """ Build vocab from the given data
        Args:
            data (List[List[str]]): List of sentences, each sentence is a list of str
            max_dict_size (int): The maximum size of dict
                                 If the number of valid words exceeds dict_size, only the most frequently-occurred
                                 max_dict_size words will be kept.
            freq_cutoff (int): If a word occurs less than freq_size times, it will be dropped.
            is_tags (bool): whether this Vocab is for tags
        Returns:
            vocab: The Vocab instance generated from the given data
        """
        word_counts = Counter(chain(*data))
        valid_words = [w for w, d in word_counts.items() if d >= freq_cutoff]
        valid_words = sorted(valid_words, key=lambda x: word_counts[x], reverse=True)
        valid_words = valid_words[: max_dict_size]
        valid_words += ['<PAD>']
        word2id = {w: idx for idx, w in enumerate(valid_words)}
        if not is_tags:
            word2id['<UNK>'] = len(word2id)
            valid_words += ['<UNK>']
        return Vocab(word2id=word2id, id2word=valid_words)

    def save(self, file_path):
        with open(file_path, 'w', encoding='utf8') as f:
            json.dump({'word2id': self.__word2id, 'id2word': self.__id2word}, f, ensure_ascii=False)

    @staticmethod
    def load(file_path):
        with open(file_path, 'r', encoding='utf8') as f:
            entry = json.load(f)
        return Vocab(word2id=entry['word2id'], id2word=entry['id2word'])


def main():
    sentences, tags = read_corpus('train.txt')
    sent_vocab = Vocab.build(sentences, 10**5, 3, is_tags=False)
    tag_vocab = Vocab.build(tags, 10**5, 0, is_tags=True)
    sent_vocab.save('sent_vocab.json')
    tag_vocab.save('tag_vocab.json')


if __name__ == '__main__':
    main()

In [37]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class BiLSTMCRF(nn.Module):
    def __init__(self, sent_vocab, tag_vocab, dropout_rate=0.5, embed_size=256, hidden_size=256):
        """ Initialize the model
        Args:
            sent_vocab (Vocab): vocabulary of words
            tag_vocab (Vocab): vocabulary of tags
            embed_size (int): embedding size
            hidden_size (int): hidden state size
        """
        super(BiLSTMCRF, self).__init__()
        self.dropout_rate = dropout_rate
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.sent_vocab = sent_vocab
        self.tag_vocab = tag_vocab
        self.embedding = nn.Linear(312, embed_size)
        self.dropout = nn.Dropout(dropout_rate)
        self.encoder = nn.LSTM(input_size=embed_size, hidden_size=hidden_size, bidirectional=True)
        self.hidden2emit_score = nn.Linear(hidden_size * 2, len(self.tag_vocab))
        self.transition = nn.Parameter(torch.randn(len(self.tag_vocab), len(self.tag_vocab)))  # shape: (K, K)

    def forward(self, sentences, mask, tags, sen_lengths):
        """
        Args:
            sentences (tensor): sentences, shape (b, len). Lengths are in decreasing order, len is the length
                                of the longest sentence
            tags (tensor): corresponding tags, shape (b, len)
            sen_lengths (list): sentence lengths
        Returns:
            loss (tensor): loss on the batch, shape (b,)
        """
        sentences = sentences  # shape: (len, b)
        sentences = self.embedding(sentences).transpose(0, 1)  # shape: (len, b, e)
        emit_score = self.encode(sentences, sen_lengths)  # shape: (b, len, K)
        loss = self.cal_loss(tags, mask, emit_score.transpose(0, 1))  # shape: (b,)
        return loss

    def encode(self, sentences, sent_lengths):
        """ BiLSTM Encoder
        Args:
            sentences (tensor): sentences with word embeddings, shape (len, b, e)
            sent_lengths (list): sentence lengths
        Returns:
            emit_score (tensor): emit score, shape (b, len, K)
        """
        hidden_states, _ = self.encoder(sentences)
        #hidden_states, _ = pad_packed_sequence(hidden_states, batch_first=True)  # shape: (b, len, 2h)
        emit_score = self.hidden2emit_score(hidden_states)  # shape: (b, len, K)
        emit_score = self.dropout(emit_score)  # shape: (b, len, K)
        return emit_score

    def cal_loss(self, tags, mask, emit_score):
        """ Calculate CRF loss
        Args:
            tags (tensor): a batch of tags, shape (b, len)
            mask (tensor): mask for the tags, shape (b, len), values in PAD position is 0
            emit_score (tensor): emit matrix, shape (b, len, K)
        Returns:
            loss (tensor): loss of the batch, shape (b,)
        """
        batch_size, sent_len = tags.shape
        # calculate score for the tags
        score = torch.gather(emit_score, dim=2, index=tags.unsqueeze(dim=2)).squeeze(dim=2)  # shape: (b, len)
        score[:, 1:] += self.transition[tags[:, :-1], tags[:, 1:]]
        total_score = (score * mask.type(torch.float)).sum(dim=1)  # shape: (b,)
        # calculate the scaling factor
        d = torch.unsqueeze(emit_score[:, 0], dim=1)  # shape: (b, 1, K)
        for i in range(1, sent_len):
            n_unfinished = mask[:, i].sum()
            d_uf = d[: n_unfinished]  # shape: (uf, 1, K)
            emit_and_transition = emit_score[: n_unfinished, i].unsqueeze(dim=1) + self.transition  # shape: (uf, K, K)
            log_sum = d_uf.transpose(1, 2) + emit_and_transition  # shape: (uf, K, K)
            max_v = log_sum.max(dim=1)[0].unsqueeze(dim=1)  # shape: (uf, 1, K)
            log_sum = log_sum - max_v  # shape: (uf, K, K)
            d_uf = max_v + torch.logsumexp(log_sum, dim=1).unsqueeze(dim=1)  # shape: (uf, 1, K)
            d = torch.cat((d_uf, d[n_unfinished:]), dim=0)
        d = d.squeeze(dim=1)  # shape: (b, K)
        max_d = d.max(dim=-1)[0]  # shape: (b,)
        d = max_d + torch.logsumexp(d - max_d.unsqueeze(dim=1), dim=1)  # shape: (b,)
        llk = total_score - d  # shape: (b,)
        loss = -llk  # shape: (b,)
        return loss

    def predict(self, sentences, mask, sen_lengths):
        """
        Args:
            sentences (tensor): sentences, shape (b, len). Lengths are in decreasing order, len is the length
                                of the longest sentence
            sen_lengths (list): sentence lengths
        Returns:
            tags (list[list[str]]): predicted tags for the batch
        """
        batch_size = sentences.shape[0]
        sentences = sentences.transpose(0, 1)  # shape: (len, b)
        sentences = self.embedding(sentences)  # shape: (len, b, e)
        emit_score = self.encode(sentences, sen_lengths).transpose(1, 0)  # shape: (b, len, K)
        tags = [[[i] for i in range(len(self.tag_vocab))]] * batch_size  # list, shape: (b, K, 1)
        d = torch.unsqueeze(emit_score[:, 0], dim=1)  # shape: (b, 1, K)
        for i in range(1, max(sen_lengths)):
            n_unfinished = mask[:, i].sum()
            d_uf = d[: n_unfinished]  # shape: (uf, 1, K)
            emit_and_transition = self.transition + emit_score[: n_unfinished, i].unsqueeze(dim=1)  # shape: (uf, K, K)
            new_d_uf = d_uf.transpose(1, 2) + emit_and_transition  # shape: (uf, K, K)
            d_uf, max_idx = torch.max(new_d_uf, dim=1)
            max_idx = max_idx.tolist()  # list, shape: (nf, K)
            tags[: n_unfinished] = [[tags[b][k] + [j] for j, k in enumerate(max_idx[b])] for b in range(n_unfinished)]
            d = torch.cat((torch.unsqueeze(d_uf, dim=1), d[n_unfinished:]), dim=0)  # shape: (b, 1, K)
        d = d.squeeze(dim=1)  # shape: (b, K)
        _, max_idx = torch.max(d, dim=1)  # shape: (b,)
        max_idx = max_idx.tolist()
        tags = [tags[b][k] for b, k in enumerate(max_idx)]
        return tags

    def save(self, filepath):
        params = {
            'sent_vocab': self.sent_vocab,
            'tag_vocab': self.tag_vocab,
            'args': dict(dropout_rate=self.dropout_rate, embed_size=self.embed_size, hidden_size=self.hidden_size),
            'state_dict': self.state_dict()
        }
        torch.save(params, filepath)

    @staticmethod
    def load(filepath, device_to_load):
        params = torch.load(filepath, map_location=lambda storage, loc: storage)
        model = BiLSTMCRF(params['sent_vocab'], params['tag_vocab'], **params['args'])
        model.load_state_dict(params['state_dict'])
        model.to(device_to_load)
        return model

    @property
    def device(self):
        return self.embedding.weight.device


def main():
    sent_vocab = Vocab.load('sent_vocab.json')
    tag_vocab = Vocab.load('tag_vocab.json')
    train_data, dev_data = generate_train_dev_dataset('train.txt', sent_vocab, tag_vocab)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = BiLSTMCRF(sent_vocab, tag_vocab)
    model.to(device)
    model.save('model.pth')
    model = model.load('model.pth', device)


if __name__ == '__main__':
    main()

# Train

In [38]:
"""
Usage:
    run.py train TRAIN SENT_VOCAB TAG_VOCAB [options]
    run.py test TEST RESULT SENT_VOCAB TAG_VOCAB MODEL [options]

Options:
    --dropout-rate=<float>              dropout rate [default: 0.5]
    --embed-size=<int>                  size of word embedding [default: 256]
    --hidden-size=<int>                 size of hidden state [default: 256]
    --batch-size=<int>                  batch-size [default: 32]
    --max-epoch=<int>                   max epoch [default: 10]
    --clip_max_norm=<float>             clip max norm [default: 5.0]
    --lr=<float>                        learning rate [default: 0.001]
    --log-every=<int>                   log every [default: 10]
    --validation-every=<int>            validation every [default: 250]
    --patience-threshold=<float>        patience threshold [default: 0.98]
    --max-patience=<int>                time of continuous worse performance to decay lr [default: 4]
    --max-decay=<int>                   time of lr decay to early stop [default: 4]
    --lr-decay=<float>                  decay rate of lr [default: 0.5]
    --model-save-path=<file>            model save path [default: ./model/model.pth]
    --optimizer-save-path=<file>        optimizer save path [default: ./model/optimizer.pth]
    --cuda                              use GPU
"""

import time
import torch
import torch.nn as nn
import random


def train():
    """ Training BiLSTMCRF model
    Args:
        args: dict that contains options in command
    """
    sent_vocab = Vocab.load('sent_vocab.json')
    tag_vocab = Vocab.load('tag_vocab.json')
    train_data, dev_data = generate_train_dev_dataset('train.txt', sent_vocab, tag_vocab)
    print('num of training examples: %d' % (len(train_data)))
    print('num of development examples: %d' % (len(dev_data)))

    max_epoch = 1000
    log_every = 5
    validation_every = 10
    model_save_path = 'model.pth'
    optimizer_save_path = 'opt.pth'
    min_dev_loss = float('inf')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    patience, decay_num = 0, 0

    model = BiLSTMCRF(sent_vocab, tag_vocab, 0.2, 256, 256).to(device)
    for name, param in model.named_parameters():
        if 'weight' in name:
            nn.init.normal_(param.data, 0, 0.01)
        else:
            nn.init.constant_(param.data, 0)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    train_iter = 0  # train iter num
    record_loss_sum, record_tgt_word_sum, record_batch_size = 0, 0, 0  # sum in one training log
    cum_loss_sum, cum_tgt_word_sum, cum_batch_size = 0, 0, 0  # sum in one validation log
    record_start, cum_start = time.time(), time.time()

    print('start training...')
    for epoch in range(max_epoch):
        for sentences, tags in batch_iter(train_data, batch_size=256):
            train_iter += 1
            current_batch_size = len(sentences)
            sentences_text = [tokenizer.convert_tokens_to_string(sent[1:-1]) for sent in sentences]
            tokenized = tokenizer(sentences_text, return_tensors='pt',
                                  padding=True).to(device)
            embded = rubert_model(**tokenized)['last_hidden_state'].to(device)
            mask = tokenized['attention_mask'].to(device)
            sent_lengths = mask.sum(1)

            tags, _ = pad(tags, tag_vocab[tag_vocab.PAD], device)
            # back propagation
            optimizer.zero_grad()
            batch_loss = model(embded, mask, tags, sent_lengths)  # shape: (b,)
            loss = batch_loss.mean()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1e5)
            optimizer.step()

            record_loss_sum += batch_loss.sum().item()
            record_batch_size += current_batch_size
            record_tgt_word_sum += sum(sent_lengths)

            cum_loss_sum += batch_loss.sum().item()
            cum_batch_size += current_batch_size
            cum_tgt_word_sum += sum(sent_lengths)

            if train_iter % log_every == 0:
                print('log: epoch %d, iter %d, %.1f words/sec, avg_loss %f, time %.1f sec' %
                      (epoch + 1, train_iter, record_tgt_word_sum / (time.time() - record_start),
                       record_loss_sum / record_batch_size, time.time() - record_start))
                record_loss_sum, record_batch_size, record_tgt_word_sum = 0, 0, 0
                record_start = time.time()

            if train_iter % validation_every == 0:
                print('dev: epoch %d, iter %d, %.1f words/sec, avg_loss %f, time %.1f sec' %
                      (epoch + 1, train_iter, cum_tgt_word_sum / (time.time() - cum_start),
                       cum_loss_sum / cum_batch_size, time.time() - cum_start))
                cum_loss_sum, cum_batch_size, cum_tgt_word_sum = 0, 0, 0

                dev_loss = cal_dev_loss(model, dev_data, 64, sent_vocab, tag_vocab, device)
                if dev_loss < min_dev_loss * 0.95:
                    min_dev_loss = dev_loss
                    model.save(model_save_path)
                    torch.save(optimizer.state_dict(), optimizer_save_path)
                    patience = 0
                else:
                    patience += 1
                    if patience == 2:
                        decay_num += 1
                        if decay_num == 2:
                            print('Early stop. Save result model to %s' % model_save_path)
                            return
                        lr = optimizer.param_groups[0]['lr'] * 0.95
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr
                        patience = 0
                print('dev: epoch %d, iter %d, dev_loss %f, patience %d, decay_num %d' %
                      (epoch + 1, train_iter, dev_loss, patience, decay_num))
                cum_start = time.time()
                if train_iter % log_every == 0:
                    record_start = time.time()
    print('Reached %d epochs, Save result model to %s' % (max_epoch, model_save_path))


def cal_dev_loss(model, dev_data, batch_size, sent_vocab, tag_vocab, device):
    """ Calculate loss on the development data
    Args:
        model: the model being trained
        dev_data: development data
        batch_size: batch size
        sent_vocab: sentence vocab
        tag_vocab: tag vocab
        device: torch.device on which the model is trained
    Returns:
        the average loss on the dev data
    """
    is_training = model.training
    model.eval()
    loss, n_sentences = 0, 0
    with torch.no_grad():
        for sentences, tags in batch_iter(dev_data, batch_size, shuffle=False):
            sentences_text = [tokenizer.convert_tokens_to_string(sent[1:-1]) for sent in sentences]
            tokenized = tokenizer(sentences_text, return_tensors='pt',
                                  padding=True).to(device)
            embded = rubert_model(**tokenized)['last_hidden_state'].to(device)
            mask = tokenized['attention_mask'].to(device)
            sent_lengths = mask.sum(1)

            tags, _ = pad(tags, tag_vocab[sent_vocab.PAD], device)
            batch_loss = model(embded, mask, tags, sent_lengths)  # shape: (b,)
            loss += batch_loss.sum().item()
            n_sentences += len(sentences)
    model.train(is_training)
    return loss / n_sentences

In [None]:
# train()

In [39]:
model = BiLSTMCRF.load('model.pth', 'cuda')

In [40]:
sent_vocab = Vocab.load('sent_vocab.json')
tag_vocab = Vocab.load('tag_vocab.json')

In [41]:
text = 'Не рекомендуем сие заведение от слова совсем. Позвонили забронировать столик. Нам сказали -да на 23:00 вечера столик за Вами. Приезжаем в предвкушении повеселиться. Охранник не пускает у него нет информации что столик забронирован. Более того что у него даже не было попытки прояснить ситуацию элементарно вызвав администратора. Это всё свидетельствует о странности сотрудников сея заведения.'

In [42]:
tokenized = tokenizer([text], return_tensors='pt',
                      padding=True).to(device)
embded = rubert_model(**tokenized)['last_hidden_state'].to(device)
mask = tokenized['attention_mask'].to(device)
sent_lengths = mask.sum(1)
tokens = model.predict(embded, mask, sent_lengths)

In [44]:
words = tokenizer.convert_ids_to_tokens(tokenized['input_ids'][0])
tags = [tag_vocab.id2word(i) for i in tokens[0]]

In [51]:
out = []
new_text = text
cur = 0
is_span = False
tag = ''
for word, tag in list(zip(words, tags))[1:]:
  new_word = word.lstrip('##')
  start = new_text.find(new_word)
  end = start + len(new_word)
  if is_span and (tag.split('-')[0] != 'I' or tag.split('-')[-1] != category):
    out.append((text[beginning:cur+1], beginning, cur+1, category))
    is_span = False

  if tag.split('-')[0] == 'B':
    beginning = cur + start
    category = tag.split('-')[1]
    is_span = True

  cur += end
  new_text = new_text[end:]

In [52]:
out

[('заведение ', 19, 29, 'Whole'),
 ('Позвонили забронировать столик.', 46, 77, 'Service'),
 ('Охранник ', 165, 174, 'Service'),
 ('сотрудников сея заведения.', 366, 392, 'Whole')]