In [11]:
# One-time dependency installation; uncomment on a fresh runtime.
# NOTE(review): consider `%pip install -q <pkg>==<version>` so the install
# targets the kernel's environment and the versions are pinned.
#! pip install deeppavlov
#! pip install transformers

In [13]:
# Mount Google Drive into the Colab runtime so files stored on Drive are
# reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import pandas as pd

# Load the labelled intents dataset from the mounted Drive folder.
# The file is semicolon-separated.
df = pd.read_csv(
    '/content/drive/MyDrive/ColabNotebooks/intents.csv',
    sep=';',
)

# Preview the first three rows.
df.head(3)

Unnamed: 0,content,intent
0,Спасибо нет,Благодарность
1,спасибо за ответ. мне этого достаточно пока чт...,Благодарность
2,"спасибо, нет",Благодарность


In [18]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
train, test = train_test_split(df, test_size=0.33, random_state=42)

# Persist both splits next to the source file. Calling `to_csv()` without a
# path only returns the CSV text and writes nothing, so explicit paths are
# required. `index=False` avoids adding yet another unnamed index column
# (the frame already carries one as `Unnamed: 0`), and `sep=';'` matches the
# delimiter of the input file.
train.to_csv('/content/drive/MyDrive/ColabNotebooks/intents_train.csv', sep=';', index=False)
test.to_csv('/content/drive/MyDrive/ColabNotebooks/intents_test.csv', sep=';', index=False)

# Show the resulting split sizes as the cell output.
train.shape, test.shape

In [3]:
from deeppavlov import Element, Model
from deeppavlov.core.commands.utils import expand_path
from deeppavlov.core.data.simple_vocab import SimpleVocabulary
from deeppavlov.download import download_resource
from deeppavlov.models.classifiers.proba2labels import Proba2Labels
from deeppavlov.models.preprocessors.torch_transformers_preprocessor import TorchTransformersPreprocessor
from deeppavlov.models.torch_bert.torch_transformers_classifier import TorchTransformersClassifierModel


# Directory that holds the pretrained insults classifier once the archive
# below has been extracted.
model_path = expand_path('~/.deeppavlov/models/classifiers/insults_kaggle_torch_bert')

# Download the pretrained model archive; it is extracted into the parent
# `classifiers` directory.
archive_url = 'http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v0.tar.gz'
download_targets = {expand_path('~/.deeppavlov/models/classifiers')}
download_resource(archive_url, download_targets)

# Values shared by several components, named once so they stay in sync.
bert_name = 'bert-base-uncased'
classes_file = model_path / 'classes.dict'
weights_path = model_path / 'model'

# Turns raw text into BERT input features.
preprocessor = TorchTransformersPreprocessor(
    vocab_file=bert_name,
    do_lower_case=True,
    max_seq_length=64,
)

# Vocabulary of class labels, loaded from (and saved to) the same file.
classes_vocab = SimpleVocabulary(save_path=classes_file, load_path=classes_file)

# BERT-based classifier; the number of output classes follows the vocabulary.
classifier_settings = dict(
    n_classes=classes_vocab.len,
    return_probas=True,
    pretrained_bert=bert_name,
    save_path=weights_path,
    load_path=weights_path,
    optimizer='AdamW',
    optimizer_parameters={'lr': 1e-05},
    learning_rate_drop_patience=5,
    learning_rate_drop_div=2.0,
)
classifier = TorchTransformersClassifierModel(**classifier_settings)

# Converts class probabilities to class ids
# (presumably the single most probable class, given max_proba=True).
proba2labels = Proba2Labels(max_proba=True)

# Pipeline stages as (component, input name, output name), in execution order.
pipeline_steps = [
    (preprocessor, 'x', 'bert_features'),
    (classifier, 'bert_features', 'y_pred_probas'),
    (proba2labels, 'y_pred_probas', 'y_pred_ids'),
    (classes_vocab, 'y_pred_ids', 'y_pred_labels'),
]

model = Model(
    x=['x'],
    out=['y_pred_labels'],
    pipe=[
        Element(component=component, x=[source], out=[target])
        for component, source, target in pipeline_steps
    ],
)

# Using model: classify two sample sentences.
sample_texts = ['you are stupid', 'you are smart']
model(sample_texts)

2023-09-09 22:00:45.844 INFO in 'deeppavlov.core.data.utils'['utils'] at line 95: Downloading from http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v0.tar.gz to /root/.deeppavlov/models/insults_kaggle_torch_bert_v0.tar.gz
INFO:deeppavlov.core.data.utils:Downloading from http://files.deeppavlov.ai/deeppavlov_data/classifiers/insults_kaggle_torch_bert_v0.tar.gz to /root/.deeppavlov/models/insults_kaggle_torch_bert_v0.tar.gz
100%|██████████| 1.09G/1.09G [01:01<00:00, 17.7MB/s]
2023-09-09 22:01:48.530 INFO in 'deeppavlov.core.data.utils'['utils'] at line 276: Extracting /root/.deeppavlov/models/insults_kaggle_torch_bert_v0.tar.gz archive into /root/.deeppavlov/models/classifiers
INFO:deeppavlov.core.data.utils:Extracting /root/.deeppavlov/models/insults_kaggle_torch_bert_v0.tar.gz archive into /root/.deeppavlov/models/classifiers


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


['Insult', 'Not Insult']

In [10]:
# NOTE(review): in the recorded run this call failed with AttributeError (see
# the output below), so the pipeline object apparently exposes no
# `train_model` method.
# TODO: confirm in the DeepPavlov documentation how training should be started
# (presumably the config-driven, module-level `deeppavlov.train_model`) and
# replace this call accordingly.
model.train_model()

AttributeError: ignored