In [55]:
# Enable IPython's autoreload extension so edits to local modules (such as
# `utils`, imported below) are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2 asks autoreload to re-import modules before each cell execution.
%autoreload 2

import pandas as pd

import torch
from torch.utils.data import DataLoader, SequentialSampler

from transformers import BertTokenizer, BertForSequenceClassification

from utils import EmotionDataset, predict_with_model

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
# Switch for the expensive step: when True the model is run to predict labels
# and the result is written to PATH_DATASET_WITH_EMOTIONS; when False the
# previously saved file is read instead.
PRED = False

# Weights file that is loaded into the classifier with load_state_dict.
PATH_MODEL = './Pre-trained Models/BertSentimentAnalysis.pth'

# Input CSV; its 'Application description' column holds the texts to classify.
PATH_DATASET = './Data/Dataset_cleaned.csv'

# CSV that stores each description together with its 'Emotion' label.
PATH_DATASET_WITH_EMOTIONS = './Data/Dataset_with_emotions.csv'

In [57]:
# Read the cleaned CSV and keep only the free-text column as a Series;
# this column is the sole input handed to the classifier.
dataset = pd.read_csv(PATH_DATASET).loc[:, 'Application description']

In [58]:
# Hugging Face checkpoint identifier; the same name is reused when the
# classification model is instantiated.
model_name = "cointegrated/rubert-tiny2"

# NOTE(review): do_lower_case=True must match the tokenizer settings used when
# the weights in PATH_MODEL were trained - confirm before changing it.
tokenizer = BertTokenizer.from_pretrained(
    model_name,
    do_lower_case=True,
)

In [59]:
# Inference hyper-parameters: rows per batch and the max_length value handed
# to EmotionDataset.
batch_size = 256
max_length = 100

test_dataset = EmotionDataset(dataset, tokenizer, max_length=max_length)

# The explicit SequentialSampler walks the dataset in index order, so the
# predictions line up row-for-row with `dataset` when they are joined later.
# `shuffle` is left at its default (off); it cannot be combined with a sampler.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    sampler=SequentialSampler(test_dataset),
)

In [60]:
# Prefer the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the architecture from the base checkpoint. Transformers warns that the
# classifier head is newly initialised here; it is overwritten when the saved
# state dict from PATH_MODEL is loaded.
model = BertForSequenceClassification.from_pretrained(model_name)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cointegrated/rubert-tiny2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [61]:
# Replace the model's parameters with the weights stored at PATH_MODEL.
# map_location=device remaps the saved tensors onto the device chosen above,
# so a checkpoint saved on a different device can still be loaded.
model.load_state_dict(
    torch.load(PATH_MODEL, map_location=device),
)

# Move the whole module to the target device for inference.
model = model.to(device)

In [62]:
if not PRED:
    # Cheap path: reuse the labels saved by an earlier prediction run.
    dataset_with_emotions = pd.read_csv(PATH_DATASET_WITH_EMOTIONS)
else:
    # Expensive path: run the classifier over every description.
    scores = predict_with_model(
        model,
        test_dataloader,
        device=device,
        use_sigmoid=True,
        return_labels=False,
    )

    # Pick the highest-scoring class per row, then map class 1 to 'Positive'
    # and every other class to 'Negative'.
    class_ids = scores.argmax(axis=1)
    pred = ['Positive' if emotion == 1 else 'Negative' for emotion in class_ids]

    # Pair each description with its label and persist the table so later runs
    # can take the cheap path above.
    dataset_with_emotions = pd.DataFrame(
        {
            'Application description': dataset,
            'Emotion': pred,
        }
    )
    dataset_with_emotions.to_csv(PATH_DATASET_WITH_EMOTIONS, index=False)

In [64]:
# Spot-check ten labelled rows. The fixed random_state makes the displayed
# sample identical on every Restart & Run All instead of changing per run.
dataset_with_emotions.sample(10, random_state=42)

Unnamed: 0,Application description,Emotion
546,Не очищена от снега пешеходная дорожка между д...,Negative
15178,Прошу забетонировать крыльцо 2 подъезда мира 7а,Positive
23465,Нету тратуара чтоб люди живущие в 49 микрорайо...,Negative
9092,Надписи присутствуют не первый день. Прямо нап...,Positive
20938,За всю зиму ни разу не приезжал трактор - терп...,Negative
2537,"Не по ГОСТу установленный дорожный значения,ко...",Positive
20269,"Срочно установите ограждения! Дети мимо ходят,...",Negative
15371,Как так!? Возмущает отсутствие! Требую сделать!,Negative
22961,В доме проводится капитальный ремонт. Мусор не...,Negative
805,Необходимо дублировать подземный пешеходный пе...,Positive
