# Persian Sentiment
Sentiment analysis aims to classify text, such as user comments, according to its emotional polarity. We tested three well-known datasets for this task: **Digikala** user comments, **SnappFood** user comments, and **DeepSentiPers**, the latter in both its binary and multi-class forms.


In [1]:
!nvidia-smi
!lscpu

Tue Aug  3 05:10:23 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8    27W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install hazm==0.7.0
!pip install seqeval==1.2.2
!pip install sentencepiece==0.1.96
!pip install transformers==4.7.0
!pip install clean-text[gpl]==0.4.0

Collecting hazm==0.7.0
  Downloading hazm-0.7.0-py3-none-any.whl (316 kB)
[?25l[K     |█                               | 10 kB 18.1 MB/s eta 0:00:01[K     |██                              | 20 kB 9.9 MB/s eta 0:00:01[K     |███                             | 30 kB 8.4 MB/s eta 0:00:01[K     |████▏                           | 40 kB 7.8 MB/s eta 0:00:01[K     |█████▏                          | 51 kB 4.2 MB/s eta 0:00:01[K     |██████▏                         | 61 kB 4.4 MB/s eta 0:00:01[K     |███████▎                        | 71 kB 4.7 MB/s eta 0:00:01[K     |████████▎                       | 81 kB 5.3 MB/s eta 0:00:01[K     |█████████▎                      | 92 kB 5.2 MB/s eta 0:00:01[K     |██████████▍                     | 102 kB 4.2 MB/s eta 0:00:01[K     |███████████▍                    | 112 kB 4.2 MB/s eta 0:00:01[K     |████████████▍                   | 122 kB 4.2 MB/s eta 0:00:01[K     |█████████████▌                  | 133 kB 4.2 MB/s eta 0:00:01[K 

In [3]:
!pip install PyDrive
import os
import IPython.display as ipd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate the current Colab session with the user's Google account.
auth.authenticate_user()
# Build a PyDrive client that reuses the application-default credentials.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)



In [4]:
# Import required packages
import os
import gc
import re
import hazm
import time
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

import transformers
from transformers import AutoConfig, AutoTokenizer
from transformers import AutoModelForSequenceClassification

from cleantext import clean

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Report the versions of the main libraries so the run can be reproduced.
print()
print('numpy', np.__version__)
print('pandas', pd.__version__)
print('transformers', transformers.__version__)
print('torch', torch.__version__)
print()

# Select the compute device once: CUDA when PyTorch can see a GPU, the CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Report how many GPUs are visible and which one sits at index 0.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')



numpy 1.19.5
pandas 1.1.5
transformers 4.7.0
torch 1.9.0+cu102

There are 1 GPU(s) available.
We will use the GPU: Tesla K80


In [5]:
class SentimentAnalysisDataset(torch.utils.data.Dataset):
    """PyTorch dataset that tokenizes comments for sequence classification.

    Args:
        tokenizer: object exposing ``encode_plus``; it must return ``input_ids``,
            ``attention_mask`` and ``token_type_ids`` tensors.
        comments: indexable collection of comment strings.
        targets: indexable collection of labels, aligned with ``comments``.
        label_list: ordered labels; the position of a label is the index fed to the
            model. When omitted, the distinct values of ``targets`` are used in
            sorted order (previously the mapping stayed empty and every item
            lookup raised ``KeyError``).
        max_len: length every sample is padded/truncated to.
    """

    def __init__(self, tokenizer, comments, targets, label_list=None, max_len=128):
        self.comments = comments
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len
        if label_list is None:
            # Fall back to the labels that actually occur in the data.
            label_list = sorted(set(targets))
        # Any ordered sequence (list, tuple, ...) is accepted, not only ``list``.
        self.label2index = {label: i for i, label in enumerate(label_list)}
        self.index2label = {i: label for label, i in self.label2index.items()}

    def __len__(self):
        return len(self.comments)

    def __getitem__(self, item):
        """Return the tokenized sample at position ``item`` as a dict of fields."""
        comment = self.comments[item]
        target = self.label2index[self.targets[item]]
        encoding = self.tokenizer.encode_plus(
            comment,
            add_special_tokens=True,
            truncation=True,
            max_length=self.max_len,
            padding='max_length',
            return_tensors='pt')

        # The tokenizer returns tensors with a leading batch axis of size 1;
        # flatten() drops it so the DataLoader can stack samples itself.
        inputs = {
            'comment': comment,
            'targets': torch.tensor(target, dtype=torch.long),
            'original_targets': self.targets[item],
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'token_type_ids': encoding['token_type_ids'].flatten(),
        }

        return inputs


class MT5SentimentAnalysisDataset(torch.utils.data.Dataset):
    """Dataset feeding reviews (optionally paired with an aspect) to an MT5 tokenizer.

    When ``aspects`` is given, each model input is ``"<review> <sep> <aspect>"``
    and the sample also carries the aspect text; otherwise the review is
    tokenized on its own.
    """

    def __init__(self, reviews, aspects, labels, tokenizer, max_length=128):
        self.reviews = reviews
        self.aspects = aspects
        self.targets = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, item):
        with_aspect = self.aspects is not None
        if with_aspect:
            model_input = self.reviews[item] + " <sep> " + self.aspects[item]
        else:
            model_input = self.reviews[item]

        encoding = self.tokenizer(
            model_input,
            add_special_tokens=True,
            max_length=self.max_length,
            truncation=True,
            padding='max_length',
            return_tensors="pt"
        )

        # Key order is kept stable: review, (aspects), targets, then the tensors.
        sample = {'review': self.reviews[item]}
        if with_aspect:
            sample['aspects'] = self.aspects[item]
        sample['targets'] = self.targets[item]
        # Drop the leading batch axis produced by return_tensors="pt".
        sample['input_ids'] = encoding['input_ids'].flatten()
        sample['attention_mask'] = encoding['attention_mask'].flatten()
        return sample


class SentimentAnalysis:
    def __init__(self, model_name, model_type=None):
        """Load the tokenizer, model and config for ``model_name``.

        Args:
            model_name: name or path handed to the ``from_pretrained`` loaders.
            model_type: ``"mt5"`` loads the MT5 text-to-text classes; any other
                value loads the ``Auto*`` sequence-classification classes.
        """
        self.normalizer = hazm.Normalizer()
        self.model_name = model_name
        if model_type == "mt5":
            # The import cell only brings in the Auto* classes, so the MT5 classes
            # are imported here; without this the branch fails with NameError.
            from transformers import MT5Config, MT5ForConditionalGeneration, MT5Tokenizer

            self.tokenizer = MT5Tokenizer.from_pretrained(self.model_name)
            self.model = MT5ForConditionalGeneration.from_pretrained(self.model_name)
            self.config = MT5Config.from_pretrained(self.model_name)
            # No label mapping is read in this branch; define the attributes anyway so
            # the guards in the classification-only methods report the problem
            # instead of raising AttributeError.
            self.id2label = None
            self.label2id = None
        else:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(self.model_name)
            self.config = AutoConfig.from_pretrained(self.model_name)
            self.id2label = self.config.id2label
            self.label2id = self.config.label2id

    def cleaning(self, text):
        """Normalize one raw comment before it is fed to the tokenizer.

        Steps, in order: strip, ``cleantext.clean`` (lower-casing; URLs, e-mails,
        phone numbers and currency symbols replaced by empty strings), HTML tag
        removal, hazm normalization, removal of emoji/pictographs and
        directional control characters, removal of ``#`` and collapsing of
        whitespace runs into a single space.

        Args:
            text: the raw comment.

        Returns:
            The cleaned string, or ``None`` when ``text`` is not a string or
            nothing but a blank is left after cleaning.
        """
        if not isinstance(text, str):
            return None

        text = text.strip()

        # regular cleaning
        text = clean(
            text,
            fix_unicode=True,
            to_ascii=False,
            lower=True,
            no_line_breaks=True,
            no_urls=True,
            no_emails=True,
            no_phone_numbers=True,
            no_numbers=False,
            no_digits=False,
            no_currency_symbols=True,
            no_punct=False,
            replace_with_url="",
            replace_with_email="",
            replace_with_phone_number="",
            replace_with_number="",
            replace_with_digit="0",
            replace_with_currency_symbol=""
        )

        # cleaning htmls: drop anything that looks like a tag (non-greedy match)
        text = re.sub(r'<.*?>', '', text)

        # normalizing
        text = self.normalizer.normalize(text)

        # removing weird patterns
        weird_pattern = re.compile("["
                                   u"\U0001F600-\U0001F64F"  # emoticons
                                   u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                                   u"\U0001F680-\U0001F6FF"  # transport & map symbols
                                   u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                                   u"\U00002702-\U000027B0"
                                   u"\U000024C2-\U0001F251"
                                   u"\U0001f926-\U0001f937"
                                   u'\U00010000-\U0010ffff'
                                   u"\u200d"
                                   u"\u2640-\u2642"
                                   u"\u2600-\u2B55"
                                   u"\u23cf"
                                   u"\u23e9"
                                   u"\u231a"
                                   u"\u3030"
                                   u"\ufe0f"
                                   u"\u2069"
                                   u"\u2066"
                                   # u"\u200c"  (ZWNJ is left in the text; presumably kept
                                   # because it is meaningful in Persian - confirm)
                                   u"\u2068"
                                   u"\u2067"
                                   "]+", flags=re.UNICODE)

        text = weird_pattern.sub(r'', text)

        # removing extra spaces, hashtags
        text = re.sub("#", "", text)
        # raw string: "\s" in a normal literal is an invalid escape sequence
        text = re.sub(r"\s+", " ", text)
        if text in ['', " "]:
            return None
        return text

    def load_dataset_test_file(self, dataset_name, dataset_file, **kwargs):
        """Load a ready-made test split.

        Supported ``dataset_name`` values (case-insensitive):

        * ``"snappfood"``: tab-separated file with ``comment`` and ``label``
          columns; ``label_map`` is optional.
        * ``"deepsentipers"``: header-less comma-separated file holding comment
          and label; ``label_map`` is required.
        * ``"pasinlu-aspect-sentiment"``: JSON-lines file with ``review``,
          ``question`` and ``label`` keys; ``label_map`` is required.

        Args:
            dataset_name: which of the formats above to read.
            dataset_file: path of the file.
            **kwargs: ``label_map`` - dict translating raw labels.

        Returns:
            ``(comments, label_ids)`` for the CSV datasets,
            ``(reviews, aspects, labels)`` for the JSON-lines dataset, or
            ``None`` (after printing the reason) when the dataset name is
            unknown, the file is missing or a required ``label_map`` is absent.
        """
        name = dataset_name.lower()
        if name not in ("snappfood", "deepsentipers", "pasinlu-aspect-sentiment"):
            return None
        if not os.path.exists(dataset_file):
            print(f'{dataset_file} not exists!')
            return None
        if name != "snappfood" and 'label_map' not in kwargs:
            print("label_map is missing!")
            return None

        if name == "pasinlu-aspect-sentiment":
            # json is not part of the notebook's import cell; import it where it is
            # needed so this branch no longer dies with NameError.
            import json

            label_map = kwargs['label_map']
            reviews, aspects, labels = [], [], []
            with open(dataset_file, encoding="utf8") as infile:
                for line in infile:
                    json_line = json.loads(line.strip())
                    reviews.append(json_line['review'])
                    aspects.append(json_line['question'])
                    labels.append(label_map[json_line['label']])
            return reviews, aspects, labels

        if name == "snappfood":
            data = pd.read_csv(dataset_file, delimiter="\t")
            # keep only comment and label: the file's own label_id column is
            # dropped because it is not consistent with the model's label ids
            data = data[['comment', 'label']]
        else:
            data = pd.read_csv(dataset_file, delimiter=",", names=['comment', 'label'], header=None)

        # cleaning comments: rows whose comment is missing or cleans to nothing are dropped
        data = data.dropna(subset=['comment'])
        data['comment'] = data['comment'].apply(self.cleaning)
        data = data.dropna(subset=['comment'])

        # map labels (always present for deepsentipers, optional for snappfood)
        if 'label_map' in kwargs:
            label_map = kwargs['label_map']
            data['label'] = data['label'].apply(lambda lbl: label_map[lbl])
            data = data.dropna(subset=['label'])
            data = data.reset_index(drop=True)

        data['label_id'] = data['label'].apply(lambda t: self.label2id[t])
        x_test, y_test = data['comment'].values.tolist(), data['label_id'].values.tolist()
        print(f'test part:\n #comment: {len(x_test)}, #labels: {len(y_test)}')
        return x_test, y_test

    def load_dataset_file(self, dataset_name, dataset_file, **kwargs):
        """Load a full dataset file.

        Supported ``dataset_name`` values (case-insensitive):

        * ``"digikala"``: Excel file with ``comment`` and ``recommend`` columns.
          Only the labels ``no_idea``, ``not_recommended`` and ``recommended``
          are kept; ``label_map`` is optional. A stratified 10% test split with
          ``random_state=1`` is carved out.
        * ``"pasinlu-review-sentiment"``: JSON-lines file with ``review`` and
          ``sentiment`` keys; ``label_map`` is required.

        Args:
            dataset_name: which of the formats above to read.
            dataset_file: path of the file.
            **kwargs: ``label_map`` - dict translating raw labels.

        Returns:
            ``(x_all, y_all, x_test, y_test)`` for digikala, ``(reviews, labels)``
            for the JSON-lines dataset, or ``None`` (after printing the reason)
            when the dataset name is unknown, the file is missing or a required
            ``label_map`` is absent.
        """
        name = dataset_name.lower()
        if name not in ("digikala", "pasinlu-review-sentiment"):
            return None
        if not os.path.exists(dataset_file):
            print(f'{dataset_file} not exists!')
            return None

        if name == "pasinlu-review-sentiment":
            if 'label_map' not in kwargs:
                print("label_map is missing!")
                return None

            # json is not part of the notebook's import cell; import it where it is
            # needed so this branch no longer dies with NameError.
            import json

            label_map = kwargs['label_map']
            reviews, labels = [], []
            with open(dataset_file, encoding="utf8") as infile:
                for line in infile:
                    json_line = json.loads(line.strip())
                    reviews.append(json_line['review'])
                    labels.append(label_map[json_line['sentiment']])
            return reviews, labels

        data = pd.read_excel(dataset_file)
        data = data[['comment', 'recommend']]

        # cleaning comments: rows whose comment is missing or cleans to nothing are dropped
        data = data.dropna(subset=['comment'])
        data['comment'] = data['comment'].apply(self.cleaning)
        data = data.dropna(subset=['comment'])

        # cleaning labels: anything outside the three known values becomes None and is dropped
        valid_labels = ['no_idea', 'not_recommended', 'recommended']
        data['recommend'] = data['recommend'].apply(lambda r: r if r in valid_labels else None)
        data = data.dropna(subset=['recommend'])
        if 'label_map' in kwargs:
            label_map = kwargs['label_map']
            data['recommend'] = data['recommend'].apply(lambda lbl: label_map[lbl])
        data = data.dropna(subset=['recommend'])
        data = data.reset_index(drop=True)

        data['label_id'] = data['recommend'].apply(lambda t: self.label2id[t])

        x_all, y_all = data['comment'].values.tolist(), data['label_id'].values.tolist()
        print(f'all data: #comment: {len(x_all)}, #labels: {len(y_all)}')

        # fixed seed + stratification so the test split is reproducible
        _, test = train_test_split(data, test_size=0.1, random_state=1, stratify=data['recommend'])
        test = test.reset_index(drop=True)
        x_test, y_test = test['comment'].values.tolist(), test['label_id'].values.tolist()
        print(f'test part:\n #comment: {len(x_test)}, #labels: {len(y_test)}')
        return x_all, y_all, x_test, y_test

    def load_dataset_composite_file(self, dataset_name, dataset_files, **kwargs):
        """Concatenate the test parts of Digikala, SnappFood and DeepSentiPers.

        ``dataset_files`` and ``kwargs['label_map']`` must both be dicts keyed by
        exactly ``digikala``, ``snappfood`` and ``deepsentipers``. Returns
        ``(comments, label_ids)``, or ``None`` when the dataset name is not the
        composite one or an argument check fails (the reason is printed).
        """
        if dataset_name.lower() != "digikala+snappfood+deepsentipers":
            return None

        required_keys = ["deepsentipers", "digikala", "snappfood"]
        if sorted(dataset_files.keys()) != required_keys:
            print("dataset_files must contains path of all three datasets")
            return None
        if 'label_map' not in kwargs:
            print("label_map is missing!")
            return None
        label_maps = kwargs['label_map']
        if sorted(label_maps.keys()) != required_keys:
            print("label_map must contains label_map for all three datasets!")
            return None

        print("digikala dataset - we only use test set:")
        digikala_parts = self.load_dataset_file(
            'digikala', dataset_files['digikala'], label_map=label_maps['digikala'])
        _, _, x_test_digi, y_test_digi = digikala_parts

        print("snappfood dataset:")
        x_test_snapp, y_test_snapp = self.load_dataset_test_file(
            'snappfood', dataset_files['snappfood'], label_map=label_maps['snappfood'])

        print("deepsentipers dataset:")
        x_test_senti, y_test_senti = self.load_dataset_test_file(
            'deepsentipers', dataset_files['deepsentipers'], label_map=label_maps['deepsentipers'])

        all_comments = x_test_digi + x_test_snapp + x_test_senti
        all_labels = y_test_digi + y_test_snapp + y_test_senti
        return all_comments, all_labels

    def sentiment_analysis_inference(self, input_text, device):
        if not self.model or not self.tokenizer or not self.id2label:
            print('Something wrong has been happened!')
            return

        pt_batch = self.tokenizer(
            input_text,
            padding=True,
            truncation=True,
            max_length=self.config.max_position_embeddings,
            return_tensors="pt"
        )

        gc.collect()
        torch.cuda.empty_cache()
        # Tell pytorch to run this model on the GPU.
        if device.type != 'cpu':
            self.model.cuda()
        self.model.eval()
        pt_batch = pt_batch.to(device)

        pt_outputs = self.model(**pt_batch)
        pt_predictions = torch.argmax(F.softmax(pt_outputs.logits, dim=1), dim=1)

        output_predictions = []
        for i, sentence in enumerate(input_text):
            output_predictions.append((sentence, self.id2label.get(pt_predictions[i].item())))
        return output_predictions

    def mt5_sentiment_analysis_inference(self, reviews, device):
        if not self.model or not self.tokenizer:
            print('Something wrong has been happened!')
            return

        tokenized_batch = self.tokenizer(
            reviews,
            padding=True,
            return_tensors="pt"
        )

        gc.collect()
        torch.cuda.empty_cache()
        # Tell pytorch to run this model on the GPU.
        if device.type != 'cpu':
            self.model.cuda()
        self.model.eval()

        input_ids = tokenized_batch.input_ids.to(device)
        attention_mask = tokenized_batch.attention_mask.to(device)
        outputs = self.model.generate(input_ids=input_ids,
                                      attention_mask=attention_mask)
        predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return predictions

    def mt5_aspect_sentiment_analysis_inference(self, reviews, aspects, device):
        if not self.model or not self.tokenizer:
            print('Something wrong has been happened!')
            return

        new_input = []
        for r, a in zip(reviews, aspects):
            new_input.append(r + " <sep> " + a)

        tokenized_batch = self.tokenizer(
            new_input,
            padding=True,
            return_tensors="pt"
        )

        gc.collect()
        torch.cuda.empty_cache()
        # Tell pytorch to run this model on the GPU.
        if device.type != 'cpu':
            self.model.cuda()
        self.model.eval()

        input_ids = tokenized_batch.input_ids.to(device)
        attention_mask = tokenized_batch.attention_mask.to(device)
        outputs = self.model.generate(input_ids=input_ids,
                                      attention_mask=attention_mask)
        predictions = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
        return predictions

    def evaluation(self, input_text, input_labels, device, batch_size=4):
        if not self.model or not self.tokenizer or not self.id2label:
            print('Something wrong has been happened!')
            return

        max_len = self.config.max_position_embeddings
        label_list = list(set(input_labels))
        label_count = {self.id2label[label]: input_labels.count(label) for label in label_list}
        print("label_count:", label_count)
        dataset = SentimentAnalysisDataset(comments=input_text, targets=input_labels, tokenizer=self.tokenizer,
                                           max_len=max_len, label_list=label_list)
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

        print("#samples:", len(input_text))
        print("#batch:", len(data_loader))

        gc.collect()
        torch.cuda.empty_cache()
        # Tell pytorch to run this model on the GPU.
        if device.type != 'cpu':
            self.model.cuda()
        self.model.eval()

        total_loss, total_time = 0, 0
        output_predictions = []
        golden_labels, predicted_labels = [], []
        print("Start to evaluate test data ...")
        for step, batch in enumerate(data_loader):
            b_comments = batch['comment']
            b_input_ids = batch['input_ids']
            b_attention_mask = batch['attention_mask']
            b_token_type_ids = batch['token_type_ids']
            b_targets = batch['targets']

            # move tensors to GPU if CUDA is available
            b_input_ids = b_input_ids.to(device)
            b_attention_mask = b_attention_mask.to(device)
            b_token_type_ids = b_token_type_ids.to(device)
            b_targets = b_targets.to(device)

            # This will return the loss (rather than the model output) because we have provided the `labels`.
            with torch.no_grad():
                start = time.monotonic()
                b_outputs = self.model(input_ids=b_input_ids, attention_mask=b_attention_mask,
                                       token_type_ids=b_token_type_ids, labels=b_targets)
                end = time.monotonic()
                total_time += end - start
                print(f'inference time for step {step}: {end - start}')
            # get the loss
            total_loss += b_outputs.loss.item()

            b_original_targets = batch['original_targets']
            golden_labels.extend(b_original_targets.tolist())

            b_predictions = torch.argmax(F.softmax(b_outputs.logits, dim=1), dim=1)
            b_predictions = b_predictions.cpu().detach().numpy().tolist()
            b_predictions = [dataset.index2label[label] for label in b_predictions]
            predicted_labels.extend(b_predictions)

            for i, comment in enumerate(b_comments):
                output_predictions.append((
                    comment,
                    self.id2label[b_original_targets[i].item()],
                    self.id2label[b_predictions[i]]
                ))
                # print(f'output prediction: {i},{comment},{self.id2label[b_original_targets[i].item()]},'
                #       f'{self.id2label[b_predictions[i]]}')

        # Calculate the average loss over the training data.
        avg_train_loss = total_loss / len(data_loader)
        print("average loss:", avg_train_loss)
        print("total inference time:", total_time)
        print("total inference time / #samples:", total_time / len(input_text))

        # evaluate
        print("Test Accuracy: {}".format(accuracy_score(golden_labels, predicted_labels)))
        print("Test Precision: {}".format(precision_score(golden_labels, predicted_labels, average="weighted")))
        print("Test Recall: {}".format(recall_score(golden_labels, predicted_labels, average="weighted")))
        print("Test F1-Score(weighted average): {}".format(
            f1_score(golden_labels, predicted_labels, average="weighted")))
        print("Test classification Report:\n{}".format(classification_report(
            golden_labels, predicted_labels, digits=10, target_names=[self.id2label[_] for _ in sorted(label_list)])))
        return output_predictions

    def mt5_sentiment_analysis_evaluation(self, reviews, labels, device, max_length, batch_size=4):
        if not self.model or not self.tokenizer:
            print('Something wrong has been happened!')
            return
        if len(reviews) != len(labels):
            print('length of inputs and labels is not equal!!')
            return

        dataset = MT5SentimentAnalysisDataset(reviews=reviews, aspects=None, labels=labels, tokenizer=self.tokenizer,
                                              max_length=max_length)
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
        print(f'#reviews:{len(reviews)}, #labels:{len(labels)}')
        print("#batch:", len(data_loader))

        gc.collect()
        torch.cuda.empty_cache()
        # Tell pytorch to run this model on the GPU.
        if device.type != 'cpu':
            self.model.cuda()
        self.model.eval()

        total_time = 0
        output_predictions = []
        golden_labels, predicted_labels = [], []
        print("Start to evaluate test data ...")
        for step, batch in enumerate(data_loader):
            # move tensors to GPU if CUDA is available
            b_input_ids = batch['input_ids'].to(device)
            b_attention_mask = batch['attention_mask'].to(device)

            # This will return the loss (rather than the model output) because we have provided the `labels`.
            with torch.no_grad():
                start = time.monotonic()
                b_outputs = self.model.generate(input_ids=b_input_ids, attention_mask=b_attention_mask)
                end = time.monotonic()
                total_time += end - start
                print(f'inference time for step {step}: {end - start}')

            b_targets = batch['targets']
            golden_labels.extend(b_targets)

            b_predictions = self.tokenizer.batch_decode(b_outputs, skip_special_tokens=True)
            predicted_labels.extend(b_predictions)

            for i, review in enumerate(batch['review']):
                output_predictions.append((
                    review,
                    b_targets[i],
                    b_predictions[i]
                ))

        print("total inference time:", total_time)
        print("total inference time / #samples:", total_time / len(reviews))

        # evaluate
        print("Test Accuracy: {}".format(accuracy_score(golden_labels, predicted_labels)))
        print("Test Precision: {}".format(precision_score(golden_labels, predicted_labels, average="weighted")))
        print("Test Recall: {}".format(recall_score(golden_labels, predicted_labels, average="weighted")))
        print("Test F1-Score(weighted average): {}".format(
            f1_score(golden_labels, predicted_labels, average="weighted")))
        print("Test classification Report:\n{}".format(
            classification_report(golden_labels, predicted_labels, digits=10)))
        return output_predictions

    def mt5_aspect_sentiment_analysis_evaluation(self, reviews, aspects, labels, device, max_length, batch_size=4):
        if not self.model or not self.tokenizer:
            print('Something wrong has been happened!')
            return
        if len(reviews) != len(labels):
            print('length of inputs and labels is not equal!!')
            return

        dataset = MT5SentimentAnalysisDataset(reviews=reviews, aspects=aspects, labels=labels, tokenizer=self.tokenizer,
                                              max_length=max_length)
        data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)
        print(f'#reviews:{len(reviews)}, #aspects:{len(aspects)}, #labels:{len(labels)}')
        print("#batch:", len(data_loader))

        gc.collect()
        torch.cuda.empty_cache()
        # Tell pytorch to run this model on the GPU.
        if device.type != 'cpu':
            self.model.cuda()
        self.model.eval()

        total_time = 0
        output_predictions = []
        golden_labels, predicted_labels = [], []
        print("Start to evaluate test data ...")
        for step, batch in enumerate(data_loader):
            # move tensors to GPU if CUDA is available
            b_input_ids = batch['input_ids'].to(device)
            b_attention_mask = batch['attention_mask'].to(device)

            # This will return the loss (rather than the model output) because we have provided the `labels`.
            with torch.no_grad():
                start = time.monotonic()
                b_outputs = self.model.generate(input_ids=b_input_ids, attention_mask=b_attention_mask)
                end = time.monotonic()
                total_time += end - start
                print(f'inference time for step {step}: {end - start}')

            b_targets = batch['targets']
            golden_labels.extend(b_targets)

            b_predictions = self.tokenizer.batch_decode(b_outputs, skip_special_tokens=True)
            predicted_labels.extend(b_predictions)

            for i, review in enumerate(batch['review']):
                output_predictions.append((
                    review,
                    batch['aspects'][i],
                    b_targets[i],
                    b_predictions[i]
                ))

        print("total inference time:", total_time)
        print("total inference time / #samples:", total_time / len(reviews))

        # evaluate
        print("Test Accuracy: {}".format(accuracy_score(golden_labels, predicted_labels)))
        print("Test Precision: {}".format(precision_score(golden_labels, predicted_labels, average="weighted")))
        print("Test Recall: {}".format(recall_score(golden_labels, predicted_labels, average="weighted")))
        print("Test F1-Score(weighted average): {}".format(
            f1_score(golden_labels, predicted_labels, average="weighted")))
        print("Test classification Report:\n{}".format(
            classification_report(golden_labels, predicted_labels, digits=10)))
        return output_predictions


In [6]:
# Load the pretrained sentiment checkpoint named below and print its configuration
# (including the id2label / label2id mapping used by the rest of the notebook).
model_name='HooshvareLab/bert-fa-base-uncased-sentiment-deepsentipers-binary'
sa_model = SentimentAnalysis(model_name)
print(sa_model.config)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=641.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1198122.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=62.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=651458839.0, style=ProgressStyle(descri…


BertConfig {
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "finetuning_task": "deepsentipers",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "negative",
    "1": "positive"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "negative": 0,
    "positive": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.7.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 100000
}



## Sample Inference:

In [7]:
# Short Persian sample sentences used to try out inference by hand.
texts = [
    "خوب نبود اصلا",
    "از رنگش خوشم نیومد",
    "کیفیتیش عالی بود"
]

In [8]:
# Classify the sample sentences; as the last expression of the cell, the
# returned value is what the notebook displays.
sa_model.sentiment_analysis_inference(texts, device)

[('خوب نبود اصلا', 'positive'),
 ('از رنگش خوشم نیومد', 'negative'),
 ('کیفیتیش عالی بود', 'positive')]

## DeepSentiPers([paper](https://arxiv.org/pdf/2004.05328.pdf))
DeepSentiPers, which is a balanced and augmented version of SentiPers, contains 12,138 user opinions about digital products labeled with five different classes: two positive (i.e., happy and delighted), two negative (i.e., furious and angry), and one neutral class. Therefore, this dataset can be used for both multi-class and binary classification. In the case of binary classification, the neutral class and its corresponding sentences are removed from the dataset.

Binary:
1. Negative (Furious + Angry)
2. Positive (Happy + Delighted)

Multi:
1. Furious(-2)
2. Angry(-1)
3. Neutral(0)
4. Happy(1)
5. Delighted(2)

Test set statistics (binary version):

|          Label         | # | 
|:------------------------:|:-----------:|
|  Positive  |      915    |
|  Negative |      196      |



		
Download: you can download the datasets from
* [SentiPers](https://github.com/phosseini/sentipers)
* [DeepSentiPers](https://github.com/JoyeBright/DeepSentiPers)

In [9]:
# Fetch the DeepSentiPers repository, then list its top level and its Dataset folder.
!git clone https://github.com/JoyeBright/DeepSentiPers
!ls DeepSentiPers
!ls DeepSentiPers/Dataset

Cloning into 'DeepSentiPers'...
remote: Enumerating objects: 2264, done.[K
remote: Counting objects: 100% (109/109), done.[K
remote: Compressing objects: 100% (109/109), done.[K
remote: Total 2264 (delta 70), reused 0 (delta 0), pack-reused 2155[K
Receiving objects: 100% (2264/2264), 22.02 MiB | 18.90 MiB/s, done.
Resolving deltas: 100% (1252/1252), done.
 Binary-Classification	 Dataset		     README.md
 _config.yml		 Images			     Results.xlsx
'Data Augmentation'	 Multiclass-Classification
balanced.csv  original.csv  test.csv  translation.csv


Run on `test` set:

In [10]:
# Read the DeepSentiPers test split. The label map sends the two negative
# scores (-2, -1) to "negative", the two positive scores (1, 2) to "positive",
# and maps the neutral score (0) to None.
test_comments, test_labels = sa_model.load_dataset_test_file(
    dataset_name="deepsentipers",
    dataset_file="./DeepSentiPers/Dataset/test.csv",
    label_map={-2: "negative", -1: "negative", 0: None, 1: "positive", 2: "positive"},
)

# Preview the first five comments and labels, then the size of each list,
# one item per output line.
print(
    test_comments[:5],
    test_labels[:5],
    len(test_comments),
    len(test_labels),
    sep="\n",
)

test part:
 #comment: 1111, #labels: 1111
['اندازه\u200cی خوبی داره.', 'با این چیزا نمیتونه از galaxy s iii بهتر باشه', 'سرعت اجرا بسیار بالا است و مصرف باتری نیز مناسب است.', 'از حساسیت ۴۰۰ مقداری نویز در عکس\u200cها مشاهده می\u200cشود اما همچنان جزئیات عکس\u200cها خیلی خوب پیدا هستند.', 'در کل، با اینکه عکاسی با تبلت را همواره جزو موارد غیر ضروری نامیده\u200cایم، ولی در مورد این دستگاه برای کسانی که علاقه\u200cمند به عکاسی نیز هستند، هواوی شرایطی را فراهم نموده است که کاملا آن\u200cها را راضی خواهد نمود.']
[1, 0, 1, 1, 1]
1111
1111


In [11]:
# Spot-check predictions on the first five test comments.
sa_model.sentiment_analysis_inference(test_comments[:5], device)

[('اندازه\u200cی خوبی داره.', 'positive'),
 ('با این چیزا نمیتونه از galaxy s iii بهتر باشه', 'negative'),
 ('سرعت اجرا بسیار بالا است و مصرف باتری نیز مناسب است.', 'positive'),
 ('از حساسیت ۴۰۰ مقداری نویز در عکس\u200cها مشاهده می\u200cشود اما همچنان جزئیات عکس\u200cها خیلی خوب پیدا هستند.',
  'positive'),
 ('در کل، با اینکه عکاسی با تبلت را همواره جزو موارد غیر ضروری نامیده\u200cایم، ولی در مورد این دستگاه برای کسانی که علاقه\u200cمند به عکاسی نیز هستند، هواوی شرایطی را فراهم نموده است که کاملا آن\u200cها را راضی خواهد نمود.',
  'positive')]

In [12]:
# Re-check GPU and CPU details after the first inference calls.
!nvidia-smi
!lscpu

Tue Aug  3 05:13:20 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    74W / 149W |   1289MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [13]:
# Evaluate on the whole test split in batches of 128 and keep the returned
# rows, which the following cells print and write to disk.
evaluation_output = sa_model.evaluation(test_comments, test_labels, device, batch_size=128)

label_count: {'negative': 196, 'positive': 915}
#samples: 1111
#batch: 9
Start to evaluate test data ...
inference time for step 0: 0.03284857600002056
inference time for step 1: 0.013714521999986573
inference time for step 2: 0.012826408999984551
inference time for step 3: 0.013038536999999906
inference time for step 4: 0.012753513999996358
inference time for step 5: 0.013067261999992752
inference time for step 6: 0.012315060000048561
inference time for step 7: 0.01261467099999436
inference time for step 8: 0.013332252999987304
average loss: 0.25060727405879235
total inference time: 0.13651080400001092
total inference time / #samples: 0.00012287201080108995
Test Accuracy: 0.9495949594959496
Test Precision: 0.9502752255423563
Test Recall: 0.9495949594959496
Test F1-Score(weighted average): 0.9498919939600413
Test classification Report:
              precision    recall  f1-score   support

    negative  0.8465346535 0.8724489796 0.8592964824       196
    positive  0.9724972497 0.96612

In [14]:
# Show the first 25 evaluation rows as tab-separated
# comment / true label / predicted label.
for comment, true_label, predicted_label in evaluation_output[:25]:
    print(f"{comment}\t{true_label}\t{predicted_label}")

اندازه‌ی خوبی داره.	positive	positive
با این چیزا نمیتونه از galaxy s iii بهتر باشه	negative	negative
سرعت اجرا بسیار بالا است و مصرف باتری نیز مناسب است.	positive	positive
از حساسیت ۴۰۰ مقداری نویز در عکس‌ها مشاهده می‌شود اما همچنان جزئیات عکس‌ها خیلی خوب پیدا هستند.	positive	positive
در کل، با اینکه عکاسی با تبلت را همواره جزو موارد غیر ضروری نامیده‌ایم، ولی در مورد این دستگاه برای کسانی که علاقه‌مند به عکاسی نیز هستند، هواوی شرایطی را فراهم نموده است که کاملا آن‌ها را راضی خواهد نمود.	positive	positive
به هر صورت دیدن یک نمایشگری لمسی بر روی دوربینی در این رده‌ی قیمت بسیار عالیست.	positive	positive
پهنای باند حافظه پهنای باندی که در حافظه‌ی موجود وجود دارد، حدود ۲۵۶ گیگابایت در ثانیه است که ارتباط سریع میان واحدها را با کمترین تاخیر مهیا می‌کند.	positive	positive
سنسور با رزولوشن بالا، توانایی فیلم برداری ۷۲۰p hd و طراحی و ساخت زیبا و باریک نیز جزو ویژگی همه‌ی دوربین‌های سری a امسال محسوب می‌گردند.	positive	positive
همه میدانیم که این گوشی از سیستم عامل ios۶ پشتیبانی میکند سیستم عام

In [15]:
# Name the dump after the model; "/" in the model id is replaced with "-"
# so the id can be used inside a file name.
output_file_name = (
    f"sentiment_analysis_deepsentipers_binary_testset_{model_name.replace('/', '-')}_outputs.txt"
)

# Write every evaluation row as one tab-separated line:
# comment, true label, predicted label.
with open(output_file_name, "w", encoding="utf8") as output_file:
    for comment, true_label, predicted_label in evaluation_output:
        output_file.write(f"{comment}\t{true_label}\t{predicted_label}\n")

# Authenticate the notebook user and build a Drive client from the
# application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Create a Drive file with the same title as the local file and upload it.
upload = drive.CreateFile({"title": output_file_name})
upload.SetContentFile(output_file_name)
upload.Upload()