# Classify your text's political stance using pretrained models.

This notebook lets you classify your own text using two pre-trained neural networks: one built on a BERT layer and one built on a Longformer layer. (Unfortunately, we have lost the pretrained MLP model that used fastText.)

First, download all the 'pkl' files from [this link](https://drive.google.com/drive/folders/11sa5UQrHWCLSmS9mHN36JfFrB7BLmf9z?usp=sharing) and set the path to those pickle files below. Also set the path to your text file and its (expected) political stance below.

In [1]:
# Expected stance of the text: choose 'left', 'center', or 'right'.
political_orientation = 'left'

# Plain-text file to classify -- point this at your own text file.
path_for_text = './nyt_article.txt'

# Folder holding the downloaded '.pkl' model files. Keep the trailing slash:
# the model file names are appended to it by plain string concatenation.
path_pickle_files = '/content/drive/MyDrive/Data_NMA_Polaris/Trained_models/'

In [2]:
!pip install transformers --quiet
import pandas as pd
import pickle
import numpy as np

# For the Transformers models
import torch
from transformers import BertTokenizer,LongformerModel,LongformerTokenizer
from torch import nn

Next, load your text into a pandas DataFrame.

In [3]:
# Read the whole article into a single string. The encoding is passed
# explicitly so decoding does not depend on the platform's default encoding.
with open(path_for_text, encoding='utf-8') as f:
    lines = f.readlines()
    # Lines keep their trailing newline and are joined with one space, so the
    # text handed to the models is built exactly as before.
    text = ' '.join(lines)

# One-row frame: the text together with its expected stance label.
df = pd.DataFrame([[text, political_orientation]], columns=['text', 'relative_stance'])

Now, load the BERT model and check its prediction.

In [4]:
# Stance name -> integer class id used as the classification target.
labels = dict(left=0, center=1, right=2)

# Tokenizer for the 'bert-base-cased' checkpoint; the Dataset class below
# reads it as a module-level name.
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized texts with integer stance ids.

    Uses the module-level ``tokenizer`` and ``labels`` objects.
    """

    def __init__(self, df):
        """Tokenize every row of *df* up front.

        Args:
            df: frame with a 'text' column and a 'relative_stance' column
                whose values are keys of the module-level ``labels`` dict.
        """
        stance_ids = []
        for stance in df['relative_stance']:
            stance_ids.append(labels[stance])
        self.labels = stance_ids

        encodings = []
        for document in df['text']:
            # Every text is padded or truncated to 512 tokens.
            encoding = tokenizer(
                document,
                padding='max_length',
                max_length=512,
                truncation=True,
                return_tensors="pt",
            )
            encodings.append(encoding)
        self.texts = encodings

    def classes(self):
        """Return the list of integer stance ids."""
        return self.labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def get_batch_labels(self, idx):
        """Return the label(s) selected by *idx* as a NumPy array."""
        selected = self.labels[idx]
        return np.array(selected)

    def get_batch_texts(self, idx):
        """Return the tokenizer output(s) selected by *idx*."""
        return self.texts[idx]

    def __getitem__(self, idx):
        """Return the ``(tokenized_text, label)`` pair at *idx*."""
        sample_text = self.get_batch_texts(idx)
        sample_label = self.get_batch_labels(idx)
        return sample_text, sample_label
class BertClassifier(nn.Module):
    """BERT encoder followed by dropout, a 768 -> 3 linear layer and ReLU.

    NOTE(review): pickle restores instances against whatever class definition
    is in scope, so ``forward`` should stay in sync with the code that was
    used to train the pickled model.
    """

    def __init__(self, dropout=0.5):
        """Build the network on top of the 'bert-base-cased' checkpoint.

        Args:
            dropout: drop probability applied to the encoder's pooled output.
        """
        # Imported locally because the notebook's import cell does not bring
        # BertModel into scope; without this, instantiation raised NameError.
        from transformers import BertModel

        super().__init__()

        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 3)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        """Return ReLU-activated scores for the three classes.

        Args:
            input_id: token ids passed to the encoder as ``input_ids``.
            mask: attention mask passed to the encoder as ``attention_mask``.

        Returns:
            The output of the final ReLU applied to the linear layer.
        """
        # With return_dict=False the encoder returns a tuple; only its second
        # element (the pooled output) feeds the classification head.
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)

        return final_layer
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file,
# so only load model files that come from a source you trust.
with open(path_pickle_files+'model_bert5_from_kaggle.pkl','rb') as f:
    model = pickle.load(f)

test = Dataset(df)
test_dataloader = torch.utils.data.DataLoader(test)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

if use_cuda:
    model = model.cuda()

# Put the model in evaluation mode. If it was pickled in training mode, its
# dropout layers would otherwise stay active and the prediction could change
# from run to run.
model.eval()

with torch.no_grad():
    # The expected label is not needed to compute the prediction, so it is
    # left where the DataLoader put it.
    for test_input, test_label in test_dataloader:
        mask = test_input['attention_mask'].to(device)
        # The tokenizer output carries an extra dimension of size 1 that the
        # DataLoader batches around; drop it from the ids.
        input_id = test_input['input_ids'].squeeze(1).to(device)
        output = model(input_id, mask)
        # Index of the highest score for the first sample in the batch.
        result = output.argmax(dim=1).cpu().detach().numpy().tolist()[0]

if result == 0:
    print('BERT-prediction:left')
elif result == 1:
    print('BERT-prediction:center')
else:
    print('BERT-prediction:right')

BERT-prediction:left


Next, load the Longformer model and check its prediction.

In [5]:
# Stance name -> integer class id used as the classification target.
labels = dict(left=0, center=1, right=2)

# Tokenizer for the 'allenai/longformer-base-4096' checkpoint; the Dataset
# class below reads it as a module-level name.
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenized texts with integer stance ids.

    Uses the module-level ``tokenizer`` and ``labels`` objects.
    """

    def __init__(self, df):
        """Tokenize every row of *df* up front.

        Args:
            df: frame with a 'text' column and a 'relative_stance' column
                whose values are keys of the module-level ``labels`` dict.
        """
        stance_ids = []
        for stance in df['relative_stance']:
            stance_ids.append(labels[stance])
        self.labels = stance_ids

        encodings = []
        for document in df['text']:
            # Every text is padded or truncated to 1000 tokens.
            encoding = tokenizer(
                document,
                padding='max_length',
                max_length=1000,
                truncation=True,
                return_tensors="pt",
            )
            encodings.append(encoding)
        self.texts = encodings

    def classes(self):
        """Return the list of integer stance ids."""
        return self.labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def get_batch_labels(self, idx):
        """Return the label(s) selected by *idx* as a NumPy array."""
        selected = self.labels[idx]
        return np.array(selected)

    def get_batch_texts(self, idx):
        """Return the tokenizer output(s) selected by *idx*."""
        return self.texts[idx]

    def __getitem__(self, idx):
        """Return the ``(tokenized_text, label)`` pair at *idx*."""
        sample_text = self.get_batch_texts(idx)
        sample_label = self.get_batch_labels(idx)
        return sample_text, sample_label

class LongformerClassifier(nn.Module):
    """Longformer encoder followed by dropout, a 768 -> 3 linear layer and ReLU."""

    def __init__(self, dropout=0.5):
        """Build the network on top of 'allenai/longformer-base-4096'.

        Args:
            dropout: drop probability applied to the encoder's pooled output.
        """
        super().__init__()

        # The encoder is stored under the attribute name ``bert``.
        self.bert = LongformerModel.from_pretrained('allenai/longformer-base-4096')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 3)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        """Return ReLU-activated scores for the three classes.

        Args:
            input_id: token ids passed to the encoder as ``input_ids``.
            mask: attention mask passed to the encoder as ``attention_mask``.
        """
        # With return_dict=False the encoder returns a two-element tuple;
        # only the second element (the pooled output) is used.
        _sequence, pooled = self.bert(
            input_ids=input_id,
            attention_mask=mask,
            return_dict=False,
        )
        return self.relu(self.linear(self.dropout(pooled)))


# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file,
# so only load model files that come from a source you trust.
with open(path_pickle_files+'Copy of model_Longformer0.pkl','rb') as f:
    model = pickle.load(f)

test = Dataset(df)
test_dataloader = torch.utils.data.DataLoader(test)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

if use_cuda:
    model = model.cuda()

# Put the model in evaluation mode. If it was pickled in training mode, its
# dropout layers would otherwise stay active and the prediction could change
# from run to run.
model.eval()

with torch.no_grad():
    # The expected label is not needed to compute the prediction, so it is
    # left where the DataLoader put it.
    for test_input, test_label in test_dataloader:
        mask = test_input['attention_mask'].to(device)
        # The tokenizer output carries an extra dimension of size 1 that the
        # DataLoader batches around; drop it from the ids.
        input_id = test_input['input_ids'].squeeze(1).to(device)
        output = model(input_id, mask)
        # Index of the highest score for the first sample in the batch.
        result = output.argmax(dim=1).cpu().detach().numpy().tolist()[0]

if result == 0:
    print('Longformer-prediction:left')
elif result == 1:
    print('Longformer-prediction:center')
else:
    print('Longformer-prediction:right')

Longformer-prediction:left
