In [1]:
import torch
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import get_scheduler
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler, random_split
import evaluate as evaluate
from tqdm import tqdm
import matplotlib
import matplotlib.pyplot as plt
import copy
import json

In [2]:
# Prefer the GPU, but fall back to the CPU so that every later `.to(device)`
# call also works on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

batch_size = 1
train_mask = range(50)
# Checkpoint name of the base architecture.
model_name = 'distilbert-base-uncased'
# Directories the two saved classifiers are loaded from via `from_pretrained`.
leaning_model_dir = './saved_models/leaning/'
hyperpartisan_model_dir = './saved_models/hyperpartisan/'


In [35]:
from transformers import DistilBertTokenizerFast

class ArticleDataset(Dataset):
    """Article bodies read from a JSON file, tokenized on access.

    The file must provide ``data['match']['docs'][0]['body'][0]`` (stored as
    article 0) and a list ``data['response']['docs']`` whose entries each
    provide ``doc['body'][0]`` (stored after it, in file order).

    :param str json_file: path of the JSON file to read (decoded as UTF-8)
    :param str tokenizer_name: name or path handed to
        ``DistilBertTokenizerFast.from_pretrained``
    :param int max_length: length every article is padded / truncated to
    """

    def __init__(self, json_file, tokenizer_name='distilbert-base-uncased', max_length=512):
        with open(json_file, 'r', encoding='utf-8') as input_file:
            self.data = json.load(input_file)

        # Article 0 is the matched document; the response documents follow it.
        self.articles = [self.data['match']['docs'][0]['body'][0]]
        self.articles.extend(doc['body'][0] for doc in self.data['response']['docs'])

        self.max_length = max_length
        self.tokenizer = DistilBertTokenizerFast.from_pretrained(tokenizer_name)

    def __len__(self):
        """Return the number of stored articles."""
        return len(self.articles)

    def __getitem__(self, idx):
        """Tokenize article ``idx``.

        :return: dict with the token ids under ``'id'`` and the matching
            mask under ``'attention_mask'``
        """
        encoded = self.tokenizer(self.articles[idx],
                                 padding='max_length',
                                 max_length=self.max_length,
                                 truncation=True,
                                 return_tensors='pt')
        # Only one text was encoded, so entry 0 of each field is that text.
        return {'id': encoded['input_ids'][0], 'attention_mask': encoded['attention_mask'][0]}

In [36]:
# Read the articles to classify.
dataset = ArticleDataset('./M1_output.json')
# Keep the file order and feed one article per batch: a later cell converts
# each prediction tensor with int(), which requires a single element.
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [38]:
def evaluate_model(model, dataloader, device, acc_only=True):
    """ Run a classifier over every batch and collect its predicted class ids.

    Despite the name, no metric is computed: the function only performs
    inference and returns the arg-max class of each example.

    :param torch.nn.Module model: the model to run; it is switched to eval mode
    :param torch.utils.data.DataLoader dataloader: yields dicts holding the
        token ids under 'id' and the mask under 'attention_mask'
    :param torch.device device: device the batch tensors are moved to before
        the forward pass (the model itself is not moved here)
    :param bool acc_only: unused; kept so existing calls keep working
    :return: list with one tensor of predicted class indices per batch, in
        dataloader order
    """

    # turn model into evaluation mode
    model.eval()

    label = []
    # Inference only: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['id'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            output = model(input_ids=input_ids, attention_mask=attention_mask)

            # Highest-scoring class along dim 1 of the logits.
            label.append(torch.argmax(output.logits, dim=1))

    return label

In [39]:
# Restore both saved sequence classifiers from their configured directories.
hyperpartisan_model = AutoModelForSequenceClassification.from_pretrained(hyperpartisan_model_dir)
leaning_model = AutoModelForSequenceClassification.from_pretrained(leaning_model_dir)

# Move them to the inference device.
hyperpartisan_model.to(device)
# Last expression of the cell, so the notebook displays this model's repr.
leaning_model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [44]:
# Classify every article with each model; both runs iterate the same
# unshuffled dataloader, so entry i of either list refers to the same article.
leaning_predictions = evaluate_model(model=leaning_model, dataloader=dataloader, device=device)
hyperpartisan_predictions = evaluate_model(model=hyperpartisan_model, dataloader=dataloader, device=device)

tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([3], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([2], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0], de

In [47]:
def get_political_perspective(leaning, hyperpartisan):
    """Combine a leaning label and a hyperpartisan label into one signed score.

    Computes ``(leaning - 1) * (hyperpartisan + 1)``: the leaning label is
    shifted so that label 1 maps to 0, and the shifted value is doubled when
    ``hyperpartisan`` is 1.

    NOTE(review): a leaning label of 3 yields 2 or 4, outside the -2..2 range
    the other labels produce -- confirm this matches the label meanings the
    leaning model was trained with.

    :param leaning: predicted leaning label (number or tensor)
    :param hyperpartisan: predicted hyperpartisan label (number or tensor)
    :return: the product described above, of the same kind as the inputs
    """
    shifted_leaning = leaning - 1
    strength = hyperpartisan + 1
    return shifted_leaning * strength

In [52]:
# One integer score per article: pair each leaning prediction with the
# hyperpartisan prediction at the same position and combine them.
coarse_perspectives = [
    int(get_political_perspective(leaning_label, hyperpartisan_predictions[position]).data)
    for position, leaning_label in enumerate(leaning_predictions)
]

In [53]:
# Show the combined score computed for each article, in dataset order.
print(coarse_perspectives)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, 0, 4, 0, -2, 2, 0, 0, 0]


In [29]:
# def label_to_hyperpartisan(label):
#     if label == 0:
#         return 'false'
#     elif label == 1:
#         return 'true'
    
# def label_to_leaning(label):
#     if label == 0:
#         return 'left'
#     elif label == 1:
#         return 'right'
#     elif label == 2:
#         return 'center'
#     elif label == 3:
#         return 'undefined'

In [30]:
# print(label_to_hyperpartisan(int(hyperpartisan_predictions[0].data)))
# print(label_to_leaning(int(leaning_predictions[0].data)))

true


In [None]:
# print(f"Political Leaning: {label_to_leaning(int(leaning_predictions[0].data))}")
# print(f"Is Hyperpartisan: {label_to_hyperpartisan(int(hyperpartisan_predictions[0].data))}")