In [20]:
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

class BertCNNBiLSTM(nn.Module):
    def __init__(self, bert_model_name='bert-base-uncased', num_classes=3, lstm_hidden_dim=256, cnn_out_channels=128, dropout_rate=0.5):
        super(BertCNNBiLSTM, self).__init__()

        self.bert = BertModel.from_pretrained(bert_model_name)
        self.hidden_size = self.bert.config.hidden_size

        self.conv1 = nn.Conv1d(in_channels=self.hidden_size, out_channels=cnn_out_channels, kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=self.hidden_size, out_channels=cnn_out_channels, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=self.hidden_size, out_channels=cnn_out_channels, kernel_size=5)

        self.bilstm = nn.LSTM(input_size=cnn_out_channels * 3, hidden_size=lstm_hidden_dim, bidirectional=True, batch_first=True)

        self.fc = nn.Linear(lstm_hidden_dim * 2, num_classes)  # BiLSTM outputs hidden_dim * 2

        self.dropout = nn.Dropout(dropout_rate)

        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state

        x = last_hidden_state.permute(0, 2, 1)

        x1 = torch.relu(self.conv1(x))
        x2 = torch.relu(self.conv2(x))
        x3 = torch.relu(self.conv3(x))

        x1 = torch.max(x1, dim=2).values
        x2 = torch.max(x2, dim=2).values
        x3 = torch.max(x3, dim=2).values

        cnn_features = torch.cat((x1, x2, x3), dim=1)

        lstm_input = cnn_features.unsqueeze(1)

        lstm_out, _ = self.bilstm(lstm_input)
        lstm_out = lstm_out.squeeze(1)

        lstm_out = self.dropout(lstm_out)


        logits = self.fc(lstm_out)

        return self.softmax(logits)


In [21]:
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import numpy as np

class PoliticalDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        text = self.texts[item]
        label = self.labels[item]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label, dtype=torch.long)
        }

def calculate_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')
    return accuracy, precision, recall, f1

def train_model(model, train_dataloader, val_dataloader, optimizer, loss_fn, num_epochs=4, device='cuda'):
    model = model.to(device)
    best_f1 = 0
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        all_preds = []
        all_labels = []

        for batch in tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            print(f"we have a batch with {len(input_ids)} elements")

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs

            loss = loss_fn(logits, labels)
            total_loss += loss.item()

            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        avg_train_loss = total_loss / len(train_dataloader)
        accuracy, precision, recall, f1 = calculate_metrics(all_preds, all_labels)

        print(f"Epoch {epoch+1}:")
        print(f"Train Loss: {avg_train_loss:.4f}")
        print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

        model.eval()
        val_preds = []
        val_labels = []

        with torch.no_grad():
            for batch in val_dataloader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask)
                logits = outputs

                preds = torch.argmax(logits, dim=1)
                val_preds.extend(preds.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        accuracy, precision, recall, f1 = calculate_metrics(val_preds, val_labels)
        print(f"Validation Metrics - Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    return model

df = pd.read_csv('all_years_merged.csv')
# df = df.sample(n=100, random_state=42)

label_map = {'left': 0, 'right': 1, 'center': 2}
df['Leaning'] = df['Leaning'].map(label_map)

batch_size = 32

train_texts, val_texts, train_labels, val_labels = train_test_split(df['Text'].tolist(), df['Leaning'].tolist(), test_size=0.1)

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

train_dataset = PoliticalDataset(train_texts, train_labels, tokenizer, max_len=128)
val_dataset = PoliticalDataset(val_texts, val_labels, tokenizer, max_len=128)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

model = BertCNNBiLSTM(num_classes=3)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-5)

train_model(model, train_dataloader, val_dataloader, optimizer, loss_fn, num_epochs=4, device='cuda')



Epoch 1/4:   0%|          | 0/27 [00:00<?, ?it/s]

we have a batch with 32 elements


Epoch 1/4:   4%|▎         | 1/27 [00:00<00:19,  1.36it/s]

we have a batch with 32 elements


Epoch 1/4:   7%|▋         | 2/27 [00:01<00:16,  1.52it/s]

we have a batch with 32 elements


Epoch 1/4:  11%|█         | 3/27 [00:01<00:15,  1.58it/s]

we have a batch with 32 elements


Epoch 1/4:  15%|█▍        | 4/27 [00:02<00:14,  1.58it/s]

we have a batch with 32 elements


Epoch 1/4:  19%|█▊        | 5/27 [00:03<00:13,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  22%|██▏       | 6/27 [00:03<00:12,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  26%|██▌       | 7/27 [00:04<00:12,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  30%|██▉       | 8/27 [00:05<00:11,  1.63it/s]

we have a batch with 32 elements


Epoch 1/4:  33%|███▎      | 9/27 [00:05<00:11,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  37%|███▋      | 10/27 [00:06<00:10,  1.63it/s]

we have a batch with 32 elements


Epoch 1/4:  41%|████      | 11/27 [00:06<00:09,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  44%|████▍     | 12/27 [00:07<00:09,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  48%|████▊     | 13/27 [00:08<00:08,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  52%|█████▏    | 14/27 [00:08<00:08,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  56%|█████▌    | 15/27 [00:09<00:07,  1.62it/s]

we have a batch with 32 elements


Epoch 1/4:  59%|█████▉    | 16/27 [00:09<00:06,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  63%|██████▎   | 17/27 [00:10<00:06,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  67%|██████▋   | 18/27 [00:11<00:05,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  70%|███████   | 19/27 [00:11<00:04,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  74%|███████▍  | 20/27 [00:12<00:04,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  78%|███████▊  | 21/27 [00:13<00:03,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  81%|████████▏ | 22/27 [00:13<00:03,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  85%|████████▌ | 23/27 [00:14<00:02,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4:  89%|████████▉ | 24/27 [00:14<00:01,  1.60it/s]

we have a batch with 32 elements


Epoch 1/4:  93%|█████████▎| 25/27 [00:15<00:01,  1.61it/s]

we have a batch with 32 elements


Epoch 1/4: 100%|██████████| 27/27 [00:16<00:00,  1.61it/s]

we have a batch with 21 elements
Epoch 1:
Train Loss: 1.0880
Accuracy: 0.4596, Precision: 0.4744, Recall: 0.4596, F1: 0.4154





Validation Metrics - Accuracy: 0.5789, Precision: 0.7894, Recall: 0.5789, F1: 0.5117


Epoch 2/4:   0%|          | 0/27 [00:00<?, ?it/s]

we have a batch with 32 elements


Epoch 2/4:   4%|▎         | 1/27 [00:00<00:07,  3.59it/s]

we have a batch with 32 elements


Epoch 2/4:   7%|▋         | 2/27 [00:00<00:12,  2.07it/s]

we have a batch with 32 elements


Epoch 2/4:  11%|█         | 3/27 [00:01<00:13,  1.79it/s]

we have a batch with 32 elements


Epoch 2/4:  15%|█▍        | 4/27 [00:02<00:13,  1.71it/s]

we have a batch with 32 elements


Epoch 2/4:  19%|█▊        | 5/27 [00:02<00:13,  1.67it/s]

we have a batch with 32 elements


Epoch 2/4:  22%|██▏       | 6/27 [00:03<00:12,  1.63it/s]

we have a batch with 32 elements


Epoch 2/4:  26%|██▌       | 7/27 [00:04<00:12,  1.61it/s]

we have a batch with 32 elements


Epoch 2/4:  30%|██▉       | 8/27 [00:04<00:11,  1.60it/s]

we have a batch with 32 elements


Epoch 2/4:  33%|███▎      | 9/27 [00:05<00:11,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  37%|███▋      | 10/27 [00:06<00:10,  1.58it/s]

we have a batch with 32 elements


Epoch 2/4:  41%|████      | 11/27 [00:06<00:10,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  44%|████▍     | 12/27 [00:07<00:09,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  48%|████▊     | 13/27 [00:07<00:09,  1.55it/s]

we have a batch with 32 elements


Epoch 2/4:  52%|█████▏    | 14/27 [00:08<00:08,  1.58it/s]

we have a batch with 32 elements


Epoch 2/4:  56%|█████▌    | 15/27 [00:09<00:07,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  59%|█████▉    | 16/27 [00:09<00:07,  1.56it/s]

we have a batch with 32 elements


Epoch 2/4:  63%|██████▎   | 17/27 [00:10<00:06,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  67%|██████▋   | 18/27 [00:11<00:05,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  70%|███████   | 19/27 [00:11<00:05,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  74%|███████▍  | 20/27 [00:12<00:04,  1.58it/s]

we have a batch with 32 elements


Epoch 2/4:  78%|███████▊  | 21/27 [00:13<00:03,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  81%|████████▏ | 22/27 [00:13<00:03,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  85%|████████▌ | 23/27 [00:14<00:02,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  89%|████████▉ | 24/27 [00:14<00:01,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4:  93%|█████████▎| 25/27 [00:15<00:01,  1.57it/s]

we have a batch with 32 elements


Epoch 2/4: 100%|██████████| 27/27 [00:16<00:00,  1.61it/s]

we have a batch with 21 elements
Epoch 2:
Train Loss: 0.9929
Accuracy: 0.6249, Precision: 0.6304, Recall: 0.6249, F1: 0.5890





Validation Metrics - Accuracy: 0.7579, Precision: 0.8000, Recall: 0.7579, F1: 0.7603


Epoch 3/4:   0%|          | 0/27 [00:00<?, ?it/s]

we have a batch with 32 elements


Epoch 3/4:   4%|▎         | 1/27 [00:00<00:07,  3.48it/s]

we have a batch with 32 elements


Epoch 3/4:   7%|▋         | 2/27 [00:00<00:12,  2.00it/s]

we have a batch with 32 elements


Epoch 3/4:  11%|█         | 3/27 [00:01<00:13,  1.76it/s]

we have a batch with 32 elements


Epoch 3/4:  15%|█▍        | 4/27 [00:02<00:13,  1.66it/s]

we have a batch with 32 elements


Epoch 3/4:  19%|█▊        | 5/27 [00:02<00:13,  1.62it/s]

we have a batch with 32 elements


Epoch 3/4:  22%|██▏       | 6/27 [00:03<00:13,  1.60it/s]

we have a batch with 32 elements


Epoch 3/4:  26%|██▌       | 7/27 [00:04<00:12,  1.58it/s]

we have a batch with 32 elements


Epoch 3/4:  30%|██▉       | 8/27 [00:04<00:12,  1.57it/s]

we have a batch with 32 elements


Epoch 3/4:  33%|███▎      | 9/27 [00:05<00:11,  1.56it/s]

we have a batch with 32 elements


Epoch 3/4:  37%|███▋      | 10/27 [00:06<00:10,  1.55it/s]

we have a batch with 32 elements


Epoch 3/4:  41%|████      | 11/27 [00:06<00:10,  1.55it/s]

we have a batch with 32 elements


Epoch 3/4:  44%|████▍     | 12/27 [00:07<00:09,  1.54it/s]

we have a batch with 32 elements


Epoch 3/4:  48%|████▊     | 13/27 [00:08<00:09,  1.54it/s]

we have a batch with 32 elements


Epoch 3/4:  52%|█████▏    | 14/27 [00:08<00:08,  1.54it/s]

we have a batch with 32 elements


Epoch 3/4:  56%|█████▌    | 15/27 [00:09<00:07,  1.53it/s]

we have a batch with 32 elements


Epoch 3/4:  59%|█████▉    | 16/27 [00:10<00:07,  1.53it/s]

we have a batch with 32 elements


Epoch 3/4:  63%|██████▎   | 17/27 [00:10<00:06,  1.53it/s]

we have a batch with 32 elements


Epoch 3/4:  67%|██████▋   | 18/27 [00:11<00:05,  1.53it/s]

we have a batch with 32 elements


Epoch 3/4:  70%|███████   | 19/27 [00:12<00:05,  1.52it/s]

we have a batch with 32 elements


Epoch 3/4:  74%|███████▍  | 20/27 [00:12<00:04,  1.52it/s]

we have a batch with 32 elements


Epoch 3/4:  78%|███████▊  | 21/27 [00:13<00:03,  1.52it/s]

we have a batch with 32 elements


Epoch 3/4:  81%|████████▏ | 22/27 [00:14<00:03,  1.51it/s]

we have a batch with 32 elements


Epoch 3/4:  85%|████████▌ | 23/27 [00:14<00:02,  1.51it/s]

we have a batch with 32 elements


Epoch 3/4:  89%|████████▉ | 24/27 [00:15<00:01,  1.51it/s]

we have a batch with 32 elements


Epoch 3/4:  93%|█████████▎| 25/27 [00:16<00:01,  1.51it/s]

we have a batch with 32 elements


Epoch 3/4: 100%|██████████| 27/27 [00:17<00:00,  1.56it/s]

we have a batch with 21 elements
Epoch 3:
Train Loss: 0.8405
Accuracy: 0.8230, Precision: 0.8240, Recall: 0.8230, F1: 0.8201





Validation Metrics - Accuracy: 0.8737, Precision: 0.8851, Recall: 0.8737, F1: 0.8708


Epoch 4/4:   0%|          | 0/27 [00:00<?, ?it/s]

we have a batch with 32 elements


Epoch 4/4:   4%|▎         | 1/27 [00:00<00:07,  3.37it/s]

we have a batch with 32 elements


Epoch 4/4:   7%|▋         | 2/27 [00:00<00:12,  1.94it/s]

we have a batch with 32 elements


Epoch 4/4:  11%|█         | 3/27 [00:01<00:14,  1.71it/s]

we have a batch with 32 elements


Epoch 4/4:  15%|█▍        | 4/27 [00:02<00:14,  1.60it/s]

we have a batch with 32 elements


Epoch 4/4:  19%|█▊        | 5/27 [00:02<00:14,  1.56it/s]

we have a batch with 32 elements


Epoch 4/4:  22%|██▏       | 6/27 [00:03<00:13,  1.53it/s]

we have a batch with 32 elements


Epoch 4/4:  26%|██▌       | 7/27 [00:04<00:13,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  30%|██▉       | 8/27 [00:05<00:12,  1.52it/s]

we have a batch with 32 elements


Epoch 4/4:  33%|███▎      | 9/27 [00:05<00:11,  1.51it/s]

we have a batch with 32 elements


Epoch 4/4:  37%|███▋      | 10/27 [00:06<00:11,  1.50it/s]

we have a batch with 32 elements


Epoch 4/4:  41%|████      | 11/27 [00:07<00:10,  1.50it/s]

we have a batch with 32 elements


Epoch 4/4:  44%|████▍     | 12/27 [00:07<00:10,  1.49it/s]

we have a batch with 32 elements


Epoch 4/4:  48%|████▊     | 13/27 [00:08<00:09,  1.48it/s]

we have a batch with 32 elements


Epoch 4/4:  52%|█████▏    | 14/27 [00:09<00:08,  1.48it/s]

we have a batch with 32 elements


Epoch 4/4:  56%|█████▌    | 15/27 [00:09<00:08,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  59%|█████▉    | 16/27 [00:10<00:07,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  63%|██████▎   | 17/27 [00:11<00:06,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  67%|██████▋   | 18/27 [00:11<00:06,  1.48it/s]

we have a batch with 32 elements


Epoch 4/4:  70%|███████   | 19/27 [00:12<00:05,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  74%|███████▍  | 20/27 [00:13<00:04,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  78%|███████▊  | 21/27 [00:13<00:04,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  81%|████████▏ | 22/27 [00:14<00:03,  1.46it/s]

we have a batch with 32 elements


Epoch 4/4:  85%|████████▌ | 23/27 [00:15<00:02,  1.46it/s]

we have a batch with 32 elements


Epoch 4/4:  89%|████████▉ | 24/27 [00:15<00:02,  1.47it/s]

we have a batch with 32 elements


Epoch 4/4:  93%|█████████▎| 25/27 [00:16<00:01,  1.46it/s]

we have a batch with 32 elements


Epoch 4/4: 100%|██████████| 27/27 [00:17<00:00,  1.51it/s]

we have a batch with 21 elements
Epoch 4:
Train Loss: 0.6672
Accuracy: 0.9543, Precision: 0.9542, Recall: 0.9543, F1: 0.9542





Validation Metrics - Accuracy: 0.8842, Precision: 0.9005, Recall: 0.8842, F1: 0.8813


BertCNNBiLSTM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise

In [13]:
print(df['Leaning'].unique())


[0 1 2]


In [22]:
import torch
from transformers import BertTokenizer
from torch.nn import functional as F

def classify_text(input_text, model, device='cuda'):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    model.eval()

    encoding = tokenizer.encode_plus(
        input_text,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs

        preds = torch.argmax(logits, dim=1).item()

    label_map_inv = {0: 'left', 1: 'right', 2: 'center'}
    predicted_label = label_map_inv[preds]

    return predicted_label


def is_biased(input_text, model):
    predicted_label = classify_text(input_text, model)
    return predicted_label != 'center'

In [23]:
texts = [
    "Trump is the best",
    "Biden is the worst",
    "The Republican Party stands for freedom and capitalism.",
    "Democrats are champions of equality and social justice.",
    "Progressive policies are the way forward for America.",
    "Right-wing extremists are the biggest threat to democracy.",
    "Neither Democrats nor Republicans have all the answers.",
    "It's best to vote for the Republican candidate, as he will conserve America's values."
]

is_biased_list = [is_biased(text, model) for text in texts]

for text, leaning in zip(texts, is_biased_list):
    print(f"Text: {text}\nIs biased?: {leaning}\n")


Text: Trump is the best
Is biased?: True

Text: Biden is the worst
Is biased?: True

Text: The Republican Party stands for freedom and capitalism.
Is biased?: True

Text: Democrats are champions of equality and social justice.
Is biased?: True

Text: Progressive policies are the way forward for America.
Is biased?: True

Text: Right-wing extremists are the biggest threat to democracy.
Is biased?: True

Text: Neither Democrats nor Republicans have all the answers.
Is biased?: True

Text: It's best to vote for the Republican candidate, as he will conserve America's values.
Is biased?: True



In [12]:
!pip install Dbias
from Dbias.bias_classification import *

# returns classification label for a given sentence fragment.
classifier("Nevertheless, Trump and other Republicans have tarred the protests as havens for terrorists intent on destroying property.")




tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/657 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/268M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at d4data/bias-detection-model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


[{'label': 'Biased', 'score': 0.9938021898269653}]

In [40]:
my_model_predictions = [is_biased(text, model) for text in val_texts]

In [41]:
my_model_predictions[4]

False

In [42]:



# Extract Dbias bias predictions (True/False)
dbias_predictions = [classifier(text) for text in val_texts]

In [43]:
dbias_predictions[4]

[{'label': 'Non-biased', 'score': 0.6072980761528015}]

In [44]:
dbias_predictions_bool = [pred[0]['label'] == 'Biased' for pred in dbias_predictions]

In [45]:
for i in range(5):
  print(f"{dbias_predictions[i]} and {dbias_predictions_bool[i]}")

[{'label': 'Biased', 'score': 0.9938315153121948}] and True
[{'label': 'Biased', 'score': 0.7543418407440186}] and True
[{'label': 'Biased', 'score': 0.8397212624549866}] and True
[{'label': 'Biased', 'score': 0.8203869462013245}] and True
[{'label': 'Non-biased', 'score': 0.6072980761528015}] and False


In [47]:
val_labels[:5]

[0, 0, 1, 2, 2]

In [48]:
val_binary_labels = [False if label == 2 else True for label in val_labels]

In [49]:
acc_mine = accuracy_score(val_binary_labels, my_model_predictions)
acc_theirs = accuracy_score(val_binary_labels, dbias_predictions_bool)

print(f"mine={acc_mine} theirs={acc_theirs}")

mine=0.9052631578947369 theirs=0.7473684210526316
