In [1]:
from transformers import AutoTokenizer, AutoModel
from torch import nn
import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [52]:
# Load the pickled encoder whose `inverse_transform` is used further down to
# turn predicted class indices back into the original topic labels.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('../label_encoder.pkl', 'rb') as f:
    le = pickle.load(f)

# Fail fast if the pickle did not contain an encoder-like object, so that the
# later `le.inverse_transform(...)` cells do not break with an unrelated error.
if not hasattr(le, "inverse_transform"):
    raise TypeError(
        f"Expected an object with inverse_transform, got {type(le).__name__}"
    )

array([1305.])

In [3]:


class PolitcalModel(nn.Module):
    """Pretrained transformer encoder with a topic head and a stance head.

    The encoder's last hidden states are mean-pooled over the positions
    selected by ``attention_mask``; the pooled vector goes through dropout and
    is then fed to two independent linear heads.

    Args:
        model_name: Identifier passed to ``AutoModel.from_pretrained``.
        num_classes: Output width of the topic classification head.
    """

    def __init__(self, model_name, num_classes):
        super().__init__()
        # Pretrained encoder backbone (this notebook uses a Longformer checkpoint).
        self.model = AutoModel.from_pretrained(model_name)
        # Dropout applied to the pooled representation before both heads.
        self.dropout = nn.Dropout(p=0.2)
        hidden_size = self.model.config.hidden_size
        # Classification head: hidden dim -> num_classes topic logits.
        self.topic_head = nn.Linear(hidden_size, num_classes)
        # Regression head: hidden dim -> a single stance value.
        self.stance_head = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and both heads.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Mask forwarded to the encoder and reused as the
                pooling weights (non-zero = position contributes to the mean).

        Returns:
            Tuple ``(topic_logits, stance_pred)``.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state

        # Broadcast the mask across the hidden dimension so masked positions
        # contribute nothing to the sum.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        summed = (last_hidden_state * mask).sum(dim=1)
        # Clamp the token count so a fully masked row pools to zeros instead of
        # producing NaN from a 0/0 division. Rows with at least one unmasked
        # token are unaffected because their count is already >= 1.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        x = summed / token_counts

        x = self.dropout(x)
        topic_logits = self.topic_head(x)
        stance_pred = self.stance_head(x)
        return topic_logits, stance_pred
    



In [4]:
# Inference configuration shared by the cells below.
model_name = "allenai/longformer-base-4096"
# Output width of the topic head; it has to match the checkpoint loaded below.
num_classes = 199
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Every input is padded/truncated to this many tokens in predict().
# NOTE(review): 1028 may have been intended as 1024 -- confirm against training.
max_length = 1028

In [5]:
# Location of the fine-tuned checkpoint.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
MODEL_WEIGHTS_PATH = "D:/Users/imaad/Documents/my-projects/political_app/politicalApp/models/best_model.pt"

model = PolitcalModel(model_name, num_classes).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU can still be loaded when `device` is the CPU.
state_dict = torch.load(MODEL_WEIGHTS_PATH, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [6]:
def predict(text):
    """Tokenize ``text`` and run it through the global ``model`` without gradients.

    Returns whatever ``model(...)`` returns. Any exception raised along the way
    is printed and swallowed, in which case the function returns ``None`` --
    callers should check for that before indexing the result.
    """
    try:
        # Switch to inference mode (disables dropout).
        model.eval()

        batch = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=max_length,
            return_tensors='pt',
        )
        ids = batch['input_ids'].to(device)
        mask = batch['attention_mask'].to(device)

        with torch.no_grad():
            return model(input_ids=ids, attention_mask=mask)
    except Exception as e:
        print(f"Caught an error: {e}")
        return None

In [21]:
# Run the model on one sample policy statement; `a` holds whatever predict()
# returns (the model's output tuple, or None if predict() caught an error).
a = predict("Reduces federal involvement and promotes private sector solutions in natural gas and oil (including offshore oil and gas). ")

In [22]:
# Inspect the raw value returned by predict() for the sample text.
a

(tensor([[-2.4530e+00, -9.5648e-01, -2.9895e-01,  1.3305e-01, -2.2630e+00,
          -9.6890e-02, -1.3016e+00, -9.1696e-02, -2.6454e+00, -1.6153e+00,
          -5.7475e-01, -2.9732e-01,  1.0342e+00, -2.0097e+00, -2.2191e-01,
           2.7754e-01, -1.8237e-01, -1.0240e+00, -1.2024e+00, -1.1210e+00,
          -1.8502e+00, -7.1475e-01, -2.6377e-01, -8.6853e-01, -5.6435e-01,
          -7.7424e-01, -1.9424e+00, -3.3298e-01, -1.9273e-01, -6.9673e-01,
          -1.6139e+00, -1.5080e+00, -1.7254e+00, -5.4615e-01, -3.2262e+00,
          -1.7646e+00, -1.2357e+00, -3.1065e+00, -1.8625e+00, -1.3046e+00,
          -8.2945e-01,  4.6033e-01, -1.2802e+00, -6.9750e-01, -1.7171e+00,
          -2.9095e+00, -1.4115e+00, -1.2217e+00, -8.1341e-01, -1.7344e+00,
          -1.2913e+00, -1.0382e+00, -5.1916e-01, -3.5246e+00, -5.1744e+00,
           2.5338e+00,  4.1997e+00, -6.9841e-01,  1.8079e+00,  1.4915e+00,
          -3.0340e+00, -3.2309e-01,  3.4366e-01, -1.5980e+00,  1.1096e+00,
          -4.9497e-01, -4

In [23]:
# First element of the model's output tuple: the topic logits from the
# classification head.
b = a[0]

In [24]:
# Normalise the topic logits into probabilities along dim 1.
probs = b.softmax(dim=1)

In [25]:
# Inspect the softmax probabilities computed from the topic logits.
probs

tensor([[1.1363e-05, 5.0746e-05, 9.7941e-05, 1.5086e-04, 1.3739e-05, 1.1987e-04,
         3.5937e-05, 1.2050e-04, 9.3735e-06, 2.6260e-05, 7.4334e-05, 9.8101e-05,
         3.7150e-04, 1.7700e-05, 1.0578e-04, 1.7431e-04, 1.1005e-04, 4.7431e-05,
         3.9684e-05, 4.3049e-05, 2.0762e-05, 6.4623e-05, 1.0145e-04, 5.5411e-05,
         7.5111e-05, 6.0890e-05, 1.8933e-05, 9.4664e-05, 1.0892e-04, 6.5798e-05,
         2.6295e-05, 2.9233e-05, 2.3523e-05, 7.6491e-05, 5.2443e-06, 2.2617e-05,
         3.8381e-05, 5.9107e-06, 2.0507e-05, 3.5828e-05, 5.7620e-05, 2.0927e-04,
         3.6714e-05, 6.5747e-05, 2.3717e-05, 7.1979e-06, 3.2194e-05, 3.8924e-05,
         5.8552e-05, 2.3310e-05, 3.6308e-05, 4.6766e-05, 7.8583e-05, 3.8913e-06,
         7.4742e-07, 1.6643e-03, 8.8049e-03, 6.5687e-05, 8.0533e-04, 5.8686e-04,
         6.3552e-06, 9.5605e-05, 1.8623e-04, 2.6718e-05, 4.0056e-04, 8.0507e-05,
         8.3810e-05, 9.6957e-01, 1.3348e-04, 3.0235e-04, 1.1186e-04, 2.0194e-05,
         6.9949e-05, 6.6500e

In [26]:
# Keep the five largest probabilities per row together with their indices.
topk_vals, topk_idx = probs.topk(k=5, dim=1)

In [50]:
# Inspect the loaded label encoder.
# NOTE(review): the saved output of this cell shows an `_io.BufferedReader`
# rather than an encoder -- presumably captured from an earlier run; re-run
# this cell after the load cell to refresh it.
le

<_io.BufferedReader name='../label_encoder.pkl'>

In [None]:
# Map each top-k topic index back to its original label and pair it with its
# softmax probability: {label: probability}.
# The previous comprehension had a stray `if` (a syntax error) and iterated
# range(4) although topk was computed with k=5; this covers every top-k entry.
top_labels = le.inverse_transform(topk_idx[0].tolist())
issues = dict(zip(top_labels, topk_vals[0].tolist()))


0.9695650339126587

0

In [41]:
# Second element of the model's output tuple: the stance value from the
# regression head.
stance = a[1]

In [55]:
# `pred` is not defined in any earlier cell, so derive it here to keep the
# notebook runnable from a fresh kernel.
# NOTE(review): assumes `pred` was meant to be the index of the most probable
# topic -- confirm.
pred = probs.argmax(dim=1)
le.inverse_transform([pred.item()])

array([1525])

In [None]:
# Affine rescale of the stance prediction: s -> 2*s - 1.
# NOTE(review): presumably maps a [0, 1] score onto [-1, 1] -- confirm against
# how the stance target was scaled during training.
2 * stance - 1

tensor([[0.0125]], device='cuda:0')