In [None]:
from transformers import AutoTokenizer, AutoModel
from torch import nn
import torch
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import pickle

In [4]:
# Load the fitted label encoder used to map predicted class indices back to
# the original topic labels.
# SECURITY: pickle.load can execute arbitrary code; only load files that come
# from a trusted source.
with open('../label_encoder.pkl', 'rb') as encoder_file:
    # Bind the unpickled object (not the file handle) to `le`, which later
    # cells use via `le.inverse_transform(...)`.
    le = pickle.load(encoder_file)

array([1305.])

In [4]:


class PolitcalModel(nn.Module):
    """Pretrained encoder with two linear heads: topic logits and a stance score.

    The encoder's last hidden states are mean-pooled over the positions
    selected by the attention mask, passed through dropout, and then fed to
    both heads independently.
    """

    def __init__(self, model_name, num_classes):
        """Build the encoder and both heads.

        Args:
            model_name: Identifier passed to ``AutoModel.from_pretrained``.
            num_classes: Output size of the topic classification head.
        """
        super().__init__()
        # Pretrained encoder backbone
        self.model = AutoModel.from_pretrained(model_name)
        # Dropout applied to the pooled representation
        self.dropout = nn.Dropout(p=0.2)
        hidden_size = self.model.config.hidden_size
        # Classification head: hidden dim -> num_classes
        self.topic_head = nn.Linear(hidden_size, num_classes)
        # Regression head: hidden dim -> 1
        self.stance_head = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Encode the inputs and return ``(topic_logits, stance_pred)``.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Mask forwarded to the encoder and also used as the
                pooling weights (non-zero entries are averaged, zeros ignored).

        Returns:
            Tuple ``(topic_logits, stance_pred)`` produced by the two heads
            from the same pooled vector.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state

        # Broadcast the mask across the hidden dimension so masked positions
        # contribute nothing to the sum.
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
        summed = (last_hidden_state * mask).sum(dim=1)
        # Clamp the count so a fully masked row yields zeros instead of NaN
        # (0 / 0); rows with at least one unmasked token are unaffected.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        x = summed / token_counts

        x = self.dropout(x)
        topic_logits = self.topic_head(x)
        stance_pred = self.stance_head(x)
        return topic_logits, stance_pred
    



In [5]:
# Inference configuration shared by the cells below.
model_name = "allenai/longformer-base-4096"
# Output size of the topic head; passed to PolitcalModel.
num_classes = 199
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Use the GPU when one is available, otherwise fall back to CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Length every input is padded/truncated to in predict().
# NOTE(review): 1028 may be a typo for 1024 — confirm against the training
# configuration before changing it.
max_length = 1028

In [6]:
# Build the model on the selected device and restore the saved weights.
model = PolitcalModel(model_name, num_classes).to(device)
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable constant relative to the project.
# map_location remaps the checkpoint tensors onto `device`, so a checkpoint
# saved on GPU also loads when running on a different device.
model.load_state_dict(
    torch.load(
        "D:/Users/imaad/Documents/my-projects/political_app/politicalApp/models/best_model.pt",
        map_location=device,
        weights_only=True,
    )
)

<All keys matched successfully>

In [7]:
def predict(text):
    """Run the notebook-level model on ``text`` and return its raw outputs.

    Relies on the globals ``model``, ``tokenizer``, ``device`` and
    ``max_length`` defined in earlier cells.

    Args:
        text: Input passed straight to the tokenizer.

    Returns:
        Whatever ``model(...)`` returns; for ``PolitcalModel`` that is the
        tuple ``(topic_logits, stance_pred)``.
    """
    # Switch off dropout for inference.
    model.eval()

    # Every input is padded/truncated to exactly ``max_length`` tokens.
    batch = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=max_length,
        return_tensors='pt',
    )
    ids, mask = (batch[key].to(device) for key in ('input_ids', 'attention_mask'))

    # No gradients are needed when only predicting.
    with torch.no_grad():
        return model(input_ids=ids, attention_mask=mask)


In [None]:
# Run the model on a sample sentence; `a` holds the tuple returned by
# predict(), i.e. (topic_logits, stance_pred).
a = predict("To give everyone free money ")

In [39]:
# First element of the prediction tuple: the topic logits.
b = a[0]

In [40]:
# Index of the highest-scoring topic along dim 1. argmax is used instead of
# unpacking torch.max into `_`, because assigning to `_` shadows IPython's
# last-output variable for the rest of the session.
pred = torch.argmax(b, dim=1)

In [41]:
# Second element of the prediction tuple: the stance head output.
stance = a[1]

In [42]:
# Map the predicted class index back to its original label.
# pred.item() requires `pred` to hold exactly one element (single input text).
# NOTE(review): this needs `le` to be the unpickled encoder object, not the
# file handle bound by `with open(...) as le` — confirm the loading cell
# actually assigns the encoder.
le.inverse_transform([pred.item()])

array([208.])

In [None]:
# Linearly rescale the stance output: 2 * stance - 1.
# NOTE(review): presumably maps a [0, 1] training target onto [-1, 1] —
# confirm against the training code.
(stance*2)-1

tensor([[0.0125]], device='cuda:0')