In [6]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# 1. Load your saved model and tokenizer
model_path = "/Users/gwin/Documents/Post Undergrad Work/Tax Search/my_finetuned_model"  # Path where you saved your model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# 2. Prepare your label mappings (ensure these match your training)
id2label = {
    0: "defense|procurement", 
    1: "healthcare|medicaid",
    # ... add all your other labels
}
label2id = {v: k for k, v in id2label.items()}

# 3. Create a prediction function
def predict(text):
    # Tokenize input
    inputs = tokenizer(text, 
                      return_tensors="pt", 
                      truncation=True, 
                      max_length=512,
                      padding=True)
    
    # Get predictions
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Process output
    logits = outputs.logits
    pred_id = torch.argmax(logits, dim=1).item()
    confidence = torch.softmax(logits, dim=1)[0][pred_id].item()
    
    return {
        "label": id2label[pred_id],
        "confidence": round(confidence, 4),
        "raw_output": logits.tolist()[0]
    }

# 4. Test the model
test_texts = [
    "The Pentagon announced new defense contracts worth $2 billion",
    "Medicaid expansion approved for low-income families",
    "The education department issued new grant guidelines"
]

for text in test_texts:
    result = predict(text)
    print(f"Text: {text[:60]}...")
    print(f"Prediction: {result['label']} (Confidence: {result['confidence']:.2%})")
    print("-" * 50)

KeyError: 9

In [2]:
import numpy as np
import torch
print("NumPy version:", np.__version__)  # Should show 1.23.5 or similar 1.x version
print("PyTorch version:", torch.__version__)

NumPy version: 1.23.5
PyTorch version: 2.2.2
