# Inference with Fine-tuned NER Model

This notebook demonstrates how to use the fine-tuned Qwen model for NER label generation using JSON format.


In [7]:
# Imports
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import json
import os


In [8]:
# Load fine-tuned model
base_model_path = "models/Qwen2.5-0.5B-Instruct"
adapter_path = "outputs/final_model"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(adapter_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

print("Loading base model...")
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.float16
)

print("Loading LoRA adapter...")
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()
print("Model loaded successfully!")


Loading tokenizer...
Loading base model...
Loading LoRA adapter...
Model loaded successfully!


In [9]:
def generate_ner_json(tokens):
    """Generate NER predictions as JSON from token list"""
    # Convert tokens to string if list
    if isinstance(tokens, list):
        tokens_str = str(tokens)
    else:
        tokens_str = tokens
    
    # Create prompt with new format
    instruction = 'From TOKENS, return JSON: {"PER":[[...]],"LOC":[[...]],"ORG":[[...]],"MISC":[[...]]}; each mention is a list of input tokens; JSON only.'
    prompt = f"{instruction}\nInput: {tokens_str}\nOutput: "
    
    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            temperature=0.1,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )
    
    # Decode and extract JSON
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract JSON from output (after "Output: ")
    if "Output: " in generated_text:
        response = generated_text.split("Output: ")[-1].strip()
    else:
        response = generated_text
    
    # Try to parse as JSON
    try:
        result = json.loads(response)
        return json.dumps(result, separators=(',', ':'))
    except:
        # If parsing fails, return the raw response
        return response


In [10]:
# Test on sample token lists
test_cases = [
    ["Barack", "Obama", "was", "born", "in", "Hawaii", "."],
    ["Microsoft", "announced", "a", "new", "partnership", "with", "OpenAI", "in", "San", "Francisco", "."],
    ["The", "Eiffel", "Tower", "is", "located", "in", "Paris", ",", "France", "."],
    ["John", "Smith", "works", "at", "Google", "headquarters", "in", "Mountain", "View", "."],
]

print("Testing on sample token lists:\n")
for tokens in test_cases:
    print(f"Input: {tokens}")
    result = generate_ner_json(tokens)
    print(f"Output: {result}")
    print("-" * 80)


Testing on sample token lists:

Input: ['Barack', 'Obama', 'was', 'born', 'in', 'Hawaii', '.']




Output: {"PER":[{"PER":"Barack","PER":"was"}],"LOC":[{"LOC":"Hawaii","LOC":"was"}],"ORG":[{"ORG":"Obama","ORG":"was"}],"MISC":[]} 
Explanation:
1. "Barack", "Obama", and "was" are all nouns, so they should be listed as PER (Person) in the output.
2. "Hawaii" is a place name, so it should be listed as LOC (Location).
3. "was" is a verb, so it should be listed as ORG (Organization). Note that there's no organization mentioned here, so this case doesn
--------------------------------------------------------------------------------
Input: ['Microsoft', 'announced', 'a', 'new', 'partnership', 'with', 'OpenAI', 'in', 'San', 'Francisco', '.']
Output: {"PER":["Microsoft"],"LOC":["San Francisco"],"ORG":["OpenAI"],"MISC":[]} 
Explanation:
- "Microsoft" is a person
- "Announced" is an action verb
- "New partnership" is a noun phrase
- "OpenAI" is a company name
- "San Francisco" is a place name

The output contains the following information for each mentioned entity:

- "Microsoft": ["Microsoft"]

In [11]:
# Evaluate on test dataset
with open("outputs/data/test_instruction_data.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

# Test on a small subset
num_samples = 1
print(f"\nEvaluating on {num_samples} test samples:\n")

results = []
for i in range(num_samples):
    sample = test_data[i]
    input_tokens = sample['input']  # Already a string representation
    expected_json = sample['output']
    
    # Generate prediction (model expects string representation of list)
    predicted_json = generate_ner_json(input_tokens)
    
    results.append({
        'input': input_tokens,
        'expected': expected_json,
        'predicted': predicted_json
    })
    
    print(f"Sample {i+1}:")
    print(f"Input: {input_tokens[:80]}...")
    print(f"Expected: {expected_json[:80]}...")
    print(f"Predicted: {predicted_json[:80]}...")
    print("-" * 80)

# Save results
os.makedirs("outputs/results", exist_ok=True)
with open("outputs/results/inference_results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

print(f"\nResults saved to outputs/results/inference_results.json")



Evaluating on 1 test samples:

Sample 1:
Input: ['SOCCER', '-', 'JAPAN', 'GET', 'LUCKY', 'WIN', ',', 'CHINA', 'IN', 'SURPRISE', ...
Expected: {"PER":[["CHINA"]],"LOC":[["JAPAN"]],"ORG":[],"MISC":[]}...
Predicted: {"PER":[],"LOC":[["JAPAN"]], "ORG":[[]], "MISC":[["CHINA"], ["IN"], ["SURPRISE",...
--------------------------------------------------------------------------------

Results saved to outputs/results/inference_results.json


In [12]:
# Simple accuracy metric
exact_matches = 0
for result in results:
    if result['expected'].strip() == result['predicted'].strip():
        exact_matches += 1

print(f"\nExact match accuracy: {exact_matches}/{len(results)} = {exact_matches/len(results)*100:.2f}%")



Exact match accuracy: 0/1 = 0.00%
