In [None]:
import spacy
import os
from spacy import displacy # For visualization (optional)


In [None]:
# Model locations, relative to this notebook (located in Model Training/).
# Every trained pipeline lives under the shared 'outputs' folder and follows
# the same layout, so the four paths are derived from one template.

def _best_model_path(source, variant):
    """Return the relative path of the best grid-search model for one corpus.

    source:  corpus folder name ("news" or "reddit").
    variant: training variant embedded in the folder names ("pretrained" or "blank").
    """
    return (
        f"../../outputs/information_extraction/{source}"
        f"/grid_search_models_{variant}_sequential"
        f"/best_model_{variant}_sequential"
    )

# --- News Models ---
news_pretrained_model_path = _best_model_path("news", "pretrained")
news_blank_model_path = _best_model_path("news", "blank")

# --- Reddit Models ---
reddit_pretrained_model_path = _best_model_path("reddit", "pretrained")
reddit_blank_model_path = _best_model_path("reddit", "blank")

# --- Load Models ---
# Registry of successfully loaded pipelines, keyed by a display name.
models = dict()

# Function to safely load a model
def load_model(name, path):
    """Try to load the spaCy pipeline at `path` and register it under `name`.

    On success the pipeline is stored in the module-level `models` dict.
    A missing path or any exception raised while loading is reported on
    stdout and leaves `models` without an entry for `name`; nothing is raised
    to the caller.

    Args:
        name: Key under which the pipeline is stored in `models`.
        path: Filesystem location handed to `spacy.load`.

    Returns:
        None. The function communicates only via `models` and printed output.
    """
    print(f"Attempting to load model '{name}' from: {path}")

    # Guard clause: nothing to load if the path does not exist.
    if not os.path.exists(path):
        print(" -> Path not found.")
        return

    try:
        models[name] = spacy.load(path)
        print(" -> Success.")
    except Exception as load_error:
        # Best-effort loading: report the failure and let the caller continue.
        print(f" -> ERROR loading model '{name}': {load_error}")

# Load the models
# Each (label, path) pair is attempted in order; load_model reports failures
# itself, so a model that cannot be loaded is simply absent from `models`.
for _label, _path in (
    ("News_Pretrained", news_pretrained_model_path),
    ("News_Blank", news_blank_model_path),
    ("Reddit_Pretrained", reddit_pretrained_model_path),
    ("Reddit_Blank", reddit_blank_model_path),
):
    load_model(_label, _path)

print(f"\nFinished loading attempts. Successfully loaded {len(models)} models.")


In [None]:
# --- Sample Texts ---
# One example per domain; adjacent string literals are concatenated by Python,
# so each variable holds a single unbroken sentence sequence.
news_sample_text = (
    "France car registrations down 14.54% in March, Tesla sales fall 36.83%. "
    "New car registrations in France fell 14.54% in March from a year earlier "
    "to 153,842 vehicles, data from French car body PFA showed on Tuesday."
)

reddit_sample_text = (
    "Thinking of buying more GME and AMC stonks tomorrow. Diamond hands! "
    "To the moon! Market might dip 5% but I believe. "
    "Fed meeting next week on April 10th might affect things."
)

# Choose which text to test (swap the comment to switch to the Reddit sample)
text_to_test = news_sample_text
# text_to_test = reddit_sample_text

# Header, the selected text, and a divider — one item per output line.
print("--- Running Inference on: ---", text_to_test, "-" * 27, sep="\n")

# --- Run Inference with Loaded Models ---
# Apply every pipeline in `models` to the selected text and list each entity
# as "text (LABEL) [start_char:end_char]". A failure in one model is reported
# and does not stop the remaining models from running.
for model_name, nlp in models.items():
    print(f"\n🔍 Results from Model: {model_name}")

    # Guard: skip registry entries that do not hold a usable pipeline.
    if not nlp:
        print(f"  (Model {model_name} not loaded)")
        continue

    try:
        doc = nlp(text_to_test)
        if not doc.ents:
            print("  (No entities found)")
        else:
            for ent in doc.ents:
                print(f"  - {ent.text} ({ent.label_}) [{ent.start_char}:{ent.end_char}]")
            # Optional: Visualize
            # displacy.render(doc, style="ent", jupyter=True)
    except Exception as e:
        print(f"  ERROR during inference with {model_name}: {e}")


In [None]:
# Optional comparison: run an off-the-shelf Hugging Face NER model on the same
# text. The whole step is best-effort — a missing library or any runtime
# failure is reported and the notebook carries on.

# Install transformers if not already installed
# !pip install transformers torch torchvision torchaudio -q

import warnings

# Suppress specific warnings if desired
# NOTE(review): this only affects messages issued through the `warnings`
# module; if transformers emits this text via `logging`, the filter has no
# effect — confirm.
warnings.filterwarnings("ignore", message="Some weights of the model checkpoint.*")

# Checkpoint used for the comparison. Defined before the header so the header
# always names the model that is actually loaded.
# Using a more robust model like bert-large-NER might give better results
# hf_model_name = "dslim/bert-large-NER"
hf_model_name = "dslim/bert-base-NER"

print(f"\n--- Comparing with Hugging Face ({hf_model_name}) ---")

try:
    # Imported inside the try so that a missing `transformers` install reaches
    # the ImportError handler below instead of aborting the cell.
    from transformers import AutoTokenizer, AutoModelForTokenClassification
    from transformers import pipeline

    # Load tokenizer and model
    print(f"Loading HF tokenizer and model: {hf_model_name}")
    tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
    model = AutoModelForTokenClassification.from_pretrained(hf_model_name)

    # Create NER pipeline
    # Set aggregation_strategy for cleaner output (e.g., 'simple', 'first', 'average', 'max')
    nlp_hf = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

    # Run NER on the chosen sample text
    print("\nRunning HF NER on:")
    print(text_to_test)
    ner_results_hf = nlp_hf(text_to_test)

    print("\n🔍 Results from Hugging Face Model:")
    if ner_results_hf:
        for entity in ner_results_hf:
            print(f"  - {entity['word']} ({entity['entity_group']}) (Score: {entity['score']:.4f}) [{entity['start']}:{entity['end']}]")
    else:
        print("  (No entities found)")

except ImportError:
    print("\nNOTE: 'transformers' library not installed. Skipping Hugging Face comparison.")
    print("Install using: pip install transformers torch")
except Exception as e:
    print(f"\nERROR during Hugging Face comparison: {e}")
