# In [None]:  (Jupyter cell prompt left over from the notebook export)
import os
# NOTE(review): TF_CPP_MIN_LOG_LEVEL is TensorFlow's native log-level
# variable. It is set here, before the spaCy import below, presumably so it
# is already in place if that import pulls TensorFlow in — confirm it is
# still needed.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
import warnings
# Ignore every warning category for the rest of the process. This runs before
# the spaCy import so import-time warnings are suppressed as well.
warnings.simplefilter(action='ignore')

import spacy
from spacy.matcher import PhraseMatcher

# Exact phrases searched for. Matching uses PhraseMatcher's default attribute
# (the verbatim token text), so it is case-sensitive.
_ATHLETE_NAMES = (
    "Sarah Claxton",
    "Sonia O'Sullivan",
    "Irina Shevchenko",
)
_EVENT_NAMES = (
    "European Indoor Championships",
    "World Cross Country Championships",
    "London marathon",
    "Bupa Great Ireland Run",
)


def _print_phrase_matches(nlp, doc, label, phrases, heading, empty_message):
    """Print every occurrence of *phrases* found in *doc* under *heading*.

    Args:
        nlp: Loaded spaCy pipeline; supplies the vocab and the tokenizer
            used to turn each phrase into a pattern.
        doc: Tokenized text to search.
        label: Key under which the patterns are registered in the matcher.
        phrases: Iterable of phrase strings to look for.
        heading: Line printed before the results.
        empty_message: Line printed when nothing matched.
    """
    print(heading)

    matcher = PhraseMatcher(nlp.vocab)
    # make_doc only tokenizes, which is all a phrase pattern needs.
    matcher.add(label, [nlp.make_doc(phrase) for phrase in phrases])

    matches = matcher(doc)
    # One line per occurrence, so a phrase that appears twice is listed twice.
    for _match_id, start, end in matches:
        print(f"- {doc[start:end].text}")

    if not matches:
        print(empty_message)


def main():
    """Prompt for a text file and print the known athletes and events in it.

    The file name read from standard input is joined onto ``sys.path[0]`` and
    the file is read as UTF-8. The first 300 characters are echoed, followed
    by one section listing matched athlete names and one listing matched
    sports events.

    Raises:
        SystemExit: With status 1 when the file is missing, cannot be read or
            decoded, or when the ``en_core_web_sm`` model is not installed.
    """
    filename = input("Enter text file name: ")
    # sys.path[0] is normally the directory of the script being run, so the
    # name is resolved next to the script rather than the working directory.
    filepath = os.path.join(sys.path[0], filename)

    try:
        with open(filepath, "r", encoding="utf-8") as file:
            content = file.read()
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as exc:
        # Covers an empty name (the path is then a directory), permission
        # problems and files that are not valid UTF-8, all of which would
        # otherwise end in a raw traceback.
        print(f"Error: Could not read file '{filename}': {exc}")
        sys.exit(1)

    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        print("SpaCy model 'en_core_web_sm' not found.")
        print("Install it using: python -m spacy download en_core_web_sm")
        sys.exit(1)

    # The matchers compare verbatim token text only, so tokenizing is enough;
    # running the tagger, parser and NER over the whole file would be wasted.
    doc = nlp.make_doc(content)

    print("=== Original Text Sample (First 300 chars) ===")
    print(content[:300])
    print()

    _print_phrase_matches(
        nlp,
        doc,
        "ATHLETES",
        _ATHLETE_NAMES,
        "=== Matched Athlete Names ===",
        "No athlete names found.",
    )
    print()

    _print_phrase_matches(
        nlp,
        doc,
        "EVENTS",
        _EVENT_NAMES,
        "=== Matched Sports Events ===",
        "No sports events found.",
    )

# Run the interactive workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()