# In [1]:
import spacy
# Load the English pipeline once at import time; every call below reuses it.
# NOTE(review): spaCy's documentation says the "*_sm" packages ship without
# static word vectors, so Doc.similarity may be a weak signal here -- consider
# confirming whether a vector-bearing package (e.g. "en_core_web_md") is needed.
nlp = spacy.load("en_core_web_sm")

# Reference headings that candidate lines are compared against.
known_heading_examples = [
    "God's Word",
    "Concepts",
    "Problems",
    "Note",
    "Summary",
    "Workout",
    "Reading",
    "Diet",
    "Reflection",
    "Learning",
]

# Parse the lowercased references up front so they are not re-parsed per call.
heading_docs = list(map(nlp, map(str.lower, known_heading_examples)))

def is_heading(line, *, max_words=6, title_boost=0.10, threshold=0.68):
    """Return True when *line* looks like a short section heading.

    The check runs in stages: cheap textual filters first, then a similarity
    comparison against the module-level ``heading_docs`` using the
    module-level ``nlp`` pipeline.

    Args:
        line: The raw text line to classify; surrounding whitespace is ignored.
        max_words: Largest whitespace-separated word count still accepted.
        title_boost: Bonus added to the similarity score when the stripped
            text is title-cased (``str.istitle``).
        threshold: Minimum boosted score required to accept the line.

    Returns:
        False for empty lines, lines longer than ``max_words`` words, or lines
        containing any of ``. ! ? , :``. Otherwise, whether the best
        similarity score plus any title-case bonus reaches ``threshold``.
    """
    text = line.strip()

    # Split once; an empty or whitespace-only line yields zero words.
    word_count = len(text.split())
    if not 1 <= word_count <= max_words:
        return False

    # Sentence-style punctuation marks the line as running text.
    if any(mark in text for mark in ".!?,:"):
        return False

    # Compare in lowercase, matching how the reference docs were built.
    doc = nlp(text.lower())
    # default=0.0 keeps an empty reference list from raising ValueError.
    best_score = max((doc.similarity(ref) for ref in heading_docs), default=0.0)

    bonus = title_boost if text.istitle() else 0.0
    return (best_score + bonus) >= threshold


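# Cell output: ModuleNotFoundError: No module named 'spacy'
# spaCy is not installed in this environment. Install it with
# `pip install spacy`, then fetch the model with
# `python -m spacy download en_core_web_sm`.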