### Named Entity Recognition (NER) using Rules

In [None]:
import re

def rule_based_ner(text):
    """Extract PERSON, ORG and DATE entities from *text* with fixed regex rules.

    Args:
        text: The sentence or document to scan.

    Returns:
        A dict with the keys "PERSON", "ORG" and "DATE". PERSON holds
        ``(title, full_name)`` tuples because its rule has two capture
        groups; ORG and DATE hold plain strings.
    """
    # One rule per entity label, listed in the order the keys should appear.
    entity_rules = (
        # A title followed by two capitalised words.
        ("PERSON", r"(President|CEO|Dr\.|Mr\.|Ms\.) ([A-Z][a-z]+ [A-Z][a-z]+)"),
        # A closed list of known organisation names.
        ("ORG", r"\b(DSN|DSN_LAGOS)\b"),
        # Dates written like "March 15, 2025".
        ("DATE", r"\b([A-Z][a-z]+ \d{1,2}, \d{4})\b"),
    )

    return {label: re.findall(rule, text) for label, rule in entity_rules}

# Demo: run the rule-based NER on a sample sentence that contains a titled
# person, a known organisation and a long-form date. The cell output follows.
text = "President Bola Tinubu visited DSN on March 15, 2025."
rule_based_ner(text)

{'PERSON': [('President', 'Bola Tinubu')],
 'ORG': ['DSN'],
 'DATE': ['March 15, 2025']}

Text Classification (keyword-based)

In [None]:
def rule_based_classification(text):
    """Assign *text* to the first category that has a matching keyword.

    Categories are tried in the order Business, Technology, Sports, and the
    first one with a hit wins. Matching is case-insensitive and works on whole
    words (an optional plural "s"/"es" ending is accepted), so a short keyword
    such as "AI" does not fire on words that merely contain those letters,
    e.g. "available" or "rained".

    Args:
        text: The text to classify.

    Returns:
        The matching category name, or "Unknown" when no keyword is found.
    """
    rules = {
        "Business": ["buy", "sale", "discount", "offer"],
        "Technology": ["software", "AI", "machine learning", "robot"],
        "Sports": ["football", "basketball", "Olympics", "goal"]
    }

    for category, keywords in rules.items():
        # One alternation per category; re.escape keeps multi-word keywords
        # such as "machine learning" literal.
        alternatives = "|".join(re.escape(keyword) for keyword in keywords)
        pattern = re.compile(
            r"\b(?:" + alternatives + r")(?:s|es)?\b", re.IGNORECASE
        )
        if pattern.search(text):
            return category
    return "Unknown"

# Demo: classify a sample sentence that mentions both a Business keyword
# ("discount") and a Technology keyword ("software"). The cell output follows.
text = "Huge discounts available on latest software products!"
rule_based_classification(text)

'Business'

Information Extraction

In [None]:
def extract_dates_and_locations(text):
    """Pull date strings and a location word out of *text* using regex rules.

    Args:
        text: The text to scan.

    Returns:
        A dict with two lists of strings: "DATE" holds every numeric
        (``d/m/yyyy``-shaped) or long-form ("March 15, 2025") date found, and
        "LOCATION" holds each capitalised word that directly follows the word
        "holding".
    """
    # Numeric dates first, then "Month D, YYYY"; both are anchored on word
    # boundaries.
    date_rule = r"\b\d{1,2}/\d{1,2}/\d{4}\b|\b[A-Z][a-z]+ \d{1,2}, \d{4}\b"
    # The location is whatever capitalised word comes right after "holding ".
    location_rule = r"holding ([A-Z][a-z]+)"

    return {
        "DATE": re.findall(date_rule, text),
        "LOCATION": re.findall(location_rule, text),
    }

# Demo: extract the numeric date and the word after "holding" from a sample
# sentence. The cell output follows.
text = "The meeting is holding Virtually and will be held on 4/04/2025."
extract_dates_and_locations(text)

{'DATE': ['4/04/2025'], 'LOCATION': ['Virtually']}

Sentiment Analysis (rule-based)

In [None]:
def rule_based_sentiment(text):
    """Label *text* as Positive, Negative or Neutral by counting keywords.

    The text is lower-cased and split into alphabetic word tokens; each token
    that equals a keyword counts once. Whole-word comparison keeps unrelated
    words from scoring: "crusader" is not "sad", "badge" is not "bad" and
    "goodbye" is not "good".

    Args:
        text: The text to score.

    Returns:
        "Positive" if positive keywords outnumber negative ones, "Negative" in
        the opposite case, and "Neutral" on a tie (including no matches).
    """
    positive_keywords = {"happy", "great", "excellent", "good", "fantastic"}
    negative_keywords = {"sad", "bad", "terrible", "horrible", "angry"}

    # Tokenise on runs of letters so punctuation never sticks to a word.
    words = re.findall(r"[a-z]+", text.lower())
    positive_matches = sum(word in positive_keywords for word in words)
    negative_matches = sum(word in negative_keywords for word in words)

    if positive_matches > negative_matches:
        return "Positive"
    elif negative_matches > positive_matches:
        return "Negative"
    else:
        return "Neutral"

# Demo: score a sentence with two positive keywords ("happy", "excellent")
# and one negative keyword ("terrible"). The cell output follows.
text = "I am happy about the excellent results, but the weather is terrible."
rule_based_sentiment(text)

'Positive'

Spell Checking and Grammar Correction

In [None]:
def rule_based_spell_check(text):
    """Replace known misspellings in *text* using a fixed correction table.

    Misspellings are matched as whole words and case-insensitively, so they
    are corrected even with punctuation attached ("teh," becomes "the,") or
    when capitalised ("Teh" becomes "The", "TEH" becomes "THE"). Longer words
    that merely start with a misspelling are left alone.

    Args:
        text: The text to correct.

    Returns:
        The corrected text. Runs of whitespace are collapsed to single spaces
        and leading/trailing whitespace is removed.
    """
    corrections = {"teh": "the", "adn": "and", "recieve": "receive"}

    misspellings = "|".join(re.escape(word) for word in corrections)
    pattern = re.compile(r"\b(?:" + misspellings + r")\b", re.IGNORECASE)

    def _fix(match):
        # Look the word up in lower case, then mirror the casing that was typed.
        found = match.group(0)
        fixed = corrections[found.lower()]
        if found.isupper():
            return fixed.upper()
        if found[0].isupper():
            return fixed.capitalize()
        return fixed

    # Split/join normalises whitespace before the substitution runs.
    normalized = " ".join(text.split())
    return pattern.sub(_fix, normalized)

# Demo: correct a sentence containing all three misspellings from the
# correction table. The cell output follows.
text = "Please recieve teh package adn confirm."
rule_based_spell_check(text)

'Please receive the package and confirm.'

### Limitations

Rule-based systems become challenging to scale and maintain when language complexity grows.

Rule-based systems may also miss entities when their rules are overly rigid or too simple.