In [28]:
import dateparser.search

In [29]:
#keywords : Data,Time,Filter
# Lookup of metric key -> phrases that refer to that metric in a user prompt.
# Phrase order inside each list is kept exactly as authored.
Keywords_Table = {
    "stress": [
        "stress", "stress level", "stress score", "tension", "anxiety",
        "strain", "mental load", "stress pattern", "stress zones", "stress chart",
    ],
    "hr": [
        "heart rate", "hr", "bpm", "resting heart rate", "max heart rate",
        "pulse", "cardio", "hr zone", "heart beat", "heart-rate", "heart",
    ],
    "spo2": [
        "spo2", "oxygen", "blood oxygen", "oxygen saturation", "o2 level",
        "breathing", "respiration", "air levels", "oxygen dips", "oxygen score",
    ],
    "steps": [
        "steps", "step count", "walking", "walk", "daily steps",
        "distance walked", "movement", "stride", "pedometer", "step goal",
    ],
    "calorie": [
        "calories", "calorie burn", "energy burn", "burned", "metabolism",
        "active calories", "basal calories", "kcal", "energy expenditure",
        "fat burn", "cal",
    ],
    "exercise": [
        "exercise", "workout", "training", "session", "sports", "activity",
        "reps", "sets", "routine", "intensity", "activities",
    ],
}

In [None]:
# def findTableNTime(keywords,prompt):
#     table_list =[]
#     dates = []
#     for word in prompt.split(" "):
#         for table , keys in keywords.items():
#             for k in keys:
#                 if k in word:
#                     table_list.append(table)
#     dates = dateparser.search.search_dates(prompt)
#     return table_list,dates
    

In [None]:
import spacy
import dateparser
import re

# Load the spaCy "en_core_web_sm" pipeline once at module level;
# detect_dates_all below reads this global `nlp` on every call.
nlp = spacy.load("en_core_web_sm")

def detect_dates_all(keyword_tables, text):
    """Find date expressions in ``text`` with both spaCy and regexes.

    The text is lower-cased first, so every returned string is lower-case.

    Args:
        keyword_tables: Currently unused; kept so existing call sites that
            pass the keyword table keep working.
        text: Free-form prompt to scan.

    Returns:
        A 2-tuple ``(dates, words_to_dates)``:

        * ``dates`` - unique regex-matched date strings, exactly as they
          appear in the lower-cased text, ordered by pattern and then by
          position. A match whose span lies inside a longer match (e.g. the
          "aug" inside "12 aug 2025") is dropped.
        * ``words_to_dates`` - the truthy results of ``dateparser.parse`` for
          every spaCy entity labelled ``DATE``.
    """
    text = text.lower()

    # --- spaCy DATE entities, resolved through dateparser ------------------
    words_to_dates = []
    for ent in nlp(text).ents:
        if ent.label_ == "DATE":
            parsed = dateparser.parse(ent.text)
            if parsed:
                words_to_dates.append(parsed)

    # --- Regex-based explicit dates ----------------------------------------
    # "sept" is included because standardize_date also accepts it.
    month_alt = (
        r"(?:jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec"
        r"|january|february|march|april|june|july|august|september"
        r"|october|november|december)"
    )

    patterns = [
        # dd/mm/yyyy | dd-mm-yyyy | dd.mm.yyyy
        r"\b\d{1,2}[\/\-.]\d{1,2}[\/\-.]\d{2,4}\b",

        # yyyy-mm-dd
        r"\b\d{4}-\d{1,2}-\d{1,2}\b",

        # 12 Aug 2025
        rf"\b\d{{1,2}}(?:st|nd|rd|th)?\s+{month_alt}\s+\d{{4}}\b",

        # Aug 12 2025
        rf"\b{month_alt}\s+\d{{1,2}}(?:st|nd|rd|th)?\s+\d{{4}}\b",

        # August 12, 2025
        rf"\b{month_alt}\s+\d{{1,2}}(?:st|nd|rd|th)?,\s+\d{{4}}\b",

        # 12th August
        rf"\b\d{{1,2}}(?:st|nd|rd|th)?\s+{month_alt}\b",

        # August 12th (mirror of the pattern above, no year)
        rf"\b{month_alt}\s+\d{{1,2}}(?:st|nd|rd|th)?\b",

        # Standalone month
        rf"\b{month_alt}\b",
    ]

    # Record (start, end, text) so nesting is decided by position in the
    # prompt rather than by substring equality.
    spans = []
    for pattern in patterns:
        for match in re.finditer(pattern, text):
            spans.append((match.start(), match.end(), match.group().strip()))

    # Drop a match only when its span sits inside a *different*, larger span.
    # A plain substring test would also discard a standalone "aug" just
    # because "august" occurs somewhere else in the text.
    filtered = [
        found
        for start, end, found in spans
        if not any(
            o_start <= start and end <= o_end and (o_start, o_end) != (start, end)
            for o_start, o_end, _ in spans
        )
    ]

    # Unique + order preserved
    dates = list(dict.fromkeys(filtered))

    # Return both result lists; callers unpack two values.
    return dates, words_to_dates




In [None]:
# -------------------------------------------------------------
# TEST SUITE
# -------------------------------------------------------------

# Maps an input prompt to the date strings the regex detector should report.
# Comparison below is set-based, so duplicates and ordering are ignored.
# NOTE(review): the detector returns the matched text verbatim (lower-cased),
# so expectations such as "august 3 2024" (input says "3rd"), "february"
# (input says "Feb") or a bare "2024" look unreachable - confirm the intended
# behaviour before trusting these rows.
test_cases = {
    # Month-only (case variations)
    "aug": ["aug"],
    "Aug": ["aug"],
    "AUG": ["aug"],
    "august": ["august"],
    "August": ["august"],

    # All together
    "aug, Aug, AUG, august, August": ["aug", "aug", "aug", "august", "august"],

    # Numeric formats
    "12/18/25": ["12/18/25"],
    "01/02/2025": ["01/02/2025"],
    "1-2-25": ["1-2-25"],
    "2025-11-18": ["2025-11-18"],

    # Month + day + year
    "12 Aug 2025": ["12 aug 2025"],
    "Aug 12 2025": ["aug 12 2025"],
    "August 12, 2025": ["august 12, 2025"],

    # Ordinal
    "12th August": ["12th august"],
    "August 3rd 2024": ["august 3 2024"],

    # Dot/dash
    "12.10.2025": ["12.10.2025"],
    "3.1.25": ["3.1.25"],

    # Mixed scenarios
    "I will come on 1st January 2025 or maybe Feb 2025":
        ["1st january 2025", "february"],

    "Dates: Aug 24, 2024-11-18, 2025":
        ["aug 24", "2024", "2025"],

    # Noise text
    "no dates here":
        []
}

print("\n=== TEST RESULTS ===")

for text, expected in test_cases.items():
    # detect_dates_all returns a tuple whose first element is the list of
    # regex-matched date strings. Compare only that list: calling set() on
    # the tuple itself raises TypeError because its members are lists.
    result = detect_dates_all(Keywords_Table, text)[0]

    print(f"\nInput: {text}")
    print(f"Expected: {sorted(set(expected))}")
    print(f"Got      : {sorted(set(result))}")

    if set(result) == set(expected):
        print("➡️ PASS")
    else:
        print("❌ FAIL")

In [131]:
import re
from datetime import datetime

# Month name / abbreviation (lower-case) -> month number.
MONTHS = {
    "jan": 1, "january": 1,
    "feb": 2, "february": 2,
    "mar": 3, "march": 3,
    "apr": 4, "april": 4,
    "may": 5,
    "jun": 6, "june": 6,
    "jul": 7, "july": 7,
    "aug": 8, "august": 8,
    "sep": 9, "sept": 9, "september": 9,
    "oct": 10, "october": 10,
    "nov": 11, "november": 11,
    "dec": 12, "december": 12
}

# Year-first numeric dates: YYYY-MM-DD, YYYY/MM/DD, YYYY.MM.DD.
# The back-reference forces the same separator to be used twice.
_YEAR_FIRST_RE = re.compile(r"(\d{4})\s*([/.\-])\s*(\d{1,2})\s*\2\s*(\d{1,2})")

# Day-first numeric dates: DD/MM/YY(YY), DD.MM.YY(YY), DD-MM-YY(YY).
_DAY_FIRST_RE = re.compile(r"(\d{1,2})\s*([/.\-])\s*(\d{1,2})\s*\2\s*(\d{1,4})")


def _iso_or_none(year, month, day):
    """Return 'YYYY-MM-DD' when the triple is a real calendar date, else None."""
    try:
        datetime(year, month, day)
    except ValueError:
        return None
    return f"{year:04d}-{month:02d}-{day:02d}"


def standardize_date(date_str, current_year=None):
    """Normalise a loosely formatted date string to ``YYYY-MM-DD``.

    Supported shapes (case-insensitive, ordinal suffixes such as "12th"
    are stripped first):

    * year-first numeric: ``2025-11-18``, ``2025/11/18``, ``2025.11.18``
    * day-first numeric: ``18/11/2025``, ``3.1.25``, ``1-2-25``; a value
      above 12 in the month slot is read as month-first (``12/18/25``)
    * month name with a day and optional year: ``12 Aug 2025``, ``Aug 24, 2025``
    * month name alone or with just a year: resolves to the 1st of the month

    Args:
        date_str: The text to normalise.
        current_year: Year to assume when the text has none; defaults to
            ``datetime.now().year``.

    Returns:
        The ISO date string, or ``None`` when the text is not recognised or
        does not name a real calendar date (e.g. ``31/02/2025``).

    Note:
        In month-name inputs a number is taken as the year only when it is
        greater than 31; years below 100 are mapped to 2000 + year.
    """
    date_str = date_str.lower().strip()

    if current_year is None:
        current_year = datetime.now().year

    # Remove suffixes: 12th -> 12
    date_str = re.sub(r"(\d+)(st|nd|rd|th)", r"\1", date_str)

    # ---------- Numeric, year first ----------
    hit = _YEAR_FIRST_RE.fullmatch(date_str)
    if hit:
        return _iso_or_none(int(hit.group(1)), int(hit.group(3)), int(hit.group(4)))

    # ---------- Numeric, day first ----------
    hit = _DAY_FIRST_RE.fullmatch(date_str)
    if hit:
        day, month, year = int(hit.group(1)), int(hit.group(3)), int(hit.group(4))
        if year < 100:
            year += 2000
        # A "month" above 12 can only be a day, so the input was month-first.
        if month > 12 >= day:
            day, month = month, day
        return _iso_or_none(year, month, day)

    # ---------- Month name + optional day / year ----------
    # Split on commas as well as whitespace so "aug 24,2025" is not fused
    # into a single bogus number.
    tokens = [tok for tok in re.split(r"[\s,]+", date_str) if tok]

    month = next((MONTHS[tok] for tok in tokens if tok in MONTHS), None)
    if month is None:
        return None

    # isdecimal() guarantees int() will accept the token.
    numbers = [int(tok) for tok in tokens if tok.isdecimal()]
    day = next((num for num in numbers if 1 <= num <= 31), None)
    explicit_year = next((num for num in numbers if num > 31), None)

    year = current_year if explicit_year is None else explicit_year
    if year < 100:
        year += 2000

    if day is None:
        # Only fall back to the 1st when the string is nothing but the month,
        # or the month plus its year; anything else is left unrecognised.
        allowed_tokens = 1 if explicit_year is None else 2
        if len(tokens) > allowed_tokens:
            return None
        day = 1

    return _iso_or_none(year, month, day)


In [121]:
# standardize_date("12 Aug 2025")      #→ "2025-08-12"
# standardize_date("12th August")      # → "2025-08-12"  (uses current year)
# standardize_date("Aug 24, 2025")     # → "2025-08-24"
# standardize_date("1st January 2025") # → "2025-01-01"
# standardize_date("3.1.25")           #→ "2025-01-03"
# standardize_date("2025-11-18")       # → "2025-11-18"
# standardize_date("aug")               #→ "2025-08-01"  (uses current year)
# standardize_date("Aug 12")            #→ "2025-08-12"  (uses current year)
# One sample per input shape standardize_date is expected to handle.
test_inputs = [
    "12 Aug 2025", "12th August", "Aug 24, 2025", "1st January 2025",
    "3.1.25", "2025-11-18", "aug", "Aug 12",
]

# Print each normalised value on its own line, in input order.
for sample in test_inputs:
    normalised = standardize_date(sample)
    print(normalised)

2025-08-12
2025-08-12
2025-08-24
2025-01-01
2025-01-03
2025-11-18
2025-08-01
2025-08-12


In [179]:
# Prompt = input("Enter Prompt:")
Prompt = 'How was my hr on 23 nov and yesterday'

# First value: regex-matched date strings; second: spaCy/dateparser datetimes.
tablentime, words2dates = detect_dates_all(Keywords_Table, Prompt)
print(tablentime)
print(words2dates)

# Normalise every regex match to YYYY-MM-DD (None where unrecognised).
corrected = [standardize_date(found) for found in tablentime]
print(corrected)

['23 nov']
[]
['2025-11-23']


In [187]:
import spacy
import dateparser

# Re-binds the module-level `nlp` to the same "en_core_web_sm" pipeline;
# detect_datess_all below reads it.
nlp = spacy.load("en_core_web_sm")

def detect_datess_all(text):
    """Return ``(entity_text, parsed)`` pairs for spaCy DATE entities in ``text``.

    Each entity labelled ``DATE`` is handed to ``dateparser.parse``; entities
    for which that call yields a falsy result are left out. Unlike
    ``detect_dates_all``, the text is not lower-cased first.
    """
    date_entities = (entity for entity in nlp(text).ents if entity.label_ == "DATE")

    pairs = []
    for entity in date_entities:
        when = dateparser.parse(entity.text)
        if not when:
            continue
        pairs.append((entity.text, when))
    return pairs

# Try a relative date phrase ("last week"). Note this calls detect_dates_all,
# not the detect_datess_all defined just above, and prints its whole return
# value rather than unpacking it.
Prompt = 'How was my hr on last week'
tablentime = detect_dates_all(Keywords_Table,Prompt)
print(tablentime)


([], [datetime.datetime(2025, 11, 11, 19, 34, 9, 819845)])
