In [12]:
import csv
import re

month_map = {
    'january': '01', 'jan': '01',
    'february': '02', 'feb': '02',
    'march': '03', 'mar': '03',
    'april': '04', 'apr': '04',
    'may': '05',
    'june': '06', 'jun': '06',
    'july': '07', 'jul': '07',
    'august': '08', 'aug': '08',
    'september': '09', 'sep': '09', 'sept': '09',
    'october': '10', 'oct': '10',
    'november': '11', 'nov': '11',
    'december': '12', 'dec': '12'
}

def parse_date(text):
    text = text.lower()
    text = re.sub(r'(\d+)(st|nd|rd|th)', r'\1', text)

    # yyyy.mm.dd
    match = re.search(r'(\d{4})[.](\d{1,2})[.](\d{1,2})', text)
    if match:
        yyyy, mm, dd = match.groups()
        return f"{dd.zfill(2)}-{mm.zfill(2)}-{yyyy}"

    # yyyy-mm-dd or yyyy/mm/dd
    match = re.search(r'(\d{4})[-/](\d{1,2})[-/](\d{1,2})', text)
    if match:
        yyyy, mm, dd = match.groups()
        return f"{dd.zfill(2)}-{mm.zfill(2)}-{yyyy}"

    # mm/dd/yyyy (Assuming US format and converting to DD-MM-YYYY)
    match = re.search(r'(\d{1,2})/(\d{1,2})/(\d{4})', text)
    if match:
        mm, dd, yyyy = match.groups()
        if int(mm) > 12:
            dd, mm = mm, dd  # assume it's DD/MM/YYYY
        return f"{dd.zfill(2)}-{mm.zfill(2)}-{yyyy}"

    # dd.mm.yy or dd/mm/yy
    match = re.search(r'(\d{1,2})[./-](\d{1,2})[./-](\d{2,4})', text)
    if match:
        dd, mm, yy = match.groups()
        if len(yy) == 2:
            yy = '20' + yy if int(yy) < 30 else '19' + yy
        return f"{dd.zfill(2)}-{mm.zfill(2)}-{yy}"

    # e.g. 5 of March 2023
    match = re.search(r'(\d{1,2})\s*(?:of)?\s*([a-z]+)[,]?\s*(\d{4})', text)
    if match:
        dd, month, yyyy = match.groups()
        mm = month_map.get(month.lower())
        if mm:
            return f"{dd.zfill(2)}-{mm}-{yyyy}"

    # e.g. March 5, 2023
    match = re.search(r'([a-z]+)\s+(\d{1,2}),?\s*(\d{4})', text)
    if match:
        month, dd, yyyy = match.groups()
        mm = month_map.get(month.lower())
        if mm:
            return f"{dd.zfill(2)}-{mm}-{yyyy}"

    # e.g. the 4 of July, 2022
    match = re.search(r'the\s+(\d{1,2})\s+(?:of\s+)?([a-z]+),?\s*(\d{4})', text)
    if match:
        dd, month, yyyy = match.groups()
        mm = month_map.get(month.lower())
        if mm:
            return f"{dd.zfill(2)}-{mm}-{yyyy}"

    # Fallback: day+month with year elsewhere (e.g., "25th Dec, including 2024")
    dm_match = re.search(r'(\d{1,2})\s*(?:of\s+)?([a-z]{3,9})', text)
    y_match = re.search(r'(\d{4})', text)
    if dm_match and y_match:
        dd, month = dm_match.groups()
        mm = month_map.get(month.lower())
        yyyy = y_match.group(1)
        if mm:
            return f"{dd.zfill(2)}-{mm}-{yyyy}"

    return "No date found"


# Evaluation function
def evaluate_on_csv(file_path):
    total, correct = 0, 0
    mismatches = []

    with open(file_path, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            input_text = row['Input']
            expected = row['Expected Output']
            predicted = parse_date(input_text)

            if predicted == expected:
                correct += 1
            else:
                mismatches.append((input_text, predicted, expected))
            total += 1

    accuracy = correct / total * 100 if total else 0
    print(f"\n✅ Accuracy: {accuracy:.2f}% ({correct}/{total})\n")

    if mismatches:
        print("❌ Mismatches:")
        for input_text, predicted, expected in mismatches:
            print(f"Input: {input_text}")
            print(f"Predicted: {predicted}")
            print(f"Expected : {expected}")
            print("---")

# Run on your CSV file
evaluate_on_csv("date_parser_testcases.csv")




✅ Accuracy: 95.96% (95/99)

❌ Mismatches:
Input: Her birthday is on 07/08/1990.
Predicted: 08-07-1990
Expected : 07-08-1990
---
Input: The interview is on 1/2/2022.
Predicted: 02-01-2022
Expected : 01-02-2022
---
Input: Her birthday, which she celebrates on 07/08/1990, is coming up soon.
Predicted: 08-07-1990
Expected : 07-08-1990
---
Input: The job interview is on 1/2/2022, don't be late.
Predicted: 02-01-2022
Expected : 01-02-2022
---
