**Part 2**
- Parsing dates from free text


In [12]:
import pandas as pd
import re
from datetime import datetime

# Lower-case month names (full and three-letter forms) mapped to a zero-padded
# month number. "may" is its own abbreviation, so it appears only once.
month_map = {
    'january': '01', 'february': '02', 'march': '03', 'april': '04', 'may': '05', 'june': '06',
    'july': '07', 'august': '08', 'september': '09', 'october': '10', 'november': '11', 'december': '12',
    'jan': '01', 'feb': '02', 'mar': '03', 'apr': '04', 'jun': '06',
    'jul': '07', 'aug': '08', 'sep': '09', 'oct': '10', 'nov': '11', 'dec': '12'
}

# Date regexes in priority order (parse_date_from_text walks this list from the
# top). Every pattern exposes the named groups `day`, `month` and `year`.
# The (?<!\d) / (?!\d) guards stop a pattern from matching a fragment of a
# longer digit run (e.g. "2022-12-31" inside "12022-12-315").
patterns = [
    # Month name first: "march 5, 2023", "march 5th 2023"
    r'(?P<month>[a-zA-Z]+)\s+(?P<day>\d{1,2})(?:st|nd|rd|th)?,?\s+(?P<year>\d{4})(?!\d)',
    # Day first with a month name: "15th september, 2021", "1st of january 2000"
    r'(?<!\d)(?P<day>\d{1,2})(?:st|nd|rd|th)?\s+(?:of\s+)?(?P<month>[a-zA-Z]+),?\s+(?P<year>\d{4})(?!\d)',
    # ISO-like, year first: "2022-12-31", "2022/12/31", "2022.12.31"
    r'(?<!\d)(?P<year>\d{4})[-./](?P<month>\d{1,2})[-./](?P<day>\d{1,2})(?!\d)',
    # Numeric, day first: "02.04.2022", "5/6/19"
    r'(?<!\d)(?P<day>\d{1,2})[-./](?P<month>\d{1,2})[-./](?P<year>\d{2,4})(?!\d)',
    # Numeric, month first; only reached when the day-first reading is not a valid date
    r'(?<!\d)(?P<month>\d{1,2})/(?P<day>\d{1,2})/(?P<year>\d{2,4})(?!\d)'
]



In [13]:
def parse_date_from_text(text):
    """Extract a date from free text and return it as ``'DD/MM/YYYY'``.

    The text is lower-cased and each regex in the module-level ``patterns``
    list is tried in order. Within one pattern every hit is examined from left
    to right, and the first hit that forms a real calendar date is returned.

    Args:
        text: Text that may contain a date. Any value that is not a ``str``
            (for example ``None`` or a float NaN) yields ``None``.

    Returns:
        The date formatted as ``'DD/MM/YYYY'``, or ``None`` when no pattern
        produces a valid date.
    """
    if not isinstance(text, str):
        return None

    lowered = text.lower()
    for pattern in patterns:
        # finditer, not search: an earlier hit that is not a date (such as
        # "flight 12 2023") must not hide a valid date later in the text.
        for hit in re.finditer(pattern, lowered):
            parts = hit.groupdict()
            day = parts.get('day')
            month = parts.get('month')
            year = parts.get('year')

            if month and month.isalpha():
                # Only the first three letters are looked up, so "sept" and
                # "september" both resolve; unknown words give None.
                month = month_map.get(month[:3])
            if not (day and month and year):
                continue
            if len(year) == 2:
                # Two-digit years: 00-49 -> 20xx, 50-99 -> 19xx.
                year = ('20' if int(year) < 50 else '19') + year

            try:
                # strptime accepts unpadded day/month and rejects impossible
                # dates (31/02) as well as years that are not four digits.
                parsed = datetime.strptime(f'{day}/{month}/{year}', '%d/%m/%Y')
            except ValueError:
                continue
            return parsed.strftime('%d/%m/%Y')
    return None



In [16]:
# Score parse_date_from_text against the labelled test cases.
# NOTE(review): the path looks Colab-specific; adjust it when running elsewhere.
df = pd.read_csv('/content/date_parser_testcases.csv')

total = len(df)
correct = 0

# Walk the two columns in lock-step; the row index is not needed.
for input_text, expected_output in zip(df['Input'], df['Expected Output']):
    parsed_date = parse_date_from_text(input_text)

    is_match = parsed_date == expected_output
    if is_match:
        correct += 1

    print(f"Input:    {input_text}")
    print(f"Parsed:   {parsed_date}")
    print(f"Expected: {expected_output}")
    print(f"Match:    {'correct' if is_match else 'wrong'}")
    print('-' * 50)

# Guard the percentage so an empty CSV reports 0.00% instead of raising
# ZeroDivisionError.
accuracy = 100 * correct / total if total else 0.0
print(f"Accuracy: {correct} / {total} ({accuracy:.2f}%)")


Input:    The event will take place on March 5, 2023.
Parsed:   05/03/2023
Expected: 05/03/2023
Match:    correct
--------------------------------------------------
Input:    Her birthday is on 07/08/1990.
Parsed:   07/08/1990
Expected: 07/08/1990
Match:    correct
--------------------------------------------------
Input:    The deadline is 2022-12-31.
Parsed:   31/12/2022
Expected: 31/12/2022
Match:    correct
--------------------------------------------------
Input:    We met on 1st of January 2000.
Parsed:   01/01/2000
Expected: 01/01/2000
Match:    correct
--------------------------------------------------
Input:    The concert is scheduled for 15th September, 2021.
Parsed:   15/09/2021
Expected: 15/09/2021
Match:    correct
--------------------------------------------------
Input:    Let's catch up on 02.04.2022.
Parsed:   02/04/2022
Expected: 02/04/2022
Match:    correct
--------------------------------------------------
Input:    The project started on 5/6/19.
Parsed:   05/06/20

In [15]:
# Smoke test: run the parser on one hand-written sentence and show the result.
sample = "I am coming by 4th of jan 2026."
parsed_sample = parse_date_from_text(sample)
print(f"Sample Input: {sample}")
print(f"Parsed Date: {parsed_sample}")


Sample Input: I am coming by 4th of jan 2026.
Parsed Date: 04/01/2026
