In [1]:
import re
import pandas as pd

In [3]:
date_patterns = {
    'us_style': r'\b(\d{1,2})[/-](\d{1,2})[/-](\d{2,4})\b',
    'iso_style': r'\b(\d{4})[-/](\d{2})[-/](\d{2})\b',
    'dot_style': r'\b(\d{2})[.](\d{2})[.](\d{2,4})\b',
    'full_year': r'\b(\d{4})\.(\d{2})\.(\d{2})\b',
    'ordinal_day': r'\b(\d{1,2})(?:st|nd|rd|th)?\b',
    'month_name': r'\b(January|February|March|April|May|June|July|August|September|October|November|December|Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b',
    'four_digit_year': r'\b(\d{4})\b'
}

month_lookup = {
    'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05', 'June': '06',
    'July': '07', 'August': '08', 'September': '09', 'October': '10', 'November': '11', 'December': '12',
    'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'Jun': '06', 'Jul': '07', 'Aug': '08', 'Sep': '09',
    'Oct': '10', 'Nov': '11', 'Dec': '12'
}

date_data = pd.read_csv('date_parser_testcases.csv')

date_data = date_data.drop(50).reset_index(drop=True)

In [4]:
for idx, text_input in enumerate(date_data.Input):
    match_us = re.search(date_patterns['us_style'], text_input)
    if match_us:
        mm, dd, yyyy = match_us.groups()
        yyyy = f"20{yyyy.zfill(2)}" if len(yyyy) == 2 else yyyy
        print(f"{dd.zfill(2)}/{mm.zfill(2)}/{yyyy}")
        continue

    match_iso = re.search(date_patterns['iso_style'], text_input)
    if match_iso:
        yyyy, mm, dd = match_iso.groups()
        print(f"{dd}/{mm}/{yyyy}")
        continue

    match_dot = re.search(date_patterns['dot_style'], text_input)
    if match_dot:
        dd, mm, yyyy = match_dot.groups()
        yyyy = f"20{yyyy.zfill(2)}" if len(yyyy) == 2 else yyyy
        print(f"{dd}/{mm}/{yyyy}")
        continue

    match_yyyy_mm_dd = re.search(date_patterns['full_year'], text_input)
    if match_yyyy_mm_dd:
        yyyy, mm, dd = match_yyyy_mm_dd.groups()
        print(f"{dd}/{mm}/{yyyy}")
        continue

    day_component = re.search(date_patterns['ordinal_day'], text_input)
    month_component = re.search(date_patterns['month_name'], text_input)
    year_component = re.search(date_patterns['four_digit_year'], text_input)

    day = day_component.group(1) if day_component else None
    month = month_lookup.get(month_component.group(0), None) if month_component else None
    year = year_component.group(1) if year_component else None

    if day and month and year:
        print(f"{day.zfill(2)}/{month}/{year}")
    else:
        print("Date not found")

05/03/2023
08/07/1990
31/12/2022
01/01/2000
15/09/2021
02/04/2022
06/05/2019
23/11/1987
25/12/2024
03/04/2020
20/05/1997
02/03/2021
10/11/2021
12/12/2012
15/02/2022
31/08/2021
01/07/2023
04/07/2022
30/10/1995
01/01/2023
14/03/2022
31/08/2020
28/02/2020
20/12/2021
05/05/2023
25/12/2019
17/03/2022
11/11/2021
04/07/2023
30/05/2022
09/09/2021
02/01/2022
10/10/2022
06/08/2020
03/03/1998
10/10/2018
15/12/2020
12/31/2022
29/02/2024
15/07/2021
03/03/2022
01/06/2021
08/08/2020
09/09/2020
01/01/2022
04/07/2023
30/11/2022
15/10/2023
20/05/1990
12/08/2024
05/03/2023
08/07/1990
31/12/2022
01/01/2000
15/09/2021
02/04/2022
06/05/2019
23/11/1987
25/12/2024
03/04/2020
02/03/2021
10/11/2021
12/12/2012
15/02/2022
31/08/2021
01/07/2023
04/07/2022
30/10/1995
01/01/2023
14/03/2022
31/08/2020
28/02/2020
20/12/2021
05/05/2023
25/12/2019
17/03/2022
11/11/2021
04/07/2023
30/05/2022
09/09/2021
02/01/2022
10/10/2022
06/08/2020
03/03/1998
10/10/2018
15/12/2020
12/31/2022
29/02/2024
15/07/2021
03/03/2022
01/06/2021