In [16]:
from typing import Optional
from datetime import datetime, timezone
import re

In [17]:
def parse_date(text: str) -> Optional[datetime]:
    """
    Parse a date from text using multiple formats.
    
    Args:
        text: Text containing a date
        
    Returns:
        Datetime object or None if parsing fails
    """
    try:
        # Clean the input text
        text = text.strip()
        # Remove any parentheses and their contents
        text = re.sub(r'\([^)]*\)', '', text)
        
        # Common date formats to try
        date_formats = [
            "%Y-%m-%d",           # 2025-07-31
            "%B %d, %Y",          # July 31, 2025
            "%d %B %Y",           # 31 July 2025
            "%B, %Y",             # July, 2025
            "%Y/%m/%d",           # 2025/07/31
            "%d/%m/%Y",           # 31/07/2025
            "%d-%m-%Y",           # 31-07-2025
            "%Y.%m.%d",           # 2025.07.31
            "%d.%m.%Y",           # 31.07.2025
            "%Y %B %d",           # 2025 July 31
            "%b %d, %Y",          # Jul 31, 2025
            "%d %b %Y",           # 31 Jul 2025
        ]
        
        # First try to find any date-like patterns in the text
        date_patterns = [
            r'\d{4}-\d{2}-\d{2}',                    # YYYY-MM-DD
            r'\d{2}/\d{2}/\d{4}',                    # DD/MM/YYYY
            r'\d{4}/\d{2}/\d{2}',                    # YYYY/MM/DD
            r'[A-Za-z]+\s+\d{1,2},\s+\d{4}',        # Month DD, YYYY
            r'\d{1,2}\s+[A-Za-z]+\s+\d{4}',         # DD Month YYYY
            r'[A-Za-z]+,\s+\d{4}',                   # Month, YYYY
            r'\d{4}\s+[A-Za-z]+\s+\d{1,2}',         # YYYY Month DD
        ]
        
        # Try to find a date pattern in the text
        for pattern in date_patterns:
            match = re.search(pattern, text)
            if match:
                date_str = match.group()
                # Try parsing with each format
                for date_format in date_formats:
                    try:
                        parsed_date = datetime.strptime(text, date_format)
                        return parsed_date.replace(tzinfo=timezone.utc)
                    except ValueError:
                        continue
                        
        # If no pattern matched, try direct parsing with formats
        for date_format in date_formats:
            try:
                return datetime.strptime(text, date_format)
            except ValueError:
                continue
                
        return None
    except Exception as e:
        logger.error(f"Error parsing date: {e}")
        return None

In [11]:
parse_date("July 31, 2025 (some comment)")

datetime.datetime(2025, 7, 31, 0, 0)

In [15]:
# List of test cases for various date formats
test_dates = [
    "2025-07-31",              # Format: YYYY-MM-DD
    "July 31, 2025 will be end date",           # Format: Month DD, YYYY
    "31 July 2025",            # Format: DD Month YYYY
    "the end date will be July, 2025",              # Format: Month, YYYY
    "2025/07/31",              # Format: YYYY/MM/DD
    "31/07/2025",              # Format: DD/MM/YYYY
    "31-07-2025",              # Format: DD-MM-YYYY
    "2025.07.31",              # Format: YYYY.MM.DD
    "31.07.2025",              # Format: DD.MM.YYYY
    "2025 July 31",            # Format: YYYY Month DD
    "Jul 31, 2025",            # Format: Abbreviated Month DD, YYYY
    "31 Jul 2025",             # Format: DD Abbreviated Month YYYY
    "Some random text 2025-07-31 inside",  # With parentheses and extra text
    "Random text without date",               # No date
    "2025-07-01 extra",        # With extra characters
    "Not a date at all",       # No valid date
    "2025, August",            # Wrong format, not parsed
]

# Loop through each test case and parse the date
for i, text in enumerate(test_dates):
    parsed_date = parse_date(text)
    print(f"Test Case {i + 1}: {text} => Parsed Date: {parsed_date}")

Test Case 1: 2025-07-31 => Parsed Date: 2025-07-31 00:00:00
Test Case 2: July 31, 2025 will be end date => Parsed Date: 2025-07-31 00:00:00
Test Case 3: 31 July 2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 4: the end date will be July, 2025 => Parsed Date: 2025-07-01 00:00:00
Test Case 5: 2025/07/31 => Parsed Date: 2025-07-31 00:00:00
Test Case 6: 31/07/2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 7: 31-07-2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 8: 2025.07.31 => Parsed Date: 2025-07-31 00:00:00
Test Case 9: 31.07.2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 10: 2025 July 31 => Parsed Date: 2025-07-31 00:00:00
Test Case 11: Jul 31, 2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 12: 31 Jul 2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 13: Some random text 2025-07-31 inside => Parsed Date: 2025-07-31 00:00:00
Test Case 14: Random text without date => Parsed Date: None
Test Case 15: 2025-07-01 extra => Parsed Date: 2025-07-01 00:00:00
Test Case 16: Not

In [18]:
# List of test cases for various date formats
test_dates = [
    "2025-07-31",              # Format: YYYY-MM-DD
    "July 31, 2025 will be end date",           # Format: Month DD, YYYY
    "31 July 2025",            # Format: DD Month YYYY
    "the end date will be July, 2025",              # Format: Month, YYYY
    "2025/07/31",              # Format: YYYY/MM/DD
    "31/07/2025",              # Format: DD/MM/YYYY
    "31-07-2025",              # Format: DD-MM-YYYY
    "2025.07.31",              # Format: YYYY.MM.DD
    "31.07.2025",              # Format: DD.MM.YYYY
    "2025 July 31",            # Format: YYYY Month DD
    "Jul 31, 2025",            # Format: Abbreviated Month DD, YYYY
    "31 Jul 2025",             # Format: DD Abbreviated Month YYYY
    "Some random text 2025-07-31 inside",  # With parentheses and extra text
    "Random text without date",               # No date
    "2025-07-01 extra",        # With extra characters
    "Not a date at all",       # No valid date
    "2025, August",            # Wrong format, not parsed
]

# Loop through each test case and parse the date
for i, text in enumerate(test_dates):
    parsed_date = parse_date(text)
    print(f"Test Case {i + 1}: {text} => Parsed Date: {parsed_date}")

Test Case 1: 2025-07-31 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 2: July 31, 2025 will be end date => Parsed Date: None
Test Case 3: 31 July 2025 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 4: the end date will be July, 2025 => Parsed Date: None
Test Case 5: 2025/07/31 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 6: 31/07/2025 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 7: 31-07-2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 8: 2025.07.31 => Parsed Date: 2025-07-31 00:00:00
Test Case 9: 31.07.2025 => Parsed Date: 2025-07-31 00:00:00
Test Case 10: 2025 July 31 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 11: Jul 31, 2025 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 12: 31 Jul 2025 => Parsed Date: 2025-07-31 00:00:00+00:00
Test Case 13: Some random text 2025-07-31 inside => Parsed Date: None
Test Case 14: Random text without date => Parsed Date: None
Test Case 15: 2025-07-01 extra => Parsed Date: None
Test Case 16: Not a date at all => 

In [20]:
# Test cases with random timestamps to check UTC conversion
test_dates = [
    "2025-07-31 12:34:56",            # Random timestamp in YYYY-MM-DD HH:MM:SS
    "March 15, 2024 08:23:45",        # Random timestamp in Month DD, YYYY HH:MM:SS
    "21 September 2023 15:12:34",     # Random timestamp in DD Month YYYY HH:MM:SS
    "2023/11/22 19:45:30",            # Random timestamp in YYYY/MM/DD HH:MM:SS
    "10/15/2022 01:30:59",            # Random timestamp in MM/DD/YYYY HH:MM:SS
    "December, 2021",                 # Random timestamp (no time, just month and year)
    "2025-02-10",                     # Random date (no time)
]

# Loop through each test case and parse the date
for i, text in enumerate(test_dates):
    parsed_date = parse_date(text)
    
    if parsed_date:
        print(f"Test Case {i + 1}: {text} => Parsed Date: {parsed_date}")
    else:
        print(f"Test Case {i + 1}: {text} => No valid date parsed")

Test Case 1: 2025-07-31 12:34:56 => No valid date parsed
Test Case 2: March 15, 2024 08:23:45 => No valid date parsed
Test Case 3: 21 September 2023 15:12:34 => No valid date parsed
Test Case 4: 2023/11/22 19:45:30 => No valid date parsed
Test Case 5: 10/15/2022 01:30:59 => No valid date parsed
Test Case 6: December, 2021 => Parsed Date: 2021-12-01 00:00:00+00:00
Test Case 7: 2025-02-10 => Parsed Date: 2025-02-10 00:00:00+00:00


In [21]:
from pydantic import BaseModel

In [30]:
class Item(BaseModel):
    name: str
    price: float
    in_stock: bool = True

# Validating input
data = {
    "name": "Laptop",
    "price": 59999.99,
    "in_stock": False
}

In [31]:
items = Item(**data)
print(f"Item: {items.name}, Price: {items.price}, In Stock: {items.in_stock}")

Item: Laptop, Price: 59999.99, In Stock: False
