### Handling Unstructured Data with Python
**Description**: Extract structured data from unstructured text using Python.

**Steps**:
1. Load and analyze an unstructured text document.
2. Extract information using regex.

In [1]:
import re
import unittest

def extract_name(text):
    match = re.search(r"Patient\s([A-Za-z\s]+),", text)
    return match.group(1) if match else None

def extract_age(text):
    match = re.search(r"aged\s(\d+)", text)
    return int(match.group(1)) if match else None

def extract_dates(text):
    return re.findall(r"\d{4}-\d{2}-\d{2}", text)

def extract_phone(text):
    match = re.search(r"\d{3}-\d{3}-\d{4}", text)
    return match.group(0) if match else None

def extract_email(text):
    match = re.search(r"[\w\.-]+@[\w\.-]+", text)
    return match.group(0) if match else None


# Example usage
if __name__ == "__main__":
    sample_text = """
    Patient John Doe, aged 45, visited on 2025-05-20 complaining of headache and fever.
    Contact: 555-123-4567. Email: john.doe@example.com.
    Next appointment: 2025-06-15.
    """

    print("Extracted Information:")
    print("Name:", extract_name(sample_text))
    print("Age:", extract_age(sample_text))
    print("Dates:", extract_dates(sample_text))
    print("Phone:", extract_phone(sample_text))
    print("Email:", extract_email(sample_text))


# Unit Tests
class TestDataExtraction(unittest.TestCase):

    def setUp(self):
        self.text = """
        Patient Alice Smith, aged 30, checked in on 2023-11-10.
        Contact: 123-456-7890. Email: alice.smith@mail.com.
        Follow-up: 2023-12-01.
        """

    def test_extract_name(self):
        self.assertEqual(extract_name(self.text), "Alice Smith")
    
    def test_extract_age(self):
        self.assertEqual(extract_age(self.text), 30)
    
    def test_extract_dates(self):
        self.assertEqual(extract_dates(self.text), ["2023-11-10", "2023-12-01"])
    
    def test_extract_phone(self):
        self.assertEqual(extract_phone(self.text), "123-456-7890")
    
    def test_extract_email(self):
        self.assertEqual(extract_email(self.text), "alice.smith@mail.com")

    def test_missing_name(self):
        self.assertIsNone(extract_name("No patient info here"))
    
    def test_missing_age(self):
        self.assertIsNone(extract_age("No age info here"))


if __name__ == "__main__":
    unittest.main()

Extracted Information:
Name: John Doe
Age: 45
Dates: ['2025-05-20', '2025-06-15']
Phone: 555-123-4567
Email: john.doe@example.com.


usage: ipykernel_launcher.py [-h] [-v] [-q] [--locals] [-f] [-c] [-b]
                             [-k TESTNAMEPATTERNS]
                             [tests ...]
ipykernel_launcher.py: error: argument -f/--failfast: ignored explicit argument '/home/vscode/.local/share/jupyter/runtime/kernel-v30881c06868a19006d2478930e1b1f478f6488559.json'


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
