### Handling Unstructured Data with Python
**Description**: Extract structured data from unstructured text using Python.

**Steps**:
1. Load and analyze an unstructured text document.
2. Extract each labelled field with regular expressions and collect the results in a pandas DataFrame.

In [None]:
import re
import pandas as pd

# Sample input: two order records written as "Label: value" lines,
# with a blank line separating one record from the next.
unstructured_text = """
Order ID: 23984
Customer: John Doe
Email: john.doe@example.com
Date: April 4, 2024
Product: Wireless Mouse - Model WXM123
Feedback: The mouse works great, but sometimes the scroll wheel gets stuck.
Contact: +1-234-567-8900

Order ID: 23985
Customer: Alice Johnson
Email: alice.j@example.com
Date: April 5, 2024
Product: Mechanical Keyboard - Model KBD456
Feedback: Keys are very responsive. Love the RGB lights!
Contact: +1-678-910-1122
"""

# Function to extract structured data from unstructured text
# Labels of the fields pulled from every order record, in output column order.
_FIELD_NAMES = (
    "Order ID",
    "Customer",
    "Email",
    "Date",
    "Product",
    "Feedback",
    "Contact",
)

# Fields whose value must have a specific shape. Anything not listed here
# accepts the rest of the line as-is.
_VALUE_PATTERNS = {
    "Order ID": r"(\d+)",
    # Minimal sanity check: local@domain.tld with no whitespace or extra "@",
    # and nothing else on the line.
    "Email": r"([^@\s]+@[^@\s]+\.[^@\s]+)[ \t\r]*$",
}

# One compiled pattern per field. The label is anchored to the start of a
# line so that a label quoted inside another field's text is not picked up,
# and only spaces/tabs are skipped after the colon so that an empty value
# cannot swallow the following line.
_FIELD_PATTERNS = {
    name: re.compile(
        r"^[ \t]*" + re.escape(name) + r":[ \t]*" + _VALUE_PATTERNS.get(name, r"(.*)"),
        re.MULTILINE,
    )
    for name in _FIELD_NAMES
}


def extract_structured_data(text_block):
    """Parse blank-line-separated order records into a pandas DataFrame.

    Each record is a run of ``Label: value`` lines. Records are separated by
    one or more blank (or whitespace-only) lines.

    Args:
        text_block: The raw text to parse. ``None``, an empty string, or a
            whitespace-only string yields an empty result.

    Returns:
        A DataFrame with one row per record and the columns ``Order ID``,
        ``Customer``, ``Email``, ``Date``, ``Product``, ``Feedback`` and
        ``Contact`` (always present, even when there are no rows). A field
        that is absent, empty, or fails validation (a non-numeric order ID
        or an email that does not look like ``local@domain.tld``) is stored
        as a missing value.
    """
    columns = list(_FIELD_NAMES)
    text = (text_block or "").strip()

    # Splitting "" would yield [""], i.e. one phantom record; drop blanks.
    records = [chunk for chunk in re.split(r"\n\s*\n", text) if chunk.strip()]

    rows = []
    for record in records:
        row = {}
        for name, pattern in _FIELD_PATTERNS.items():
            match = pattern.search(record)
            value = match.group(1).strip() if match else ""
            # An empty value is as good as a missing one.
            row[name] = value or None
        rows.append(row)

    # Passing columns keeps the schema stable for an empty result.
    return pd.DataFrame(rows, columns=columns)

# Parse the sample text into a DataFrame with one row per order record.
df_structured = extract_structured_data(unstructured_text)

# Bare expression as the last statement: in a notebook cell this renders
# the DataFrame as the cell output (it has no effect in a plain script).
df_structured



test_empty_input (__main__.TestDataExtraction) ... FAIL
test_malformed_text (__main__.TestDataExtraction) ... FAIL
test_missing_customer (__main__.TestDataExtraction) ... ok
test_regular_data (__main__.TestDataExtraction) ... ok

FAIL: test_empty_input (__main__.TestDataExtraction)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_673/386450541.py", line 79, in test_empty_input
    self.assertEqual(df.shape[0], 0)  # No orders should be extracted
AssertionError: 1 != 0

FAIL: test_malformed_text (__main__.TestDataExtraction)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_673/386450541.py", line 87, in test_malformed_text
    self.assertIsNone(df.iloc[0]['Email'])  # Email should be None due to malformed format
AssertionError: 'broken_email_format' is not None

--------------------------------------------------------------------