In [20]:
# Import necessary libraries
import re
import pandas as pd
import spacy

# Load spaCy's English language model
nlp = spacy.load('en_core_web_sm')

# Sample unstructured text
text = """
John Doe, a software engineer at OpenAI, can be reached at john.doe@example.com or (123) 456-7890.
He was born on January 5, 1990, and currently resides in San Francisco.
Jane Smith, working for Google, has the email jane_smith@google.com and phone number 987-654-3210.
Her birthdate is 12/08/1985, and she lives in New York City.
"""

# Function to extract emails
def extract_emails(text):
    email_pattern = r'[\w\.-]+@[\w\.-]+'
    return re.findall(email_pattern, text)

# Function to extract phone numbers
def extract_phone_numbers(text):
    phone_pattern = r'(👦?\d{3}👦?[\s\-]?\d{3}[\s\-]?\d{4})'
    return re.findall(phone_pattern, text)

# Function to extract dates
def extract_dates(text):
    date_pattern = r'(\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\w+\s\d{1,2},\s\d{4}\b)'
    return re.findall(date_pattern, text)

# Function to perform Named Entity Recognition
def extract_entities(text):
    doc = nlp(text)
    entities = {'PERSON': [], 'ORG': [], 'GPE': []}
    for ent in doc.ents:
        if ent.label_ in entities:
            entities[ent.label_].append(ent.text)
    return entities

# Extract information
emails = extract_emails(text)
phones = extract_phone_numbers(text)
dates = extract_dates(text)
entities = extract_entities(text)

# Combine extracted data into a structured format
data = []
for i in range(len(emails)):
    person = entities['PERSON'][i] if i < len(entities['PERSON']) else None
    org = entities['ORG'][i] if i < len(entities['ORG']) else None
    location = entities['GPE'][i] if i < len(entities['GPE']) else None
    birthdate = dates[i] if i < len(dates) else None
    phone = phones[i] if i < len(phones) else None
    email = emails[i] if i < len(emails) else None
    data.append({
        'Name': person,
        'Organization': org,
        'Location': location,
        'Birthdate': birthdate,
        'Phone': phone,
        'Email': email
    })

# Create a DataFrame
df = pd.DataFrame(data)

# Display the structured data
print(df)

         Name Organization       Location        Birthdate         Phone  \
0    John Doe       OpenAI  San Francisco  January 5, 1990  987-654-3210   
1  Jane Smith       Google  New York City       12/08/1985          None   

                   Email  
0   john.doe@example.com  
1  jane_smith@google.com  
