In [1]:
import pandas as pd
import re

# Load the previously created CSV file with filtered and categorized emails.
# NOTE(review): this is an absolute, environment-specific path — confirm it
# exists wherever the notebook is run.
file_path = '/content/68b1acd44f393_Sample_Support_Emails_Dataset (2).csv'
try:
    df = pd.read_csv(file_path)
except FileNotFoundError as exc:
    # Raise instead of calling exit(): exit() is a helper intended for the
    # interactive interpreter and, inside a notebook, can take the kernel down
    # rather than just stopping this cell. Raising halts execution with the
    # same guidance message and keeps the original error chained for context.
    raise FileNotFoundError(
        f"File not found: {file_path}. Please make sure you have run the previous code cell to create this file."
    ) from exc

# Define regular expressions for extracting information.
#
# Phone pattern: an optional "+<1-2 digits>" country code, a 3-digit area code
# (optionally wrapped in parentheses), then groups of 3 and 4 digits, each
# optionally separated by whitespace, a dot or a hyphen.
# - The country-code group is non-capturing, so re.findall() yields the whole
#   number instead of just that (usually empty) group.
# - (?<!\w) is used instead of a leading \b so a match may begin on '+' or '('
#   (a \b can never sit between whitespace and those characters).
phone_regex = r'(?<!\w)(?:\+\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b'
# Email pattern: local part, '@', domain, and an alphabetic TLD of 2+ letters.
# The TLD class is [A-Za-z]; a '|' inside a character class is a literal pipe,
# not alternation, so it must not appear there.
email_regex = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'

# Function to extract information using regex
def extract_info(text, regex):
    """Return every match of ``regex`` in ``text`` as one comma-separated string.

    Args:
        text: Value to search. It is converted with ``str()`` before matching;
            missing values (as detected by ``pd.isna``) are skipped.
        regex: Regular-expression pattern passed to the ``re`` module.

    Returns:
        The full text of each match joined with ``', '``, or ``None`` when
        ``text`` is missing or nothing matches.
    """
    if pd.isna(text):
        return None
    # Use finditer + group(0) rather than findall: findall returns the capture
    # groups (strings or tuples) whenever the pattern contains any, which would
    # yield partial/empty text or make the join below fail.
    matches = [match.group(0) for match in re.finditer(regex, str(text))]
    return ', '.join(matches) if matches else None

# Fill one new column per pattern by scanning the 'body' column.
patterns = {'extracted_phone': phone_regex, 'extracted_email': email_regex}
for column, pattern in patterns.items():
    df[column] = df['body'].apply(extract_info, args=(pattern,))

# Show the original fields next to the newly extracted ones as a markdown table.
preview_columns = ['sender', 'subject', 'body', 'extracted_phone', 'extracted_email']
print("\nDataFrame with extracted information:")
table = df[preview_columns].to_markdown(index=False, numalign="left", stralign="left")
print(table)

# Persist the enriched DataFrame (without the index) to a new CSV file.
df.to_csv('emails_with_extracted_info.csv', index=False)


DataFrame with extracted information:
| sender              | subject                                    | body                                                                                                       | extracted_phone   | extracted_email   |
|:--------------------|:-------------------------------------------|:-----------------------------------------------------------------------------------------------------------|:------------------|:------------------|
| eve@startup.io      | Help required with account verification    | Do you support integration with third-party APIs? Specifically, I’m looking for CRM integration options.   |                   |                   |
| diana@client.co     | General query about subscription           | Hi team, I am unable to log into my account since yesterday. Could you please help me resolve this issue?  |                   |                   |
| eve@startup.io      | Immediate support needed for billing error | Hello, I wanted to u