In [28]:
import pandas as pd
import re

# Function to extract domain from email address
def extract_domain(email):
    """Return the domain part of an email address.

    The domain is the run of word characters, dots and hyphens that
    immediately follows the first ``@`` in *email*, e.g.
    ``"adam@serafund.co"`` -> ``"serafund.co"``.

    Args:
        email: The address to inspect. Any type is accepted so that
            non-text values (for example an empty spreadsheet cell that
            arrives as a float) do not abort a batch run.

    Returns:
        The domain as a string, or ``""`` when *email* is not a string or
        contains no ``@`` followed by at least one domain character.
    """
    if not isinstance(email, str):
        return ""
    match = re.search(r"@([\w.-]+)", email)
    # re.search yields None when nothing matches, so guard before .group().
    return match.group(1) if match else ""

# Function to perform keyword matching
def keyword_match(email, domain, keywords):
    """Return the first keyword found in the email address or its domain.

    Matching is a case-insensitive substring test. Keywords are tried in
    the order given and the first hit wins, so a more specific keyword
    (``"capital"``) must be listed before a keyword it contains (``"cap"``)
    to take precedence.

    Args:
        email: Full email address to search.
        domain: Domain of the address, searched as well.
        keywords: Iterable of keyword strings.

    Returns:
        The matching keyword exactly as it appears in *keywords*, or
        ``None`` when no keyword matches.
    """
    # Lowercase the haystacks once instead of once per keyword; values that
    # are not strings (None, NaN) are simply not searched.
    haystacks = [text.lower() for text in (email, domain) if isinstance(text, str)]

    for keyword in keywords:
        # The empty string is a substring of every string: without this
        # guard a blank keyword would "match" and hide every later keyword.
        if not isinstance(keyword, str) or not keyword:
            continue
        needle = keyword.lower()
        if any(needle in haystack for haystack in haystacks):
            return keyword
    return None

# Read Excel file and validate email addresses
def process_excel_file(file_path):
    """Match the emails in an Excel workbook against a keyword list.

    The workbook must contain two sheets, both read without a header row
    and with their first row skipped:

    * ``'Keywords for matching'`` - cells holding comma-separated
      keywords, each optionally wrapped in single quotes.
    * ``'ALL EMAILS'`` - cells holding email addresses.

    Args:
        file_path: Path of the workbook, passed to ``pandas.read_excel``.

    Returns:
        A DataFrame with columns ``Keyword``, ``Domain`` and ``Email``,
        one row per email for which ``keyword_match`` found a keyword.
        Cells that are not text, or that have no extractable domain, are
        skipped rather than aborting the whole run.
    """
    keywords_sheet = 'Keywords for matching'
    emails_sheet = 'ALL EMAILS'

    # Asking for both sheets in one call parses the workbook once; the
    # result is a dict keyed by sheet name.
    sheets = pd.read_excel(
        file_path, sheet_name=[keywords_sheet, emails_sheet], header=None
    )

    # Parse each keyword cell on its own: joining all cells first would
    # fail on blank (non-string) cells and would fuse keywords that sit in
    # separate cells into a single bogus keyword.
    keywords = []
    for cell in sheets[keywords_sheet].iloc[1:].values.flatten():
        if not isinstance(cell, str):
            continue
        for piece in cell.split(','):
            word = piece.strip().strip("'")
            if word:
                keywords.append(word)

    rows = []
    for email in sheets[emails_sheet].iloc[1:].values.flatten():
        if not isinstance(email, str):
            continue  # blank or non-text cell
        try:
            domain = extract_domain(email)
        except AttributeError:
            # extract_domain may raise when the address has no '@domain'
            # part; treat that the same as "no domain".
            domain = None
        if not domain:
            continue

        keyword = keyword_match(email, domain, keywords)
        if keyword:
            rows.append([keyword, domain, email])

    return pd.DataFrame(rows, columns=['Keyword', 'Domain', 'Email'])

# Example usage
file_path = "/content/100 Emails.xlsx"
result_df = process_excel_file(file_path)

# Show the matches as a plain-text table, heading first, without the
# DataFrame index column.
print("Result:", result_df.to_string(index=False), sep="\n")


Result:
Keyword                     Domain                                  Email
    cap             altariscap.com              aaron.bell@altariscap.com
capital         maranoncapital.com     aaron.simkovich@maranoncapital.com
    cap             collidecap.com                   aaron@collidecap.com
capital bostoncommunitycapital.org    abeattie@bostoncommunitycapital.org
capital            baincapital.com                  aberg@baincapital.com
   fund               caifunds.com                    ablack@caifunds.com
    cap                 novacap.ca                acappabianca@novacap.ca
capital           kaynecapital.com               acathey@kaynecapital.com
capital     victoryparkcapital.com achandrasekhara@victoryparkcapital.com
   fund                serafund.co                       adam@serafund.co
     gp            stgpartners.com                   adam@stgpartners.com
    cap             suncappart.com                 aharman@suncappart.com
capital          capitalagroup