In [4]:
import re
from typing import Dict, Optional

# Extract from the email body
def extract_email_info(email_text: str) -> Dict[str, Optional[str]]:

    patterns = {
        # Person Name includes First Name, Last Name
        'name': r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]*\.?)*)\s+([A-Z][a-z]+)\b',
        
        'title': r'\b(Dr\.?|Mr\.?|Ms\.?|Mrs\.?|Miss\.?|Prof\.?|Professor\.?|Frau\.?|Herr\.?|Mme\.?|M\.?|Mlle\.?|Sir\.?|Madam\.?)\s*',
        
        # Company Name (Inc, Ltd, Corp, GmbH)
        'company': r'\b([A-Z][A-Za-z\s&.,]+(?:Inc\.?|Ltd\.?|Corp\.?|Co\.?|LLC|Company|Corporation|GmbH))\b',
        
        'position': r'\b(CEO|CTO|CFO|COO|President|Vice President|VP|Director|Manager|Senior|Lead|Principal|Engineer|Developer|Designer|Analyst|Specialist|Coordinator|Assistant|Representative|Sales|Marketing|HR|Human Resources|Chief|Head|Team Lead|Customer Service|Support)\b',

        'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
        
        'phone': r'(?:\+?\d{1,3}[-.\s]?)?(?:\(?\d{2,4}\)?[-.\s]?)?[\d\s.-]{7,15}\d',

        'website': r'(?:https?:\/\/)?(?:www\.)?[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]?\.[a-zA-Z]{2,}(?:\/[^\s]*)?',
    #    'domain_only': r'(?:www\.)?([a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]?\.[a-zA-Z]{2,})'    
    }
    
    result = {
        'first_name': None,
        'last_name': None,
        'title' : None,
        'company_name': None,
        'person_position': None,
        'email_address': None,
        'phone_number': None,
        'website': None
    }

    title_matches = re.findall(patterns['title'], email_text, re.IGNORECASE)
    if title_matches:
        result['title'] = title_matches[0].rstrip('.')  # remove dot
    
    email_matches = re.findall(patterns['email'], email_text, re.IGNORECASE)
    if email_matches:
       result['email_address'] = email_matches[0]
   
    phone_matches = re.findall(patterns['phone'], email_text)
    if phone_matches:
        # the longest match in the phone number
       result['phone_number'] = max(phone_matches, key=len).strip()
    
    name_matches = re.findall(patterns['name'], email_text, re.IGNORECASE)
    if name_matches:
        result['first_name'] = name_matches[0][0]
        result['last_name'] = name_matches[0][1]
    
    company_matches = re.findall(patterns['company'], email_text, re.IGNORECASE)
    if company_matches:
        result['company_name'] = company_matches[0].strip()
    
    position_matches = re.findall(patterns['position'], email_text, re.IGNORECASE)
    if position_matches:
        result['person_position'] = position_matches[0]
    
    website_matches = re.findall(patterns['website'], email_text, re.IGNORECASE)
    if website_matches:
        # URL regularization
        website = website_matches[0]
        if not website.startswith(('http://', 'https://')):
            if website.startswith('www.'):
                website = 'https://' + website
            else:
                website = 'https://www.' + website
        result['website'] = website
    
    return result




In [11]:
extract_email_info(email_text_1)

{'first_name': 'Hey',
 'last_name': 'Dana',
 'title': 'm',
 'company_name': None,
 'person_position': 'support',
 'email_address': None,
 'phone_number': '30 364 286 880',
 'website': 'https://n26.com/en/support/'}

In [12]:
extract_email_info(email_text_2)

{'first_name': 'Hey',
 'last_name': 'folks',
 'title': 'M',
 'company_name': None,
 'person_position': 'Manager',
 'email_address': 'filip@spiced-academy.com',
 'phone_number': '24.06.2025',
 'website': 'https://www.discord.com'}

In [7]:
# Extract from the signature
def extract_from_signature(email_text: str) -> Dict[str, Optional[str]]:
 
    signature_patterns = [
        r'Best,?\s*\n(.*?)$',
        r'Warm,?\s*\n(.*?)$',
        r'Sincere,?\s*\n(.*?)$',
        r'Thanks,?\s*\n(.*?)$',
        r'Regards,?\s*\n(.*?)$',
        r'regards,?\s*\n(.*?)$',
        r'Kind regards,?\s*\n(.*?)$',
        r'--\s*\n(.*?)$',
        r'___+\s*\n(.*?)$',
        r'Mit freundlichen\s*Gr[üu][ßs]en,?\s*\n(.*?)$',
        r'Gru[ßs],?\s*\n(.*?)$' 
    ]
    
    result = {
        'first_name': None,
        'last_name': None,
        'title': None,
        'company_name': None,
        'person_position': None,
        'email_address': None,
        'phone_number': None,
        'website': None,
        'Address': None
    }
    
    signature_text = ""
    for pattern in signature_patterns:
        matches = re.search(pattern, email_text, re.DOTALL | re.IGNORECASE)
        if matches:
            signature_text = matches.group(1)
            break
    
    # If there's no signature, then I want def body function to work 
    if not signature_text:
        return result
    
    patterns = {
        'name': r'^\s*([A-Z][a-zäöüß]+(?:\s+[A-Z][a-zäöüß]*\.?)*)\s+([A-Z][a-zäöüß]+)',
        
        'title': r'\b(Dr\.?|Mr\.?|Ms\.?|Mrs\.?|Miss\.?|Prof\.?|Professor\.?|Frau\.?|Herr\.?|Mme\.?|M\.?|Mlle\.?|Sir\.?|Madam\.?)\s*',
        
        'company': r'\b([A-Z][A-Za-z\s&.,]+(?:Inc\.?|Ltd\.?|Corp\.?|Co\.?|LLC|Company|Corporation|GmbH))\b',
        
        'position': r'\b(CEO|CTO|CFO|COO|President|Vice President|VP|Director|Manager|Senior|Lead|Principal|Engineer|Developer|Designer|Analyst|Specialist|Coordinator|Assistant|Representative|Sales|Marketing|HR|Human Resources|Chief|Head|Team Lead|Customer Service|Support)\b',
        
        'email': r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b',
        
        'phone': r'(?:\+\d{1,3}[-\s]?)?(?:\(\d+\)[-\s]?)?(?:[-\s\./\d]){7,}',
        
        'website': r'(?:https?:\/\/)?(?:www\.)?[a-zA-Z0-9][a-zA-Z0-9-]{0,61}[a-zA-Z0-9]?\.[a-zA-Z]{2,}(?:\/[^\s]*)?',
        
        'address': r'(?:[A-Z][a-zäöüß]+\.?\s*)+\d+[a-z]?(?:[-|,|\s]+\d{5}\s+[A-Z][a-zäöüß]+)'
    }
    
    title_matches = re.findall(patterns['title'], signature_text, re.IGNORECASE)
    if title_matches:
        result['title'] = title_matches[0].rstrip('.')  # remove dot
    
    email_matches = re.findall(patterns['email'], signature_text, re.IGNORECASE)
    if email_matches:
       result['email_address'] = email_matches[0]
   
    phone_matches = re.findall(patterns['phone'], signature_text)
    if phone_matches:
        # the longest match in the phone number
       result['phone_number'] = max(phone_matches, key=len).strip()
    
    name_matches = re.findall(patterns['name'], signature_text, re.IGNORECASE)
    if name_matches:
        result['first_name'] = name_matches[0][0]
        result['last_name'] = name_matches[0][1]
    
    company_matches = re.findall(patterns['company'], signature_text, re.IGNORECASE)
    if company_matches:
        result['company_name'] = company_matches[0].strip()
    
    position_matches = re.findall(patterns['position'], signature_text, re.IGNORECASE)
    if position_matches:
        result['person_position'] = position_matches[0]
    
    website_matches = re.findall(patterns['website'], signature_text, re.IGNORECASE)
    if website_matches:
        # URL regularization
        website = website_matches[0]
        if not website.startswith(('http://', 'https://')):
            if website.startswith('www.'):
                website = 'https://' + website
            else:
                website = 'https://www.' + website
        result['website'] = website

    address_matches = re.findall(patterns['address'], signature_text)
    if address_matches:
        result['address'] = address_matches[0].strip()
        
    return result

In [13]:
extract_from_signature(email_text_1)

{'first_name': None,
 'last_name': None,
 'title': None,
 'company_name': None,
 'person_position': 'Customer Service',
 'email_address': None,
 'phone_number': '+49 (0) 30 364 286 880',
 'website': None,
 'Address': None,
 'address': 'Customer Service \nKlosterstraße 62 | 10179 Berlin'}

In [14]:
extract_from_signature(email_text_2)

{'first_name': 'Filip',
 'last_name': 'Vukovi',
 'title': 'M',
 'company_name': None,
 'person_position': 'Manager',
 'email_address': 'filip@spiced-academy.com',
 'phone_number': '10969',
 'website': 'https://www.spiced-academy.com',
 'Address': None}

In [10]:
email_text_1 = """Hey Dana, 
steps: 

1. Please check if the current shipping address is correct. You can update your shipping address anytime online in your account settings. 

2. Please give us the name that is displayed on the mailbox if the card has to be delivered to your workplace or an address which does not show your name on the mailbox. 

3. Please give us a call or reply to this e-mail, once your shipping address is up-to-date. Afterwards we will re-send your MasterCard to you and you can start enjoying N26 right away. 

4. If we do not receive a reply from you by 31.08.2016, we will assume you are probably not interested in N26 anymore. In that case we will have to cancel your account. 

If you have any further questions, please do not hesitate to contact us. 
Or check out our support center to find answers right away: https://n26.com/en/support/ 

Kind regards, 

Beatrice 
+49 (0) 30 364 286 880 
N26 Customer Service 
Klosterstraße 62 | 10179 Berlin"""

email_text_2 = """Hey folks,

We’re excited to welcome you at Spiced this coming Tuesday, 24.06.2025!

In this email, you'll find all the details you'll need for your first day. For questions or concerns, please do not hesitate to reach out.

Spiced Address:

Ritterstr 12-14, 10969
Berlin Closest U-Bahn Moritzplatz (U8)
We're located to the right as you walk into the Hof.

Mobility
If you are coming to the school by bike you can park it in Hof 4. 

Arrival Time
Please arrive by 9:00 AM so that you can settle in and get some coffee/tea before diving into your first day. We will kick off the day at 9:15 AM with check-ins, a tour and welcome session - then move on to our regular curriculum. We'll finish the day by 6:00 PM.

Please bring the following items:

Laptop & power cord
Adapter (if you are from outside Germany)
20 EUR cash - exact change - as a deposit for your entry key card. You will get this back when you return the key card to us after the course
If you prefer to work with a monitor, you will need a cable to connect your laptop to a monitor (Thunderbolt - HDMI)
Headphones
SPICED provides coffee, tea and milk in our kitchen area. There are kettles, microwaves and fridges for your use.

Your Communication Platform: Discord
We’ll be using Discord as our main platform for announcements, group collaboration, and community chats throughout the Bootcamp.

How to Get Started:
Create a Discord account at discord.com
Join our server 👉 https://discord.gg/TRbqwQv4s7

Set up your profile with your real name and assign yourself the role for your Bootcamp (e.g., Web Dev, UX/UI, Cybersecurity).

Choose your cohort channel and start exploring!

💬 You’ll find channels for updates, Q&A, teamwork, and casual conversations.
🔔 Don’t forget to adjust your notification settings so you stay in the loop without the noise.

For more details on how Discord handles your data, you can check their Privacy Policy

Read through our Student Guidebook. You have already signed the document as part of your student agreement.

And one last thing - please make sure to fill out our Emergency Contact form before the start of the bootcamp.
We are looking forward to meeting you in person! 🌶️
Keep it spicy,

Your Program Team

--
Filip Vuković (he/him)
Program Manager


filip@spiced-academy.com

www.spiced-academy.com

Ritterstrasse 12-14, 10969 Berlin
"""