Write a Python function using regular expressions to extract all email addresses from a given string. Test it with the input: 'Contact us at support@example.com and sales@example.org.'


In [4]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
import re
import requests
from bs4 import BeautifulSoup

# Fetch the 'punkt' resource through NLTK's downloader (only required once per
# environment). Presumably this is the tokenizer data that the sent_tokenize /
# word_tokenize calls in this cell rely on -- confirm against the installed
# NLTK version.
nltk.download('punkt')

def clean_text(text):
    """Return ``text`` lowercased, keeping only ASCII letters, digits and whitespace.

    Every other character (punctuation, symbols, non-ASCII letters) is
    deleted outright rather than replaced, so surrounding characters close up.
    """
    # First drop the unwanted characters, then normalise the case.
    stripped = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return stripped.lower()

def extract_emails(text):
    """Return all email-like substrings found in ``text``.

    Args:
        text: The string to scan.

    Returns:
        A list of matched addresses in order of appearance (empty if none).
        Matching is a pragmatic regex approximation, not full RFC 5322
        validation.
    """
    # local-part @ domain . TLD, where the TLD is two or more ASCII letters.
    # The TLD class is [A-Za-z]; writing it as [A-Z|a-z] would also accept a
    # literal '|' because '|' has no alternation meaning inside a class.
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'
    return re.findall(email_pattern, text)

def fetch_webpage_title(url, timeout=10):
    """Fetch ``url`` and return the text of its ``<title>`` element.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        The title string, or ``'No title found'`` when the page has no
        ``<title>`` tag or the tag has no single string content.

    Raises:
        Whatever ``requests.get`` raises on connection failure or timeout;
        those exceptions are not caught here.
    """
    # Fetch the webpage content
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    title_tag = soup.title
    # .string is None for an empty <title> or one with nested markup, so
    # treat that the same as a missing tag instead of returning None.
    if title_tag is None or title_tag.string is None:
        return 'No title found'
    return title_tag.string

# Sample paragraph used to demonstrate sentence and word tokenization
paragraph = (
    "Natural Language Processing (NLP) is a fascinating field of Artificial Intelligence. "
    "It enables computers to understand and process human language. "
    "Applications of NLP include chatbots, translation, and sentiment analysis."
)

# Split the paragraph into sentences and show them under a heading
sentences = sent_tokenize(paragraph)
print("Sentences:", sentences, sep="\n")

# Split the paragraph into word/punctuation tokens and show them under a heading
words = word_tokenize(paragraph)
print("\nWords:", words, sep="\n")

# Exercise clean_text on a short sample containing punctuation and digits
test_text = 'Hello, World! Welcome to NLP 101.'
cleaned_test_text = clean_text(test_text)
print("\nCleaned Text:", cleaned_test_text, sep="\n")

# Exercise extract_emails on a sentence containing two addresses
test_email_text = 'Contact us at support@example.com and sales@example.org.'
extracted_emails = extract_emails(test_email_text)
print("\nExtracted Emails:", extracted_emails, sep="\n")

# Exercise fetch_webpage_title against a live page (performs a network request)
test_url = 'https://example.com'
webpage_title = fetch_webpage_title(test_url)
print("\nWebpage Title:", webpage_title, sep="\n")


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lokes\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Sentences:
['Natural Language Processing (NLP) is a fascinating field of Artificial Intelligence.', 'It enables computers to understand and process human language.', 'Applications of NLP include chatbots, translation, and sentiment analysis.']

Words:
['Natural', 'Language', 'Processing', '(', 'NLP', ')', 'is', 'a', 'fascinating', 'field', 'of', 'Artificial', 'Intelligence', '.', 'It', 'enables', 'computers', 'to', 'understand', 'and', 'process', 'human', 'language', '.', 'Applications', 'of', 'NLP', 'include', 'chatbots', ',', 'translation', ',', 'and', 'sentiment', 'analysis', '.']

Cleaned Text:
hello world welcome to nlp 101

Extracted Emails:
['support@example.com', 'sales@example.org']

Webpage Title:
Example Domain
