In [1]:
import re
import numpy as np

text = 'Contact us at support@example.com or sales@company.org for assistance. For personal inquiries, email john.doe123@university.edu. '

emails = re.findall(r"[\w\.-]+@[\w\.-]+\.\w+", text)

emails_np = np.array(emails)

print("Extracted Emails:", emails_np.tolist())


Extracted Emails: ['support@example.com', 'sales@company.org', 'john.doe123@university.edu']


In [2]:
text = 'Valid: 123-456-7890, 987-654-3210 Invalid: 12-345-67890, 1234567890, 123-45-6789'

candidates = re.findall(r"[\d-]+", text)

valid_pattern = re.compile(r"^\d{3}-\d{3}-\d{4}$")
valid_numbers = [num for num in candidates if valid_pattern.match(num)]

valid_numbers_np = np.array(valid_numbers)

print("Valid Phone Numbers:", valid_numbers_np.tolist())


Valid Phone Numbers: ['123-456-7890', '987-654-3210']


In [3]:
text = 'Important dates: 25/12/2023, 01-01-2024, 31/05/2023, and 15-10-2024.'

dates = re.findall(r"\b\d{2}[/-]\d{2}[/-]\d{4}\b", text)

dates_np = np.array(dates)

print("Extracted Dates:", dates_np.tolist())


Extracted Dates: ['25/12/2023', '01-01-2024', '31/05/2023', '15-10-2024']


In [4]:
text = "The the quick brown fox jumps over the the lazy dog."

repeats = re.findall(r"\b(\w+)\s+\1\b", text, flags=re.IGNORECASE)

repeats_np = np.array(repeats)

print("Repeated Words:", repeats_np.tolist())


Repeated Words: ['The', 'the']


In [5]:
text = "Check out our new products: #Sale2024, #NewArrival, and #Discounts!"

hashtags = re.findall(r"#\w+", text)

hashtags_np = np.array(hashtags)

print("Extracted Hashtags:", hashtags_np.tolist())


Extracted Hashtags: ['#Sale2024', '#NewArrival', '#Discounts']


In [6]:
passwords = ["Password123", "Secure456", "weak", "password", "Password"]

pattern = re.compile(r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d).{8,}$")

valid_passwords = [pw for pw in passwords if pattern.match(pw)]
valid_passwords_np = np.array(valid_passwords)

print("Valid Passwords:", valid_passwords_np.tolist())


Valid Passwords: ['Password123', 'Secure456']


In [7]:
text = "Visit our website at https://www.example.com or check out http://blog.example.org for updates."

urls = re.findall(r"https?://[^\s]+", text)

urls_np = np.array(urls)

print("Extracted URLs:", urls_np.tolist())


Extracted URLs: ['https://www.example.com', 'http://blog.example.org']


In [8]:
text = "This   text    has   multiple    spaces."

cleaned_text = re.sub(r"\s+", " ", text)

print("Original Text:", text)
print("Cleaned Text:", cleaned_text)


Original Text: This   text    has   multiple    spaces.
Cleaned Text: This text has multiple spaces.


In [9]:
text = 'He said, "Hello, world!" and she replied, "Hi there!"'

quoted_text = re.findall(r'"([^"]+)"', text)

quoted_text_np = np.array(quoted_text)

print("Quoted Texts:", quoted_text_np.tolist())


Quoted Texts: ['Hello, world!', 'Hi there!']


In [10]:
text = """
Valid: 192.168.1.1, 10.0.0.255
Invalid: 256.1.2.3, 192.168.01.1, 192.168.1
"""

ip_pattern = re.compile(r"\b((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\.){3}"
                        r"(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)\b")

valid_ips = ip_pattern.findall(text)

valid_ips_clean = [match[0] for match in valid_ips]

valid_ips_np = np.array(valid_ips_clean)

print("Valid IP Addresses:", valid_ips_np.tolist())


Valid IP Addresses: ['1.', '0.']
