# **Part 3 – Advanced (Lookarounds, Backreferences, Performance) – 30 Tasks**

# Task 61: Match numbers only if they are followed by "kg"

In [1]:
import re

text = "The weight is 50kg and the height is 180cm."
numbers = re.findall(r"\d+(?=kg)", text)

print(f"Numbers followed by 'kg': {numbers}")

Numbers followed by 'kg': ['50']


# Task 62: Match numbers only if they are not followed by "kg"

In [2]:
import re

text = "The weight is 50kg and the height is 180cm."
numbers = re.findall(r"\d+(?!kg)", text)

print(f"Numbers not followed by 'kg': {numbers}")

Numbers not followed by 'kg': ['5', '180']


# Task 63: Extract words only if they are followed by a comma

In [3]:
import re

text = "apple, banana, orange, grape"
words = re.findall(r"\b\w+(?=,)", text)

print(f"Words followed by a comma: {words}")

Words followed by a comma: ['apple', 'banana', 'orange']


# Task 64: Extract words only if they are preceded by #

In [4]:
import re

text = "I love #Python and #Regex."
words = re.findall(r"(?<=#)\w+", text)

print(f"Words preceded by a hash: {words}")

Words preceded by a hash: ['Python', 'Regex']


# Task 65: Match "Python" only if it’s not followed by "3"

In [5]:
import re

text = "I prefer Python over Python3."
word = re.search(r"Python(?!3)", text)

if word:
    print(f"Matched word: {word.group(0)}")

Matched word: Python


# Task 66: Find overlapping matches of "aba" in "ababa"

In [6]:
import re

text = "ababa"
matches = re.findall(r"(?=aba)", text)

print(f"Overlapping matches found: {len(matches)}")

Overlapping matches found: 2


# Task 67: Match numbers between 100 and 999

In [7]:
import re

text = "10, 50, 150, 999, 1000"
numbers = re.findall(r"\b\d{3}\b", text)

print(f"Numbers between 100 and 999: {numbers}")

Numbers between 100 and 999: ['150', '999']


# Task 68: Match floating-point numbers with optional sign (+/-)

In [8]:
import re

text = "The temperature is +25.5 and -10 degrees."
numbers = re.findall(r"[+-]?\d+\.?\d*", text)

print(f"Floating-point numbers found: {numbers}")

Floating-point numbers found: ['+25.5', '-10']


# Task 69: Match strings without the letter "e"

In [9]:
import re

text = "The quick brown fox jumps over the lazy dog."
words = re.findall(r"\b[^eE\s]+\b", text)

print(f"Words without the letter 'e': {words}")

Words without the letter 'e': ['quick', 'brown', 'fox', 'jumps', 'lazy', 'dog']


# Task 70: Validate IPv6 addresses

In [10]:
import re

text1 = "2001:0db8:85a3:0000:0000:8a2e:0370:7334"
text2 = "FE80:0000:0000:0000:0202:B3FF:FE1E:8329"
text3 = "This is not an IP address."

pattern = r"([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}"

print(f"'{text1}' is a valid IPv6: {bool(re.fullmatch(pattern, text1))}")
print(f"'{text2}' is a valid IPv6: {bool(re.fullmatch(pattern, text2))}")
print(f"'{text3}' is a valid IPv6: {bool(re.fullmatch(pattern, text3))}")

'2001:0db8:85a3:0000:0000:8a2e:0370:7334' is a valid IPv6: True
'FE80:0000:0000:0000:0202:B3FF:FE1E:8329' is a valid IPv6: True
'This is not an IP address.' is a valid IPv6: False


# Task 71: Match HTML opening tags without closing tags

In [11]:
import re

text = "<div><p>This is a paragraph.</p><span>This is a span.<div>"
# Matches any opening tag that is not followed by a closing tag with the same name.
pattern = r"<(\w+)[^>]*>(?!.*<\/\1>)"
invalid_tags = re.findall(pattern, text)

print(f"Opening tags without a closing tag: {invalid_tags}")

Opening tags without a closing tag: ['div', 'span', 'div']


# Task 72: Match a repeated word using a backreference

In [12]:
import re

text = "This is is a test test."
repeated_words = re.findall(r"\b(\w+)\s+\1\b", text)

print(f"Repeated words found: {repeated_words}")

Repeated words found: ['is', 'test']


# Task 73: Match palindromes of 3 letters

In [13]:
import re

text = "madam, mom, dad, refer, level, aba, abc"
palindromes = re.findall(r"\b(\w)\w\1\b", text)

print(f"Three-letter palindromes found: {palindromes}")

Three-letter palindromes found: ['m', 'd', 'a']


# Task 74: Extract all filenames without extensions

In [14]:
import re

text = "file.txt file2.pdf image.jpeg"
filenames = re.findall(r"(\w+)(?=\.\w+)", text)

print(f"Filenames without extensions: {filenames}")

Filenames without extensions: ['file', 'file2', 'image']


# Task 75: Validate credit card numbers (simple pattern)

In [15]:
import re

text1 = "1234-5678-9012-3456"
text2 = "12345-6789-0123-4567"

pattern = r"^\d{4}-\d{4}-\d{4}-\d{4}$"

print(f"'{text1}' is a valid credit card: {bool(re.fullmatch(pattern, text1))}")
print(f"'{text2}' is a valid credit card: {bool(re.fullmatch(pattern, text2))}")

'1234-5678-9012-3456' is a valid credit card: True
'12345-6789-0123-4567' is a valid credit card: False


# Task 76: Match only words with alternating vowels and consonants.

In [16]:
import re

text = "rhythm, team, code, regular"
pattern = r"\b((?:[aeiou][^aeiou])+|[^aeiou][aeiou])+\b"
words = re.findall(pattern, text, re.IGNORECASE)

print(f"Words with alternating vowels and consonants: {words}")

Words with alternating vowels and consonants: ['am', 'de']


# Task 77: Validate a URL

In [17]:
import re

text1 = "https://www.google.com"
text2 = "ftp://example.co.uk/path"
text3 = "This is not a URL."

pattern = r"^(https?|ftp):\/\/[^\s\/$.?#].[^\s]*$"

print(f"'{text1}' is a valid URL: {bool(re.fullmatch(pattern, text1))}")
print(f"'{text2}' is a valid URL: {bool(re.fullmatch(pattern, text2))}")
print(f"'{text3}' is a valid URL: {bool(re.fullmatch(pattern, text3))}")

'https://www.google.com' is a valid URL: True
'ftp://example.co.uk/path' is a valid URL: True
'This is not a URL.' is a valid URL: False


# Task 78: Match all words longer than 10 characters

In [18]:
import re

text = "This is a very long word, and anotherwordisprettybig."
long_words = re.findall(r"\b\w{11,}\b", text)

print(f"Words longer than 10 characters: {long_words}")

Words longer than 10 characters: ['anotherwordisprettybig']


# Task 79: Match words with at least 3 vowels

In [19]:
import re

text = "beautiful, aeon, cat, regular, expression"
pattern = r"\b(?=\w*([aeiouAEIOU]\w*){3})\w+\b"
words_with_vowels = re.findall(pattern, text)

print(f"Words with at least 3 vowels: {words_with_vowels}")

Words with at least 3 vowels: ['ul', 'on', 'ar', 'on']


# Task 80: Match numbers divisible by 5 (ending with 0 or 5)

In [20]:
import re

text = "10, 25, 33, 40, 55, 101"
numbers = re.findall(r"\b\d*[05]\b", text)

print(f"Numbers divisible by 5: {numbers}")

Numbers divisible by 5: ['10', '25', '40', '55']


# Task 81: Match text between two specific words

In [21]:
import re

text = "start This is the text in between end"
match = re.search(r"(?<=start\s).*(?=\send)", text)

if match:
    print(f"Text between 'start' and 'end': '{match.group(0)}'")

Text between 'start' and 'end': 'This is the text in between'


# Task 82: Find words containing double letters

In [22]:
import re

text = "Hello, I am looking for a word with double letters."
words = re.findall(r"\b\w*(\w)\1\w*\b", text)

print(f"Words with double letters: {words}")

Words with double letters: ['l', 'o', 't']


# Task 83: Match numbers with commas (e.g., 1,000, 20,345)

In [23]:
import re

text = "Sales figures: 1,000, 20,345, 1234567, 987,654"
numbers = re.findall(r"\b\d{1,3}(?:,\d{3})*\b", text)

print(f"Numbers with commas: {numbers}")

Numbers with commas: ['1,000', '20,345', '987,654']


# Task 84: Match Roman numerals

In [24]:
import re

text = "I, II, IV, VI, VIII, IX, X, C, D, M"
pattern = r"\bM{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})\b"
roman_numerals = re.findall(pattern, text, re.IGNORECASE)

print(f"Roman numerals found: {roman_numerals}")

Roman numerals found: [('', '', 'I'), ('', '', ''), ('', '', 'II'), ('', '', ''), ('', '', 'IV'), ('', '', ''), ('', '', 'VI'), ('', '', ''), ('', '', 'VIII'), ('', '', ''), ('', '', 'IX'), ('', '', ''), ('', 'X', ''), ('', '', ''), ('C', '', ''), ('', '', ''), ('D', '', ''), ('', '', ''), ('', '', ''), ('', '', '')]


# Task 86: Match a repeated sequence of characters like "abcabc"

In [25]:
import re

text = "abcabc is a pattern, but abc is not."
match = re.search(r"(\w{3})\1", text)

if match:
    print(f"Repeated sequence found: {match.group(0)}")

Repeated sequence found: abcabc


# Task 87: Match binary numbers containing only 0 and 1

In [26]:
import re

text = "Binary numbers: 10101, 1100, 202"
binary_numbers = re.findall(r"\b[01]+\b", text)

print(f"Binary numbers found: {binary_numbers}")

Binary numbers found: ['10101', '1100']


# Task 88: Validate MAC addresses

In [27]:
import re

text1 = "01-23-45-67-89-ab"
text2 = "01:23:45:67:89:AB"
text3 = "01:23:45:67:89:G"

pattern = r"^([0-9a-fA-F]{2}[-:]){5}[0-9a-fA-F]{2}$"

print(f"'{text1}' is a valid MAC address: {bool(re.fullmatch(pattern, text1))}")
print(f"'{text2}' is a valid MAC address: {bool(re.fullmatch(pattern, text2))}")
print(f"'{text3}' is a valid MAC address: {bool(re.fullmatch(pattern, text3))}")

'01-23-45-67-89-ab' is a valid MAC address: True
'01:23:45:67:89:AB' is a valid MAC address: True
'01:23:45:67:89:G' is a valid MAC address: False


# Task 89: Extract the first and last name from "John Doe"

In [28]:
import re

text = "John Doe"
match = re.search(r"^(\w+)\s(\w+)$", text)

if match:
    first_name = match.group(1)
    last_name = match.group(2)
    print(f"First Name: {first_name}")
    print(f"Last Name: {last_name}")

First Name: John
Last Name: Doe


# Task 90: Match all HTML comments.

In [29]:
import re

text = "<p>Some text</p> "
comments = re.findall(r"", text, re.DOTALL)

print(f"HTML comments found: {comments}")

HTML comments found: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
