In [None]:
# Regular Expression
# Regular expressions (regex) in Python are used to match patterns in strings.
# Useful for tasks like validation, searching, or substitution.

In [1]:
import re

In [2]:
# Introduction

# 1. re.search() - checks for a match anywhere in the string
match = re.search(r"\d+", "Captain Cool's number is 7.")
print(match.group())

# 2. re.match() - checks for a match at the beginning
match = re.match(r"\d+", "7th July is Mahi's Birthday")
print(match.group())

# 3. re.findall() - returns all matches in a list
numbers = re.findall(r"\d+", "Captain Cool's Birthday is 7-7-1981")
print(numbers)

# 4. re.sub() - replaces matches
clean_text = re.sub(r"\d+", "#", "ABC: 2025, XYZ: 2026")
print(clean_text)

# 5. re.split() - splits string by the pattern
words = re.split(r"\s+", "Mahendra Singh Dhoni")
print(words)  # Output: ['Split', 'this', 'sentence', 'into', 'words']

7
7
['7', '7', '1981']
ABC: #, XYZ: #
['Mahendra', 'Singh', 'Dhoni']


In [None]:
# Sequence Characters in Regular Expressions
'''
| Sequence | Description                         | Example Match        |
|----------|-------------------------------------|----------------------|
| `\d`     | Any digit (0–9)                     | `'4'`, `'0'`         |
| `\D`     | Any non-digit                       | `'a'`, `'#'`         |
| `\w`     | Any alphanumeric character
              (a–z, A–Z, 0–9, _) | `'A'`,
              `'9'`, `'_'` |
| `\W`     | Any non-word character              | `'@'`, `'!'`         |
| `\s`     | Any whitespace character            | `' '`, `'\n'`        |
| `\S`     | Any non-whitespace character        | `'a'`, `'1'`         |
| `\b`     | Word boundary                       | Matches boundary     |
| `\B`     | Non-word boundary                   | Matches inside words |
| `\\`     | Backslash character                 | `'\\'`               |
| `\t`     | Tab                                 | Tab space            |
| `\n`     | Newline                             | Newline              |
| `.`      | Any character except newline        | `'a'`, `'1'`, etc.   |
'''

In [3]:
print("=== Sequence Characters in Regular Expressions ===")

# \d => matches any digit (0-9)
print("Digits:", re.findall(r'\d', "mahi7781"))
# ['7', '7', '8', '1']

# \D => matches any non-digit
print("Non-digits:", re.findall(r'\D', "mahi7781"))
# ['m', 'a', 'h', 'i']

# \w => matches any alphanumeric character or underscore (a-z, A-Z, 0-9, _)
print("Word characters (alphanumeric or _):", re.findall(r'\w', "dhoni@7"))
# ['d', 'h', 'o', 'n', 'i', '7']

# \W => matches any non-word character
print("Non-word characters (symbols):", re.findall(r'\W', "dhoni@7"))
# ['@']

# \s => matches any whitespace character (space, tab, newline)
print("Whitespace characters (tab, space, newline):", re.findall(r'\s', "Dhoni\tZiva \nCSK"))
# ['\t', ' ', '\n']

# \S => matches any non-whitespace character
# All visible characters excluding tab, space, newline
print("Non-whitespace characters:", re.findall(r'\S', "Dhoni\tZiva \nCSK"))
# ['D', 'h', 'o', 'n', 'i', 'Z', 'i', 'v', 'a', 'C', 'S', 'K']

# \b => matches word boundaries (start or end of a word)
print("Word boundary before 'MSD':", re.findall(r'\bMSD', "hi there, this is MSD"))
# ['MSD']

# \B => matches inside a word (no boundary before 'MSD')
print("No boundary before 'MSD':", re.findall(r'\BMSD', "hi there, this is CaptainCoolMSDhoni."))
# ['MSD']

# \\ => matches a backslash
print("Backslashes:", re.findall(r'\\', "C:\\Users\\Mahi"))
# ['\\', '\\']

# \t => matches tab character
print("Tab character:", re.findall(r'\t', "Hello\tWorld"))
# ['\t']

# \n => matches newline character
print("Newline character:", re.findall(r'\n', "Captain\nCool"))
# ['\n']

# . => matches any character except newline
print("Any character between 'a' and 'b':", re.findall(r'a.b', "a1b a_b a b aXb"))
# ['a1b', 'a_b', 'aXb']

=== Sequence Characters in Regular Expressions ===
Digits: ['7', '7', '8', '1']
Non-digits: ['m', 'a', 'h', 'i']
Word characters (alphanumeric or _): ['d', 'h', 'o', 'n', 'i', '7']
Non-word characters (symbols): ['@']
Whitespace characters (tab, space, newline): ['\t', ' ', '\n']
Non-whitespace characters: ['D', 'h', 'o', 'n', 'i', 'Z', 'i', 'v', 'a', 'C', 'S', 'K']
Word boundary before 'MSD': ['MSD']
No boundary before 'MSD': ['MSD']
Backslashes: ['\\', '\\']
Tab character: ['\t']
Newline character: ['\n']
Any character between 'a' and 'b': ['a1b', 'a_b', 'a b', 'aXb']


In [None]:
# Quantifiers in Regular Expressions
'''
| Quantifier |        Description           |              Example Match             |
|------------|------------------------------|----------------------------------------|
| `*`        | 0 or more times              | `'ab*'` → `'a'`, `'ab'`, `'abbb'`      |
| `+`        | 1 or more times              | `'ab+'` → `'ab'`, `'abbb'`             |
| `?`        | 0 or 1 time                  | `'ab?'` → `'a'`, `'ab'`                |
| `{n}`      | Exactly `n` times            | `'a{3}'` → `'aaa'`                     |
| `{n,}`     | At least `n` times           | `'a{2,}'` → `'aa'`, `'aaa'`            |
| `{n,m}`    | Between `n` and `m` times    | `'a{2,4}'` → `'aa'`, `'aaa'`, `'aaaa'` |
'''

In [4]:
print("=== Quantifiers in Regular Expressions ===")

# * => 0 or more of the preceding character
print("'*' (0 or more 'b's after 'a'):", re.findall(r'ab*', "a ab abb abbb a"))
# ['a', 'ab', 'abb', 'abbb', 'a']

# + => 1 or more of the preceding character
print("'+' (1 or more 'b's after 'a'):", re.findall(r'ab+', "a ab abb abbb a"))
# ['ab', 'abb', 'abbb']

# ? => 0 or 1 of the preceding character
print("'?' (0 or 1 'b' after 'a'):", re.findall(r'ab?', "a ab abb abbb a"))
# ['a', 'ab', 'ab', 'ab', 'a']

# {n} => exactly n times
print("'{3}' (exactly 3 'a's):", re.findall(r'a{3}', "a aa aaa aaaa aaaaa"))
# ['aaa', 'aaa']

# {n,} => at least n times
print("'{2,}' (2 or more 'a's):", re.findall(r'a{2,}', "a aa aaa aaaa aaaaa"))
# ['aa', 'aaa', 'aaaa', 'aaaaa']

# {n,m} => between n and m times (inclusive)
print("'{2,4}' (between 2 and 4 'a's):", re.findall(r'a{2,4}', "a aa aaa aaaa aaaaa"))
# ['aa', 'aaa', 'aaaa']

=== Quantifiers in Regular Expressions ===
'*' (0 or more 'b's after 'a'): ['a', 'ab', 'abb', 'abbb', 'a']
'+' (1 or more 'b's after 'a'): ['ab', 'abb', 'abbb']
'?' (0 or 1 'b' after 'a'): ['a', 'ab', 'ab', 'ab', 'a']
'{3}' (exactly 3 'a's): ['aaa', 'aaa', 'aaa']
'{2,}' (2 or more 'a's): ['aa', 'aaa', 'aaaa', 'aaaaa']
'{2,4}' (between 2 and 4 'a's): ['aa', 'aaa', 'aaaa', 'aaaa']


In [None]:
# Methods in re Module
'''
| Method        |                Description                                |
|---------------|-----------------------------------------------------------|
| `findall()`   | Returns all non-overlapping matches as a list             |
| `finditer()`  | Returns an iterator yielding match objects                |
| `search()`    | Returns the first match object or `None`                  |
| `match()`     | Checks for a match at the beginning of a string           |
| `sub()`       | Replaces matches with a string                            |
| `compile()`   | Compiles a regex pattern into a regex object              |
'''

In [5]:
print("=== Regex Methods in Python ===")

# findall() => Returns all non-overlapping matches as a list
print("findall():", re.findall(r'\d+', "Mahi7781 and MSD07"))
# ['7781', '07']

# finditer() => Returns an iterator of match objects
print("finditer():")
for match in re.finditer(r'\d+', "Mahi7781 and MSD07"):
    print(" Match found:", match.group(), "at position", match.start())

# search() => Returns the first match object or None
result = re.search(r'MSD', "Mahi7781 and MSD07")
print("search():", result.group() if result else "Not Found")
# MSD

# match() => Checks for a match only at the beginning of the string
result = re.match(r'Mahi', "Mahi7781 and MSD07")
print("match():", result.group() if result else "No Match")
# Mahi

# sub() => Replaces all matches with a specified string
print("sub():", re.sub(r'\d+', '###', "Mahi7781 and MSD07"))
# Mahi### and MSD###

# compile() => Compiles a pattern into a regex object for reuse
pattern = re.compile(r'\b\w{4}\b')  # words with exactly 4 letters
print("compile():", pattern.findall("Mahi loves Ziva and CSK team"))
# ['Mahi', 'Ziva', 'team']

=== Regex Methods in Python ===
findall(): ['7781', '07']
finditer():
 Match found: 7781 at position 4
 Match found: 07 at position 16
search(): MSD
match(): Mahi
sub(): Mahi### and MSD###
compile(): ['Mahi', 'Ziva', 'team']


In [None]:
# Special Characters in Regular Expressions
'''
| Character |             Description                    |
|-----------|--------------------------------------------|
| `.`       | Matches any character except newline       |
| `^`       | Matches the beginning of the string        |
| `$`       | Matches the end of the string              |
| `*`       | 0 or more repetitions                      |
| `+`       | 1 or more repetitions                      |
| `?`       | 0 or 1 repetition                          |
| `{}`      | Specifies the number of repetitions        |
| `[]`      | Matches any one character in brackets      |
| `[^]`     | Matches any character *not* in brackets    |
| `|`       | Acts as OR (`a|b` matches `a` or `b`)      |
| `()`      | Groups patterns                            |
| `\`       | Escapes special characters                 |
'''

In [6]:
print("=== Special Characters in Regular Expressions ===")

# . => Matches any character except newline
print("'.' (any character except newline):", re.findall(r'a.b', "a1b a_b a b aXb"))
# ['a1b', 'a_b', 'aXb']

# ^ => Matches the beginning of the string
print("'^' (start of string):", re.findall(r'^Mahi', "Mahi is MSD"))
# ['Mahi']

# $ => Matches the end of the string
print("'$' (end of string):", re.findall(r'MSD$', "He is MSD"))
# ['MSD']

# * => 0 or more repetitions
print("'*' (zero or more 'b' after 'a'):", re.findall(r'ab*', "a ab abb abbb a"))
# ['a', 'ab', 'abb', 'abbb', 'a']

# + => 1 or more repetitions
print("'+' (one or more 'b' after 'a'):", re.findall(r'ab+', "a ab abb abbb a"))
# ['ab', 'abb', 'abbb']

# ? => 0 or 1 repetition
print("'?' (zero or one 'b' after 'a'):", re.findall(r'ab?', "a ab abb abbb a"))
# ['a', 'ab', 'ab', 'ab', 'a']

# {} => Specifies the number of repetitions
print("'{}' (exactly 3 a's):", re.findall(r'a{3}', "a aa aaa aaaa"))
# ['aaa', 'aaa']

# [] => Matches any one character in brackets
print("'[]' (match 'a', 'b' or 'c'):", re.findall(r'[abc]', "xaybzcp"))
# ['a', 'b', 'c']

# [^] => Matches any character not in brackets
print("'[^]' (not 'a', 'b', or 'c'):", re.findall(r'[^abc]', "xaybzcp"))
# ['x', 'y', 'z', 'p']

# | => Acts as OR
print("'|' (a or b):", re.findall(r'a|b', "xaybzcp"))
# ['a', 'b']

# () => Groups patterns
print("'()' (group 'ab' repeated):", re.findall(r'(ab)+', "ab abab ababab ab"))
# ['ab', 'ab', 'ab', 'ab']

# \ => Escapes special characters
print("'\\' (escape dot to match actual .):", re.findall(r'MSD\.7', "He is MSD.7 not MSDX7"))
# ['MSD.7']

=== Special Characters in Regular Expressions ===
'.' (any character except newline): ['a1b', 'a_b', 'a b', 'aXb']
'^' (start of string): ['Mahi']
'$' (end of string): ['MSD']
'*' (zero or more 'b' after 'a'): ['a', 'ab', 'abb', 'abbb', 'a']
'+' (one or more 'b' after 'a'): ['ab', 'abb', 'abbb']
'?' (zero or one 'b' after 'a'): ['a', 'ab', 'ab', 'ab', 'a']
'{}' (exactly 3 a's): ['aaa', 'aaa']
'[]' (match 'a', 'b' or 'c'): ['a', 'b', 'c']
'[^]' (not 'a', 'b', or 'c'): ['x', 'y', 'z', 'p']
'|' (a or b): ['a', 'b']
'()' (group 'ab' repeated): ['ab', 'ab', 'ab', 'ab']
'\' (escape dot to match actual .): ['MSD.7']


In [7]:
# Program 1 - Email Validation

email = "msdhoni7781@gmail.com"
pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'

if re.match(pattern, email):
    print("Valid Email")
else:
    print("Invalid Email")

Valid Email


In [None]:
# pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'

'''
^ → Start of string
[\w\.-]+ → One or more of (word characters, dot or hyphen) → before @
@ → The @ symbol
[\w\.-]+ → One or more of (word characters, dot or hyphen) → domain
\. → A literal dot (escaped)
\w+ → One or more word characters (for domain suffix like com)
$ → End of string
'''

In [8]:
# Program 2 - Extract Phone Numbers from Text

text = "Call me at 9876543210 or at the office 0422-12345678."
pattern = r'\b(?:\d{10}|\d{3,4}-\d{6,8})\b'

print("Phone Numbers Found:", re.findall(pattern, text))

Phone Numbers Found: ['9876543210', '0422-12345678']


In [None]:
# pattern = r'\b(?:\d{10}|\d{3,4}-\d{6,8})\b'
'''
\b → Word boundary (ensures exact match, not inside another word)
(?:...) → Non-capturing group (choose one format)
\d{10} → 10 digits (mobile number)
| → OR
\d{3,4}-\d{6,8} → Landline format like o422-12345678
\b → Word boundary
'''

In [9]:
# Program 3 - Check Strong Password (at least 1 uppercase, lowercase, digit, special char, 8+ chars)

password = "MSD@CaptainCool7781"

pattern = r'^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@#$%^&+=!]).{8,}$'

if re.match(pattern, password):
    print("Strong Password")
else:
    print("Weak Password")


Strong Password


In [None]:
# pattern = r'^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@#$%^&+=!]).{8,}$'

'''
^ → Start of string
(?=.*[a-z]) → At least one lowercase letter
(?=.*[A-Z]) → At least one uppercase letter
(?=.*\d) → At least one digit
(?=.*[@#$%^&+=!]) → At least one special character
.{8,} → Minimum 8 characters of any kind
$ → End of string
'''

In [10]:
# Program 4 - Extract Hashtags from a Social Media Post

post = "The loudest cheer, from the closest hearts! 💛 #CSK #WhistlePodu #Yellove #MSDhoni🦁💛"
hashtags = re.findall(r'#\w+', post)

print("Hashtags:", hashtags)

Hashtags: ['#CSK', '#WhistlePodu', '#Yellove', '#MSDhoni']


In [None]:
# pattern = r'#\w+'

'''
# → The hashtag symbol
\w+ → One or more word characters (letters, numbers, underscore)
'''

In [11]:
# Program 5 - Validate Indian Vehicle Number Plate Format

plate = "TN-07-MS-7781"
pattern = r'^[A-Z]{2}-\d{2}-[A-Z]{1,2}-\d{4}$'

if re.match(pattern, plate):
    print("Valid Vehicle Number")
else:
    print("Invalid Number")

Valid Vehicle Number


In [None]:
# pattern = r'^[A-Z]{2}-\d{2}-[A-Z]{1,2}-\d{4}$'

'''
^[A-Z]{2} → 2 capital letters (state code)
- → Hyphen
\d{2} → 2 digits (district code)
- → Hyphen
[A-Z]{1,2} → 1 or 2 capital letters (series)
- → Hyphen
\d{4} → 4 digits
$ → End of string
'''

In [12]:
# PRogram 6 - Remove HTML Tags

html = "<h1>Welcome to Master Coding from Scratch! </h1><p>Today we're discussing about Regula Expressions in Python.</p>"
clean = re.sub(r'<.*?>', '', html)

print("Clean Text:", clean)

Clean Text: Welcome to Master Coding from Scratch! Today we're discussing about Regula Expressions in Python.


In [None]:
# pattern = r'<.*?>'

'''
< → Opening angle bracket
.*? → Match anything (non-greedy)
> → Closing bracket
→ Matches anything like <h1>, <p> etc.
'''

In [13]:
# Program 7 - Extract All Dates in DD-MM-YYYY Format

text = "Birthdays: 06-02-2015, 07-07-1981."
dates = re.findall(r'\b\d{2}-\d{2}-\d{4}\b', text)

print("Dates Found:", dates)

Dates Found: ['06-02-2015', '07-07-1981']


In [None]:
# pattern = r'\b\d{2}-\d{2}-\d{4}\b'

'''
\b → Word boundary
\d{2} → Two digits (day)
- → Hyphen
\d{2} → Two digits (month)
- → Hyphen
\d{4} → Four digits (year)
\b → Word boundary
'''

In [None]:
# Application
# Resume Parser
'''
You are tasked with building a Resume Parser that automatically extracts key details from a candidate's resume.
The input will be the text of the resume, and the output should include the following information:
Name of the candidate
Email address
Phone number
Skills listed in the resume
Education level or degree
Years of experience in the field

Requirements:
Name Extraction: Extract the candidate’s name, which may include a first name, middle name, and last name.
Email Validation: Ensure the email address is valid and extract it from the resume text.
Phone Number Extraction: Support different phone number formats, including international numbers with country codes,
area codes with parentheses, and different separators (spaces, dashes).
Skills Extraction: Extract the skills mentioned in the resume (such as programming languages, frameworks, or tools) from a predefined list.
Education Extraction: Extract the candidate’s educational background (e.g., "Bachelor", "Master", "PhD").
Experience Extraction: Extract the candidate’s work experience, specifying the number of years of experience (e.g., "5 years" or "3 yrs").

Assumptions:
The resume will be provided in plain text format.
The skills list can include common programming languages and frameworks (e.g., Python, Java, SQL, React, Node.js).
The phone number and email will be in standard formats but may vary in delimiters or spacing.
The name may include a first, middle, and last name.
'''

In [14]:
import re

def parse_resume(resume_text):
    # Regex for matching name (handles first, middle, last names)
    name_pattern = r'\b([A-Z][a-z]+(?: [A-Z][a-z]+)*)\b'
    name_match = re.search(name_pattern, resume_text)
    name = name_match.group(0) if name_match else "Name not found"

    # Regex for matching email with more accurate format
    email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'
    email_match = re.search(email_pattern, resume_text)
    email = email_match.group(0) if email_match else "Email not found"

    # Regex for matching phone numbers (handles different formats)
    phone_pattern = r'\b(?:\+?\d{1,2}\s?)?(\(?\d{3}\)?[\s-]?)?[\d{7}]+(?:[\s-]?\d{1,4})?\b'
    phone_match = re.search(phone_pattern, resume_text)
    phone = phone_match.group(0) if phone_match else "Phone number not found"

    # Regex for matching skills (expanded list)
    skills_pattern = r'\b(HTML|CSS|JavaScript|Python|Java|SQL|C\+\+|PHP|React|Node\.js|Ruby|Go|Swift|R|TypeScript|Django|C#|Scala|Kotlin|Rust)\b'
    skills_matches = re.findall(skills_pattern, resume_text)
    skills = ', '.join(skills_matches) if skills_matches else "Skills not found"

    # Regex for matching education (simple matching for degree names)
    education_pattern = r'\b(Bachelor|Master|Ph\.?D|Diploma)\b'
    education_match = re.search(education_pattern, resume_text)
    education = education_match.group(0) if education_match else "Education not found"

    # Regex for matching years of experience (simple matching for years like "5 years")
    experience_pattern = r'\b(\d{1,2})\s?(year|yr|yrs|years)\b'
    experience_match = re.search(experience_pattern, resume_text)
    experience = experience_match.group(0) if experience_match else "Experience not found"

    return {
        "Name": name,
        "Email": email,
        "Phone": phone,
        "Skills": skills,
        "Education": education,
        "Experience": experience
    }

# Sample resume text
resume_text = """
MS Dhoni
Email: mahi7781@gmail.com
Phone: 12345 67890
Skills: Python, Java, SQL, HTML, CSS, JavaScript
Education: Bachelor of Science in Computer Science
Experience: 5 years in software development
"""

# Parse the resume
parsed_info = parse_resume(resume_text)

# Display parsed information
print("Parsed Resume Information:")
for key, value in parsed_info.items(): # We'll discuss dict soon!
    print(f"{key}: {value}")


Parsed Resume Information:
Name: Dhoni
Email: mahi7781@gmail.com
Phone: 12345 67890
Skills: Python, Java, SQL, HTML, CSS, JavaScript
Education: Bachelor
Experience: 5 years


In [None]:
'''
Name Pattern (name_pattern)
r'\b([A-Z][a-z]+(?: [A-Z][a-z]+)*)\b'

\b: Word boundary. This ensures that the match is a complete word, and we don't match part of another word.
[A-Z]: Matches a single uppercase letter (for the first letter of the name).
[a-z]+: Matches one or more lowercase letters (the rest of the name).
(?: [A-Z][a-z]+)*: This part matches middle names or last names:
(?: ... ): A non-capturing group. It groups the pattern but doesn't create a separate match for it.
[A-Z][a-z]+: Matches a space followed by a capital letter and then lowercase letters (i.e., another part of the name).
The * after this allows it to repeat, so it can handle multiple names (e.g., "John Doe" or "John Michael Doe").
\b: Word boundary again to ensure the match ends after the name.
Example match: "MS Dhoni".
----------------------------------------------------------------------------------------------------------------------
2. Email Pattern (email_pattern)
r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b'

\b: Word boundary, ensuring the email is matched as a whole.
[A-Za-z0-9._%+-]+: Matches the username part of the email, which can include:
Letters (A-Z and a-z), digits (0-9), underscores (_), periods (.), percent signs (%), pluses (+), and hyphens (-).
The + means that one or more of these characters can be used.
@: Matches the @ symbol that separates the username and domain.
[A-Za-z0-9.-]+: Matches the domain name part, which can contain letters, digits, dots, or hyphens.
\.: Matches the dot (.) before the domain extension.
[A-Z|a-z]{2,7}: Matches the domain extension, like .com, .org, .net. It allows 2 to 7 characters (case-insensitive).
\b: Word boundary, ensuring the email is a whole match.
Example match: "mahi7781@gmail.com".
----------------------------------------------------------------------------------------------------------------------
3. Phone Number Pattern (phone_pattern)
r'\b(?:\+?\d{1,2}\s?)?(\(?\d{3}\)?[\s-]?)?[\d{7}]+(?:[\s-]?\d{1,4})?\b'

\b: Word boundary to ensure we match a complete phone number.
(?:\+?\d{1,2}\s?)?: Matches an optional country code (e.g., +1, +91). It can have:
\+?: An optional plus sign (+).
\d{1,2}: One or two digits for the country code.
\s?: An optional space.
The entire country code pattern is optional because of the ? at the end.
(\(?\d{3}\)?[\s-]?)?: Matches the area code:
\(?\d{3}\)?: Matches the area code, which may be enclosed in parentheses (e.g., (123)).
[\s-]?: Optionally matches a space or hyphen after the area code.
[\d{7}]+: Matches the main 7 digits of the phone number.
(?:[\s-]?\d{1,4})?: Matches an optional extension (e.g., 1234), allowing for space or hyphen between the main digits and the extension.
\b: Word boundary to ensure the phone number is matched fully.
Example match: "12345 67890" or "+1 123-456-7890".
----------------------------------------------------------------------------------------------------------------------
4. Skills Pattern (skills_pattern)
r'\b(HTML|CSS|JavaScript|Python|Java|SQL|C\+\+|PHP|React|Node\.js|Ruby|Go|Swift|R|TypeScript|Django|C#|Scala|Kotlin|Rust)\b'

\b: Word boundary to ensure that we match complete skills.
(HTML|CSS|JavaScript|Python|...): This part lists all the skills you're interested in.
The | acts as an OR operator, meaning it will match any of the skills listed.
For example, it will match "Python", "Java", "React", etc.
\b: Word boundary again to ensure the skill name is complete.
Example match: "Python", "Java", "SQL", "React".
----------------------------------------------------------------------------------------------------------------------
5. Education Pattern (education_pattern)
r'\b(Bachelor|Master|Ph\.?D|Diploma)\b'

\b: Word boundary to ensure it's a full word match.
(Bachelor|Master|Ph\.?D|Diploma): Matches one of the educational degrees listed:
Bachelor, Master, Ph\.?D (with or without a period), and Diploma.
The \ is used to escape the dot in "Ph.D." to make sure it's treated as a literal period, not a wildcard.
\b: Word boundary to end the match.
Example match: "Bachelor", "Master", "Ph.D.".
----------------------------------------------------------------------------------------------------------------------
6. Experience Pattern (experience_pattern)
r'\b(\d{1,2})\s?(year|yr|yrs|years)\b'

\b: Word boundary to ensure a complete match.
(\d{1,2}): Matches one or two digits for the number of years of experience.
\s?: Optionally matches a space between the number and the word "year" or "years".
(year|yr|yrs|years): Matches the word "year", "yr", "yrs", or "years".
\b: Word boundary to end the match.
Example match: "5 years", "3 yrs", "1 year".
'''