# Match
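This function attempts to match the RE pattern at the beginning of the string, with optional flags. It returns a match object on success and None if the start of the string does not match.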

Here is the syntax for this function −

<pre>
    re.match(pattern, string, flags=0)
</pre>

In [23]:
import re

# Sample sentence used to demonstrate re.match().
line = "Cats are smarter than dogs"

# re.match() only tries the pattern at the start of the string.
# Group 1 is greedy; group 2 is non-greedy, so it ends at the first space that follows it.
matchObj = re.match(r'(.*) are (.*?) .*', line, flags=re.M | re.I)

if matchObj is None:
    print("No match!!")
else:
    # group() with no argument is the whole match; group(n) is the n-th captured group.
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))

matchObj.group() :  Cats are smarter than dogs
matchObj.group(1) :  Cats
matchObj.group(2) :  smarter


# The search Function
This function searches for the first occurrence of the RE pattern anywhere within the string, with optional flags.

Here is the syntax for this function −
<pre>
    re.search(pattern, string, flags=0)
 </pre>

In [24]:
import re

# Sample sentence used to demonstrate re.search().
line = "Cats are smarter than dogs"

# re.search() scans the string for the first position where the pattern matches.
searchObj = re.search(r'(.*) are (.*?) .*', line, re.M | re.I)

if searchObj:
    # group() is the whole match; group(1) and group(2) are the captured groups.
    print("searchObj.group() : ", searchObj.group())
    print("searchObj.group(1) : ", searchObj.group(1))
    print("searchObj.group(2) : ", searchObj.group(2))
else:
    print("Nothing found!!")

searchObj.group() :  Cats are smarter than dogs
searchObj.group(1) :  Cats
searchObj.group(2) :  smarter


# Search and Replace

Syntax
re.sub(pattern, repl, string, count=0, flags=0)

This method replaces occurrences of the RE pattern in string with repl. All occurrences are replaced unless count is given, in which case at most count replacements are made. This method returns the modified string.

In [16]:
#example

In [19]:
phone = "2004-959-559 # This is Phone Number"

# Strip everything from the first '#' to the end of the string (a Python-style comment).
num = re.sub(pattern=r'#.*$', repl="", string=phone)
print("Phone Num : ", num)

# Starting again from the original string, drop every non-digit character.
num = re.sub(pattern=r'\D', repl="", string=phone)
print("Phone Num : ", num)

Phone Num :  2004-959-559 
Phone Num :  2004959559


# Regular Expression Modifiers: Option Flags
|Sl No.| Modifier | Description |
|---|------|--------------------|
| 1 | re.I | Performs case-insensitive matching. |
| 2 | re.L | Interprets words according to the current locale. |
| 3 | re.M | Makes $ match the end of a line (not just the end of the string) and makes ^ match the start of any line (not just the start of the string).|
| 4 | re.S | Makes a period (dot) match any character, including a newline. |
| 5 | re.U | Interprets letters according to the Unicode character set. This flag affects the behavior of \w, \W, \b, \B. |
| 6 | re.X | Permits verbose ("cuter"), more readable regular expression syntax. It ignores whitespace (except inside a set [] or when escaped by a backslash) and treats unescaped # as a comment marker. |

# Regular Expression Patterns
Except for the metacharacters (+ ? . * ^ $ ( ) [ ] { } | \), all characters match themselves. You can match a metacharacter literally by preceding it with a backslash.

|Sl No.| Pattern | Description |
|---|------|--------------------|
| 1 | ^ |  |
| 2 | $ |  |
| 3 | . |  |
| 4 | [...] |  |
| 5 | [^...] |  |
| 6 | re* |  |
| 7 | re+ |  |
| 8 | re? |  |
| 9 | re{n} |  |
| 10 | re{n,} |  |
| 11 | re{n,m} |  |
| 12 | a\|b |  |
| 13 | (re) |  |
| 14 | (?imx) |  |
| 15 | (?-imx) |  |
| 16 | (?:re) |  |
| 17 | (?imx:re) |  |
| 18 | (?-imx:re) |  |
| 19 | (?#...) |  |
| 20 | (?=re) |  |
| 21 | (?!re) |  |
| 22 | (?>re) |  |
| 23 | \w |  |
| 24 | \W |  | 
| 25 | \s |  |
| 26 | \S |  |
| 27 | \d |  |
| 28 | \D |  |
| 29 | \A |  |
| 30 | \Z |  |
| 31 | \z |  |
| 32 | \G |  |
| 33 | \b |  |
| 34 | \B |  |
| 35 | \n, \t, etc. |  |
| 36 | \1...\9 |  |
| 37 | \10 |  |

# Regular Expression Examples

In [2]:
import re

## Literal characters

In [3]:
# A pattern made of literal characters matches that exact text at the start of the string.
re.match("python", "python in NLP")

<re.Match object; span=(0, 6), match='python'>

## Character classes

In [8]:
## Match "Python" or "python"

In [6]:
# The set [Pp] accepts either case for the first letter; here the capitalised form matches.
re.match('[Pp]ython',"Python in NLP")

<re.Match object; span=(0, 6), match='Python'>

In [7]:
# Same pattern as above; the set [Pp] also accepts the lowercase first letter.
re.match('[Pp]ython',"python in NLP")

<re.Match object; span=(0, 6), match='python'>

In [13]:
# Match "ruby" or "rube": the set [ye] accepts either final letter.
re.match("rub[ye]", "rube is a gem stone")

<re.Match object; span=(0, 4), match='rube'>

In [37]:
# Match any one lowercase vowel at the start of the string.
re.match("[aeiou]", "emerold is a gem stone")

<re.Match object; span=(0, 1), match='e'>

In [39]:
# Match any single digit; the range [0-9] is the same as [0123456789].
re.match("[0-9]", "1st in the game")

<re.Match object; span=(0, 1), match='1'>

In [41]:
# Match any single lowercase ASCII letter.
re.match("[a-z]", "fantastic in the game")

<re.Match object; span=(0, 1), match='f'>

In [43]:
# Match any single uppercase ASCII letter.
re.match("[A-Z]", "Fantastic in the game")

<re.Match object; span=(0, 1), match='F'>

In [44]:
# Match any single ASCII letter (either case) or digit by combining the three ranges.
re.match("[a-zA-Z0-9]", "Fantastic in the game")

<re.Match object; span=(0, 1), match='F'>

In [47]:
# A leading ^ inside the set negates it: match any character other than a lowercase vowel.
re.match("[^aeiou]", "fantastic in the game")

<re.Match object; span=(0, 1), match='f'>

In [48]:
# Negated range: match any single character other than a digit 0-9.
re.match("[^0-9]", "fantastic in the game")

<re.Match object; span=(0, 1), match='f'>

# Special Character Classes

In [49]:
# "." matches any single character except a newline.
re.match(".", "fantastic in the game")

<re.Match object; span=(0, 1), match='f'>

In [51]:
# Match a digit (for ASCII text, the same as [0-9]).
# Raw string so the backslash reaches the regex engine unchanged.
re.search(r"\d", "earth is 3rd planet in solar system")

<re.Match object; span=(9, 10), match='3'>

In [54]:
# Match a non-digit (for ASCII text, the same as [^0-9]).
# Raw string so the backslash reaches the regex engine unchanged.
re.search(r"\D", "earth is 3rd planet in solar system")

<re.Match object; span=(0, 1), match='e'>

In [55]:
# Match a whitespace character (for ASCII text: space, \t, \n, \r, \f, \v).
# Raw string so the backslash reaches the regex engine unchanged.
re.search(r"\s", "earth is 3rd planet in solar system")

<re.Match object; span=(5, 6), match=' '>

In [56]:
# Match a non-whitespace character (the complement of \s).
# Raw string so the backslash reaches the regex engine unchanged.
re.search(r"\S", "earth is 3rd planet in solar system")

<re.Match object; span=(0, 1), match='e'>

In [57]:
# Match a single word character (for ASCII text, the same as [A-Za-z0-9_]).
# Raw string so the backslash reaches the regex engine unchanged.
re.search(r"\w", "earth is 3rd planet in solar system")

<re.Match object; span=(0, 1), match='e'>

In [58]:
# Match a non-word character (for ASCII text, the same as [^A-Za-z0-9_]).
# Raw string so the backslash reaches the regex engine unchanged.
re.search(r"\W", "earth is 3rd planet in solar system")

<re.Match object; span=(5, 6), match=' '>

# Repetition Cases

In [63]:
# Match "rub" or "ruby": the "?" makes the preceding y optional (zero or one occurrence).
re.match("ruby?", "ruby is a gem")

<re.Match object; span=(0, 4), match='ruby'>

In [65]:
# Match "rub" followed by zero or more y characters; "ru" lacks the b, so it fails.
for subject in ("rub", "ruby", "rubyyyyy", "ru"):
    print(re.match("ruby*", subject))

<re.Match object; span=(0, 3), match='rub'>
<re.Match object; span=(0, 4), match='ruby'>
<re.Match object; span=(0, 8), match='rubyyyyy'>
None


In [67]:
# Match "rub" followed by at least one y; plain "rub" therefore does not match.
for subject in ("rub", "ruby", "rubyyyyy"):
    print(re.match("ruby+", subject))


None
<re.Match object; span=(0, 4), match='ruby'>
<re.Match object; span=(0, 8), match='rubyyyyy'>


In [70]:
# Match exactly 3 digits at the start of the string; extra digits are left unmatched.
# Raw strings keep the backslash in \d intact for the regex engine.
print(re.match(r"\d{3}", "123"))
print(re.match(r"\d{3}", "1234"))
print(re.match(r"\d{3}", "12"))

<re.Match object; span=(0, 3), match='123'>
<re.Match object; span=(0, 3), match='123'>
None


In [71]:
# Match 3 or more digits: {3,} has no upper bound, so all leading digits are consumed.
# Raw strings keep the backslash in \d intact for the regex engine.
print(re.match(r"\d{3,}", "123"))
print(re.match(r"\d{3,}", "1234"))
print(re.match(r"\d{3,}", "12"))

<re.Match object; span=(0, 3), match='123'>
<re.Match object; span=(0, 4), match='1234'>
None


In [72]:
# Match 3, 4, or 5 digits: at most five are consumed even when more are available.
# Raw strings keep the backslash in \d intact for the regex engine.
print(re.match(r"\d{3,5}", "123"))
print(re.match(r"\d{3,5}", "1234"))
print(re.match(r"\d{3,5}", "123456"))
print(re.match(r"\d{3,5}", "12"))

<re.Match object; span=(0, 3), match='123'>
<re.Match object; span=(0, 4), match='1234'>
<re.Match object; span=(0, 5), match='12345'>
None


# Nongreedy repetition

In [76]:
# Greedy: ".*" takes as much as it can, so the match runs through to the last ">".
greedy_result = re.match("<.*>", "<python>perl>")
print(greedy_result)

<re.Match object; span=(0, 13), match='<python>perl>'>


In [77]:
# Non-greedy: ".*?" takes as little as it can, so the match stops at the first ">".
lazy_result = re.match("<.*?>", "<python>perl>")
print(lazy_result)

<re.Match object; span=(0, 8), match='<python>'>


# Alternatives

In [84]:
# Alternation: "rub" followed by either "y" or "le"; "rube" fits neither branch.
for subject in ("ruby", "ruble", "rube"):
    print(re.match(r"rub(y|le)", subject))

<re.Match object; span=(0, 4), match='ruby'>
<re.Match object; span=(0, 5), match='ruble'>
None


In [88]:
# "Python" followed by either one or more "!" or exactly one literal "?".
punctuated = re.match(r"Python(!+|\?)", "Python!!!")
print(punctuated)

<re.Match object; span=(0, 9), match='Python!!!'>


# Anchors

In [96]:
# Match "Python" at the start of the string. No re.M flag is passed here, so "^"
# anchors only at the start of the string, as "\A" always does.
# Raw strings keep the backslash in \A intact for the regex engine.
print(re.match(r"^Python", "Python"))
print(re.match(r"\APython", "Python"))
print(re.match(r"\AJupyter", "python"))

<re.Match object; span=(0, 6), match='Python'>
<re.Match object; span=(0, 6), match='Python'>
None


In [97]:
# Match "Python" at the end of the string: "$" matches at the end (or just before
# a trailing newline), while "\Z" matches only at the very end.
# Raw strings keep the backslash in \Z intact for the regex engine.
print(re.match("Python$", "Python"))
print(re.match(r"Python\Z", "Python"))
print(re.match(r"Jupyter\Z", "python"))

<re.Match object; span=(0, 6), match='Python'>
<re.Match object; span=(0, 6), match='Python'>
None


In [92]:
# Positive lookahead: match "Python" only when "!" comes next; the "!" itself is not consumed.
for subject in ("Python", "Python!"):
    print(re.match("Python(?=!)", subject))

None
<re.Match object; span=(0, 6), match='Python'>


In [91]:
# Negative lookahead: match "Python" only when the next character is not "!".
for subject in ("Python", "Python!"):
    print(re.match("Python(?!!)", subject))

<re.Match object; span=(0, 6), match='Python'>
None


> End of Program