# Quantifiers — * + ? and {}
    abc*        matches a string that has ab followed by zero or more c 
    abc+        matches a string that has ab followed by one or more c
    abc?        matches a string that has ab followed by zero or one c
    abc{2}      matches a string that has ab followed by 2 c
    abc{2,}     matches a string that has ab followed by 2 or more c
    abc{2,5}    matches a string that has ab followed by 2 up to 5 c
    ( )         groups part of a pattern so that a quantifier applies to the whole group
    []          matches any single character listed inside the brackets
    a(bc)*      matches a string that has a followed by zero or more copies of the sequence bc
    a(bc){2,5}  matches a string that has a followed by 2 up to 5 copies of the sequence bc

In [1]:
import re

In [3]:
# Sample text for the regex examples below.
# Raw string: the backslash before "extremely" is kept as a literal character
# instead of being read as the (invalid) escape sequence "\e".
text = r"""Regular expressions (regex or regexp). 123 are \extremely useful in extracting information 
from @any text by searching for one or more matches of a specific search pattern 
(i.e. a specific sequence of ASCII or unicode characters)."""

In [5]:
# g* : 're' followed by zero or more 'g', so a plain 're' also matches.
to_found = re.compile(r"reg*")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(11, 13), match='re'>
<re.Match object; span=(21, 24), match='reg'>
<re.Match object; span=(30, 33), match='reg'>
<re.Match object; span=(44, 46), match='re'>
<re.Match object; span=(51, 53), match='re'>
<re.Match object; span=(133, 135), match='re'>


## finditer()

### Finds all non-overlapping substrings where the RE matches and returns them as an iterator of match objects.

In [8]:
# g+ : 're' followed by at least one 'g'.
to_found = re.compile(r"reg+")
# Collect every match object produced by the iterator (list comprehension).
found_regex = [hit for hit in to_found.finditer(text)]
found_regex

[<re.Match object; span=(21, 24), match='reg'>,
 <re.Match object; span=(30, 33), match='reg'>]

#### span=(start, end) gives the index where each match starts and the index just past where it ends

In [9]:
# g? : 're' followed by zero or one 'g'.
to_found = re.compile(r"reg?")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(11, 13), match='re'>
<re.Match object; span=(21, 24), match='reg'>
<re.Match object; span=(30, 33), match='reg'>
<re.Match object; span=(44, 46), match='re'>
<re.Match object; span=(51, 53), match='re'>
<re.Match object; span=(133, 135), match='re'>


### Metacharacters are special characters that have a function when used in regex
###  . ^ $ * + ? { } [ ] \ | ( )


#### !!! So that Python can distinguish the two, put a \ before a metacharacter when you want to search for it literally !!!

In [17]:
# \. is a literal dot; the * allows zero or more of them,
# so 'specific' with no dot after it still matches.
to_found = re.compile(r"specific\.*")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(149, 157), match='specific'>
<re.Match object; span=(182, 190), match='specific'>


In [20]:
# \.+ requires at least one literal dot right after 'specific'.
# The text never contains 'specific.', so nothing is printed.
to_found = re.compile(r"specific\.+")
hits = to_found.finditer(text)
for match in hits:
    print(match)

In [21]:
# g{1} : 're' followed by exactly one 'g'.
to_found = re.compile(r"reg{1}")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(21, 24), match='reg'>
<re.Match object; span=(30, 33), match='reg'>


In [28]:
# g{1,} : 're' followed by one or more 'g' (equivalent to g+).
to_found = re.compile(r"reg{1,}")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(21, 24), match='reg'>
<re.Match object; span=(30, 33), match='reg'>


In [30]:
# g{2} : 're' followed by exactly two 'g'.
# There is no 'regg' in the text, so nothing is printed.
to_found = re.compile(r"reg{2}")
hits = to_found.finditer(text)
for match in hits:
    print(match)

In [32]:
# \) is a literal closing parenthesis; the * allows zero or more of them.
to_found = re.compile(r"regexp\)*")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(30, 37), match='regexp)'>


In [33]:
# (ge)* : 're' followed by zero or more copies of the sequence 'ge'.
to_found = re.compile(r"re(ge)*")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(11, 13), match='re'>
<re.Match object; span=(21, 25), match='rege'>
<re.Match object; span=(30, 34), match='rege'>
<re.Match object; span=(44, 46), match='re'>
<re.Match object; span=(51, 53), match='re'>
<re.Match object; span=(133, 135), match='re'>


In [35]:
# (ge)+ : 're' followed by one or more copies of the sequence 'ge'.
to_found = re.compile(r"re(ge)+")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(21, 25), match='rege'>
<re.Match object; span=(30, 34), match='rege'>


In [41]:
# (xt)+ : 'e' followed by one or more copies of the sequence 'xt'.
to_found = re.compile(r"e(xt)+")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(48, 51), match='ext'>
<re.Match object; span=(68, 71), match='ext'>
<re.Match object; span=(103, 106), match='ext'>


In [42]:
# Same sample text as before, redefined for the cells that follow.
# Raw string: the backslash before "extremely" is kept as a literal character
# instead of being read as the (invalid) escape sequence "\e".
text = r"""Regular expressions (regex or regexp). 123 are \extremely useful in extracting information 
from @any text by searching for one or more matches of a specific search pattern 
(i.e. a specific sequence of ASCII or unicode characters)."""

In [45]:
# Don't forget the difference between the re methods:
# search() returns only the first match, finditer() yields every match.
to_found = re.compile(r"ec(if)+")
first_hit = to_found.search(text)
print(first_hit)

<re.Match object; span=(151, 155), match='ecif'>


In [43]:
# (if)+ : 'ec' followed by one or more copies of the sequence 'if';
# finditer() walks through every match in the text.
to_found = re.compile(r"ec(if)+")
hits = to_found.finditer(text)
for match in hits:
    print(match)

<re.Match object; span=(151, 155), match='ecif'>
<re.Match object; span=(184, 188), match='ecif'>
