In [78]:
import re

### 1. Metacharacters: characters that have a special meaning in a pattern

In [79]:
# '^' pins the match to the start of the string, '$' to the end, and '.*'
# accepts any run of characters in between.
txt = 'The rain in Spain'

re.search(r'^The.*Spain$', txt)

<re.Match object; span=(0, 17), match='The rain in Spain'>

In [80]:
# Character set with a range: collect every single character from 'a' to 'p'.
re.findall(r'[a-p]', txt)

['h', 'e', 'a', 'i', 'n', 'i', 'n', 'p', 'a', 'i', 'n']

In [81]:
s = "ramu is 1 cricke0ter in team 789"
# Raw string: in a plain literal "\d" is an invalid escape sequence, which
# newer Python versions warn about. r"\d" passes the backslash to the regex
# engine unchanged.
re.findall(r"\d", s)  # \d is a special sequence that matches any single digit

['1', '0', '7', '8', '9']

In [82]:
# '.*' matches any run of characters: everything from a 'c' up to the last 'r'
re.findall(r'c.*r', s)

['cricke0ter']

In [83]:
# '^' anchors the pattern to the beginning of the string
re.findall(r'^ramu.*', s)

['ramu is 1 cricke0ter in team 789']

In [84]:
# search() returns a Match object for the first match (here: a leading 'r')
re.search(r'^r', s)

<re.Match object; span=(0, 1), match='r'>

In [85]:
# Must start with 'r' and end with '9', with anything in between
re.findall(r'^r.*9$', s)

['ramu is 1 cricke0ter in team 789']

In [86]:
# '+' : one or more occurrences of the preceding element

re.findall(r'^r.+', s)

['ramu is 1 cricke0ter in team 789']

In [87]:
# '?' : zero or one occurrence of the preceding element

re.findall(r'^r.?', s)

['ra']

In [88]:
# '{n}' : exactly n occurrences of the preceding element

re.findall(r"^r.{3}", s)

['ramu']

In [89]:
# Either or '|'
# Whitespace inside a pattern is literal, so "Good | Bad" would only match
# "Good " (with a trailing space) or " Bad" (with a leading space).
# Writing the alternatives without spaces matches the words themselves.
s1 = "Ramesh is Good boy"
re.findall(r"Good|Bad", s1)

['Good']

### 2. Special sequences: A special sequence is a \ followed by one of the characters in the list below, and has a special meaning

In [90]:
# "\A"  Returns a match if the specified characters are at the beginning of the string

# Raw string: "\A" in a plain literal is an invalid escape sequence.
re.findall(r"\Aram", s)

['ram']

In [91]:
# "\b" is a word boundary: the pattern below only matches "ram" where it
# begins a word.
s2 = 'ram naam ram satya hai ram'
re.findall(r'\bram', s2)

['ram', 'ram', 'ram']

In [92]:
# No word in s2 begins with "am", so the result is an empty list
re.findall(r'\bam', s2)

[]

### "\B" Returns a match where the specified characters are present, but NOT at the beginning (or at the end) of a word

In [93]:
# Raw string so "\B" (not at a word boundary) reaches the regex engine
# unchanged; in a plain literal it is an invalid escape sequence.
re.findall(r"\Baam", s2)

['aam']

In [94]:
# Raw string: "\B" in a plain literal is an invalid escape sequence.
# Matches every "am" that does not begin a word.
re.findall(r"\Bam", s2)

['am', 'am', 'am', 'am']

In [95]:
# "\d" matches any single digit (0 - 9)

# "\D" matches any single character that is NOT a digit

s3 = "Hello Ji"
s4 = "Hello 911 Help!"

# Raw string: "\D" in a plain literal is an invalid escape sequence.
re.findall(r"\D", s3)

['H', 'e', 'l', 'l', 'o', ' ', 'J', 'i']

In [96]:
# "\s" matches any single whitespace character

# "\S" matches any single character that is NOT whitespace

s5 = "Tathastu_vatsaa"

# Raw string: "\s" in a plain literal is an invalid escape sequence.
re.findall(r"\s", s4)

[' ', ' ']

In [97]:
# Raw string: "\d" in a plain literal is an invalid escape sequence.
re.findall(r"\d", s4)

['9', '1', '1']

In [98]:
# Raw string: "\S" in a plain literal is an invalid escape sequence.
re.findall(r"\S", s5)

['T', 'a', 't', 'h', 'a', 's', 't', 'u', '_', 'v', 'a', 't', 's', 'a', 'a']

In [99]:
# "\w" matches any single word character
# (letters a-z and A-Z, digits 0-9, and the underscore _ character)

# "\W" matches any single character that is NOT a word character

s6 = "Ram is 1 _name"
s7 = "### wq"
# Raw string: "\w" in a plain literal is an invalid escape sequence.
re.findall(r"\w", s6)

['R', 'a', 'm', 'i', 's', '1', '_', 'n', 'a', 'm', 'e']

In [100]:
# Raw string: "\W" in a plain literal is an invalid escape sequence.
re.findall(r"\W", s7)  # it detects space and special characters

['#', '#', '#', ' ']

In [101]:
# "\Z"  Returns a match if the specified characters are at the end of the string

s8 = "Curry is GOAT"
# Raw string: "\Z" in a plain literal is an invalid escape sequence.
re.findall(r"GOAT\Z", s8)

['GOAT']

### 3. Sets : A set is a set of characters inside a pair of square brackets [] with a special meaning 

In [102]:
# [arn] matches any ONE of the characters listed inside the brackets (a, r or n).
# Any characters can be listed; this example uses W, i, q and s.
s10 = 'Who am i to question your efforts'

re.findall(r'[Wiqs]', s10)

['W', 'i', 'q', 's', 'i', 's']

In [103]:
# [a-n] matches any single lower-case character alphabetically between a and n
# (any other range can be used in place of a-n)

re.findall(r'[a-n]', s10)

['h', 'a', 'm', 'i', 'e', 'i', 'n', 'e', 'f', 'f']

In [104]:
# [^arn] matches any single character EXCEPT a, r and n: a leading '^' negates the set.
# This example excludes a, b, r, n and s.
s11 = 'abronil is passed'
re.findall(r'[^abrns]', s11)

['o', 'i', 'l', ' ', 'i', ' ', 'p', 'e', 'd']

In [105]:
# [0123] matches any single one of the listed digits (0, 1, 2 or 3).
# This example lists 0, 1, 4 and 2 instead.
s12 = 'My rank is 134021 in JEE'

re.findall(r'[0142]', s12)

['1', '4', '0', '2', '1']

In [106]:
# [0-9] matches any single digit between 0 and 9
# (any other range works too; this example uses 0-2)

re.findall(r'[0-2]', s12)

['1', '0', '2', '1']

In [107]:
# [0-5][0-9] matches any two-digit number from 00 to 59:
# the first set constrains the tens digit, the second the units digit.

# (any other ranges can be used in place of 0-5 and 0-9)

s13 = 'my roll no is 45, day of birth is 30 and i scored 90%'

re.findall(r'[0-5][0-9]', s13)

['45', '30']

In [108]:
# [a-zA-Z] combines two ranges: any single letter a-z, lower case OR upper case

re.findall(r'[a-zA-Z]', s12)

['M', 'y', 'r', 'a', 'n', 'k', 'i', 's', 'i', 'n', 'J', 'E', 'E']

In [109]:
# Inside a set, +, *, ., |, (), $ and {} have no special meaning, so [+] simply
# matches a literal '+' character. This example looks for a literal '*'.
s14 = '1 + (2  * 9)'

re.findall(r'[*]', s14)

['*']

### 1. The findall() function returns a list containing all matches.

**See above examples**

1. Matches are returned in the order they are found
2. If there is no match, an empty list is returned

### 2. The search() function searches the string for a match, and returns a Match object if there is a match. 

1. If there is more than one match, only the first occurrence of the match will be returned
2. If no matches are found, the value None is returned

In [110]:
# The whole string starts with "The" and ends with "Spain", so search()
# returns a Match object.
txt = 'The rain in Spain'

re.search(r'^The.*Spain$', txt)

<re.Match object; span=(0, 17), match='The rain in Spain'>

In [111]:
# Same pattern against a string that does not fit it: search() returns None.
# Note that txt is rebound here and keeps this value in the cells that follow.
txt = 'It rains in Shimla'

print(re.search(r'^The.*Spain$', txt))

None


### 3. The split() function returns a list where the string has been split at each match

In [112]:
s15 = "Neeraj Chopra is an Indian track and field athlete."

# Split at every whitespace character. Raw string: "\s" in a plain literal
# is an invalid escape sequence.
re.split(r"\s", s15)

['Neeraj', 'Chopra', 'is', 'an', 'Indian', 'track', 'and', 'field', 'athlete.']

**You can control the number of splits by specifying the maxsplit parameter**

In [113]:
# Split at most twice. maxsplit is passed by keyword because passing it
# positionally is deprecated in recent Python versions. Raw string avoids
# the invalid "\s" escape sequence.
re.split(r"\s", s15, maxsplit=2)

['Neeraj', 'Chopra', 'is an Indian track and field athlete.']

### 4. The sub() function replaces the matches with the text of your choice

In [114]:
# Replace every whitespace character with '#'. Raw string: "\s" in a plain
# literal is an invalid escape sequence.
re.sub(r"\s", "#", s15)

'Neeraj#Chopra#is#an#Indian#track#and#field#athlete.'

**You can control the number of replacements by specifying the count parameter**

In [115]:
# Replace only the first three whitespace characters. count is passed by
# keyword because passing it positionally is deprecated in recent Python
# versions. Raw string avoids the invalid "\s" escape sequence.
re.sub(r"\s", "#", s15, count=3)

'Neeraj#Chopra#is#an Indian track and field athlete.'

## A Match Object is an object containing information about the search and the result.

1. If there is no match, the value None will be returned, instead of the Match Object.

In [116]:
# search() reports only the first place where "ai" occurs in txt
x = re.search(r'ai', txt)
print(x)

<re.Match object; span=(4, 6), match='ai'>


**The Match object has properties and methods used to retrieve information about the search, and the result**

1. .span() returns a tuple containing the start and end positions of the match.
2. .string returns the string passed into the function
3. .group() returns the part of the string where there was a match

In [117]:
# First word that begins with an upper-case "S"; span() gives (start, end)
x = re.search(r'\bS\w+', txt)
print(x.span())

(12, 18)


In [118]:
# .string is the full input string that was passed to search()
x = re.search(r'\bS\w+', txt)
print(x.string)

It rains in Shimla


In [119]:
# .group() is the part of the string that actually matched
x = re.search(r'\bS\w+', txt)
print(x.group())

Shimla
