# Raw strings
- can be created using `r` or `R` followed by a sequence of characters in quotes
- a special kind of string literal that treats backslashes `\` as ordinary characters instead of as the start of an escape sequence
- useful when working with regular expressions

In [4]:
# The r and R prefixes are interchangeable, and the quote style does not
# matter either: both literals denote the same string.
r'apple'==R"apple"

True

In [5]:
print(r"foo\nbar")  # raw string: \n stays a literal backslash followed by n, it is not turned into a newline

foo\nbar


In [6]:
# With no backslash in the literal, the raw and the regular form are the
# same string; the r prefix only changes how backslashes are read.
r"abc"=='abc'

True

`re`: Python's standard-library module for working with regular expressions

In [1]:
import re

`^`: Matches the start of a string
<br>_re.match()_ checks whether the pattern matches at the beginning of the given string

In [8]:
# re.match returns a match object on success and None otherwise, so
# comparing against None yields the True/False that gets printed.
pattern = r"^start"
print(re.match(pattern, "start here") is not None)  # string begins with "start" -> matches
print(re.match(pattern, "not starting") is not None)  # "start" is not at the beginning -> no match

True
False


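`$`: Matches the end of a string
<br>_re.search()_ scans through the string and returns the first location where the pattern matches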

In [9]:
# re.search returns None when the pattern is found nowhere in the string,
# so the comparison below prints a plain True/False.
print(re.search(r"end$", "the end") is not None)  # "end" is the last thing in the string -> matches
print(re.search(r"end$", "ending") is not None)  # "end" is followed by "ing" -> no match

True
False


`[ ]`: Defines a set of characters, any one of which can match at that position

In [10]:
re.findall(r"[a-z]", "abc123")  # [a-z] is the set of lowercase ASCII letters a through z; each match is one character

['a', 'b', 'c']

In [11]:
re.findall(r"[0-9]", "abc123")  # [0-9] is the set of ASCII digits 0 through 9; each match is one character

['1', '2', '3']

`[^ ]`: Defines a set of characters that should not match

In [12]:
re.findall(r"[^0-9]", "abc#@4123₹")  # ^ right after [ negates the set: matches any single character that is not 0-9

['a', 'b', 'c', '#', '@', '₹']

Predefined Character Classes

In [13]:
re.findall(r"\d", "abc123")  # \d matches any single decimal digit

['1', '2', '3']

In [14]:
re.findall(r"\w", "abc_123")  # \w matches any single word character: a letter, a digit or the underscore

['a', 'b', 'c', '_', '1', '2', '3']

In [15]:
re.findall(r"\s", "hello world")  # \s matches any single whitespace character, e.g. a space, tab or newline

[' ']

`*`: Matches 0 or more repetitions of the preceding element.

In [16]:
# The greedy a* first consumes the whole run 'aaa'. Because * also allows
# zero repetitions, one more zero-length match is found at the end of the
# string, which is the trailing '' in the result.
re.findall(r"a*", "aaa")

['aaa', '']

`+`: Matches 1 or more repetitions of the preceding element

In [17]:
# Each maximal run of one or more 'a' becomes one match. Unlike a*, a+
# needs at least one character, so no empty strings show up in the result.
re.findall(r"a+", "aaap praan nam")

['aaa', 'aa', 'a']

`?`: Matches 0 or 1 occurrence of the preceding element.

In [18]:
# a? takes at most one 'a' per match, so the three letters are returned
# separately; the final '' is the zero-occurrence match at the end of the string.
re.findall(r"a?", "aaa")

['a', 'a', 'a', '']

`{n}`: Matches exactly n occurrences of the preceding element

In [19]:
# Matches do not overlap: 'baap' gives one 'aa', the five a's in 'laaaaap'
# give two 'aa' (one 'a' is left over), and the single 'a' in 'trap' is too short.
re.findall(r"a{2}", "baap laaaaap trap")

['aa', 'aa', 'aa']

`{n,}`: Matches n or more occurrences of the preceding element.

In [20]:
# Runs of at least two a's are matched greedily: 'aa' from 'baap' and
# 'aaa' from 'laaap'; the lone 'a' in 'trap' does not qualify.
re.findall(r"a{2,}", "baap laaap trap")

['aa', 'aaa']

`{n,m}`: Matches between n and m occurrences of the preceding element.

In [21]:
# The quantifier is greedy: three a's fall inside the 2..4 range, so the
# whole run is returned as a single match rather than as 'aa' plus a leftover.
re.findall(r"a{2,4}", "aaa")

['aaa']

`|`: Matches either the expression before or the expression after it

In [22]:
# The pattern has no word boundaries, so the 'cat' inside 'bcat' and the
# 'dog' inside 'ddog' are matched as well as the standalone words.
re.findall(r"cat|dog", "I have a cat ,bcat and a dog ddog.")

['cat', 'cat', 'dog', 'dog']

In [2]:
# Strip everything except ASCII letters: split the text on every single
# non-letter character and glue the remaining pieces (some of them empty)
# back together. re.split returns a list, which str.join consumes directly.
# To keep whitespace as well, add \s to the negated set: r'[^a-zA-Z\s]'.
mystr = "ah( sD'j^  s23q& Hd"
''.join(re.split(r'[^a-zA-Z]', mystr))

'ahsDjsqHd'