# Character Identifiers

![image.png](attachment:d35335d3-bf3a-4db9-a337-2a80c9bd72b2.png)

In [1]:
import re

In [2]:
text = 'My phone number is 408-555-1234'
text1 = 'Now the phone number has changed but the old one remains for something else 408-555-1234'

In [3]:
# \d is the character identifier for one digit; {3} / {4} repeat it exactly
# that many times, so the pattern describes ddd-ddd-dddd.
phone = re.search(r'\d{3}-\d{3}-\d{4}', text1)

In [4]:
phone

<re.Match object; span=(76, 88), match='408-555-1234'>

In [5]:
phone.group()

'408-555-1234'

# Quantifiers

![image.png](attachment:70d33ea7-9d9a-4f7e-91dc-b50045363dbd.png)

In [6]:
# {n} is a quantifier: the token before it must occur exactly n times.
# Same pattern as before, this time run against the shorter `text`.
phone = re.search(r'\d{3}-\d{3}-\d{4}', text)

In [7]:
phone

<re.Match object; span=(19, 31), match='408-555-1234'>

In [8]:
# Each pair of parentheses is a capture group, so the three digit runs can be
# pulled out individually with group(1), group(2) and group(3).
phone_number = re.compile(r'(\d{3})-(\d{3})-(\d{4})')

In [9]:
phone_number

re.compile(r'(\d{3})-(\d{3})-(\d{4})', re.UNICODE)

In [10]:
# Call search() on the compiled pattern itself instead of handing it back to
# the module-level re.search(); the match object found is the same.
results = phone_number.search(text1)

In [11]:
results

<re.Match object; span=(76, 88), match='408-555-1234'>

In [12]:
results.group()

'408-555-1234'

In [13]:
# group(0) is the whole match, the same value group() gives with no argument.
results.group(0)

'408-555-1234'

In [14]:
results.group(1)

'408'

In [15]:
results.group(2)

'555'

In [16]:
results.group(3)

'1234'

In [17]:
# The pattern defines only three capture groups, so asking for a fourth
# raises IndexError.
results.group(4)

IndexError: no such group

# Additional Regex Syntax

In [19]:
re.search(r'cat', 'The cat is here')

<re.Match object; span=(4, 7), match='cat'>

In [20]:
re.search(r'cat|dog', 'The dog cat is here') # '|' (pipe) is the OR operator: either alternative may match; search returns the leftmost match in the string

<re.Match object; span=(4, 7), match='dog'>

In [21]:
# findall returns every non-overlapping occurrence as a list of strings.
re.findall(r'at', 'The cat with the hat sat there on the mat')

['at', 'at', 'at', 'at']

In [22]:
re.findall(r'.at', 'The cat in the hAt at went splat.') # '.' is a wildcard for any single character (by default anything except a newline); matching is case-sensitive, so 'hAt' is skipped

['cat', ' at', 'lat']

In [23]:
# Each '.' stands for exactly one character, so every match is three
# arbitrary characters followed by 'at'.
re.findall(r'...at', 'The cat in the hat at went splat.')

['e cat', 'e hat', 'splat']

In [24]:
re.findall(r'^\S', ' The number  is the start digit.') 
# caret '^' anchors the pattern to the start of the string; \S matches one non-whitespace character, so the leading space here means there is no match

[]

In [25]:
# ^\d requires a digit as the very first character; this string starts with
# 'H', so the result is empty.
re.findall(r'^\d', 'Hi, the start digit.')

[]

In [26]:
# {2,} means two or more repetitions; \w also matches digits, so the leading
# '5555' is matched up to the first space.
re.findall(r'^\w{2,}', '5555 Hi is the first digit.')

['5555']

In [27]:
# \D is any single non-digit character; the leading space qualifies.
re.findall(r'^\D', ' is 1 digit.')

[' ']

In [28]:
# \W{3}$ asks for exactly three non-word characters at the very end of the
# string.
re.findall(r'\W{3}$', '505464 564651+$%')

['+$%']

In [29]:
re.findall(r'\d$', 'The digit starts with 2') # dollar '$' anchors the pattern to the end of the string, so \d$ matches only a digit that is the last character

['2']

In [30]:
# '$' anchors at the end only, so the leading '3' and the '33' in the middle
# are ignored and just the final digit is returned.
re.findall(r'\d$', '3 The digit starts with 33 and ends with 3000')

['0']

In [31]:
# Exclude digits from the matches by using a negated character class

In [32]:
phrase = 'there are 3 numbers 3 4 in 123side 55556454 in this sentence'

In [33]:
pattern = r'[^\d]' # '^' as the first character inside [] negates the class: match any single character that is not a digit

In [34]:
re.findall(pattern, phrase)

['t',
 'h',
 'e',
 'r',
 'e',
 ' ',
 'a',
 'r',
 'e',
 ' ',
 ' ',
 'n',
 'u',
 'm',
 'b',
 'e',
 'r',
 's',
 ' ',
 ' ',
 ' ',
 'i',
 'n',
 ' ',
 's',
 'i',
 'd',
 'e',
 ' ',
 ' ',
 'i',
 'n',
 ' ',
 't',
 'h',
 'i',
 's',
 ' ',
 's',
 'e',
 'n',
 't',
 'e',
 'n',
 'c',
 'e']

In [35]:
# '+' extends the negated class to runs of one or more non-digit characters,
# so findall returns whole chunks instead of single characters.
pattern = r'[^\d]+'

In [36]:
re.findall(pattern, phrase)

['there are ', ' numbers ', ' ', ' in ', 'side ', ' in this sentence']

In [37]:
test_phrase = 'This is a string! But it has punctuation. How can we remove it?'

In [38]:
# Collect runs of characters that are not '!', '.', '?' or a space, which
# yields the words with the punctuation stripped.
clean_line = re.findall(r'[^!.? ]+', test_phrase)

In [39]:
clean_line

['This',
 'is',
 'a',
 'string',
 'But',
 'it',
 'has',
 'punctuation',
 'How',
 'can',
 'we',
 'remove',
 'it']

In [40]:
# Stitch the extracted words back into one string, separated by single spaces.
' '.join(clean_line)

'This is a string But it has punctuation How can we remove it'

In [41]:
# Find the words that contain a hyphen between two word parts

In [42]:
# NOTE: this rebinds `text`, replacing the phone-number sentence assigned to
# it at the top of the notebook.
text = 'only find the hyphen-words in this sentence. Also which has long-ish sentence.'

In [43]:
# One or more word characters, a literal hyphen, then one or more word
# characters. The square brackets around \w are optional here, since \w is
# already a character class on its own.
pattern = r'[\w]+-[\w]+'

In [44]:
print(pattern)

[\w]+-[\w]+


In [45]:
re.findall(pattern, text)

['hyphen-words', 'long-ish']

In [46]:
# Not a raw string: '\\' is an escaped backslash, so a literal '\t' is
# printed, while '\n' is still interpreted as a real line break.
x = "\\t I want to print  the back slash \n Hello"

In [47]:
print(x)

\t I want to print  the back slash 
 Hello


In [54]:
textOne = 'This line has catfish at first then caterpillar'
textTwo = 'This line has caterpillar in the second'
textThree = 'This line has cat Tom at third!'

In [55]:
# The group lists alternative suffixes for 'cat'. search stops at the first
# match in the string, so 'catfish' is returned even though 'caterpillar'
# appears later in textOne.
re.search(r'cat(fish|erpillar|Tom)', textOne)

<re.Match object; span=(14, 21), match='catfish'>

In [56]:
re.search(r'cat(fish|erpillar|Tom)', textTwo)

<re.Match object; span=(14, 25), match='caterpillar'>

In [57]:
# No match here: textThree contains 'cat Tom' with a space, but the pattern
# needs 'Tom' directly after 'cat', so search returns None.
re.search(r'cat(fish|erpillar|Tom)', textThree)