### Expressions:
***
`\d`                         Any numeric digit from `0` to `9`.

`\D`                         Matches any character which is not a decimal digit. This is the opposite of `\d`.
                           
`\w`                         Any letter, numeric digit, or the underscore
                           character.  (Think of this as matching
                           "word" characters.)
                           
`\W`                         Any character that is not a letter,
                           numeric digit, or the underscore character.
                           
`\s`                         Any space, tab, or newline character.
                           (Think of this as matching white-space
                           characters.)
                           
`\S`                         Any character that is not a space, tab,
                           or newline.
***

In [1]:
import pandas as pd
import re

In [2]:
# Sample string that mixes letters and digits.
text = 'A78L41K'

In [3]:
# Find the first place where two digits appear back to back.
# The pattern is a raw string so the backslash reaches the regex engine
# untouched ('\d' in a plain string literal is an invalid string escape).
num = re.search(r'\d\d', text)
num

<re.Match object; span=(1, 3), match='78'>

In [5]:
# Group 0 is the full text of the match.
num.group(0)

'78'

In [15]:
# Sample string that starts with a digit followed by letters.
text = '8PM19MIN'

In [16]:
# Find the first character that is NOT a digit.
# Raw string: '\D' in a plain literal is an invalid string escape.
num2 = re.search(r'\D', text)
num2

<re.Match object; span=(1, 2), match='P'>

In [17]:
# Group 0 is the full text of the match.
num2.group(0)

'P'

In [6]:
text = 'My phone number is 5556667777'
# Collect every run of one or more digits.
# \d needs no surrounding [...] when it is the only member of the class,
# and the raw string avoids the invalid '\d' string escape.
n = re.findall(r'\d+', text)

In [32]:
# Display the list of digit runs returned by re.findall.
n

['5556667777']

In [9]:
# Match exactly ten consecutive digits. The {10} quantifier states the
# count directly instead of repeating the '\d' string ten times, and the
# raw string avoids the invalid '\d' string escape.
n2 = re.search(r'\d{10}', text)

In [10]:
# Print the full matched text (group 0).
print(n2.group(0))

5556667777


In [20]:
text = 'My phone number is 415-555-1212'
# Two ways to pull out the dashed phone number.
# s: spell out the exact NNN-NNN-NNNN shape, so nothing after the number
#    can be swallowed (the previous '[\d]+[^-].*' only matched through
#    backtracking and its trailing '.*' consumed the rest of the line).
s = re.search(r'\d{3}-\d{3}-\d{4}', text)
# a: any number of digit groups joined by single dashes; a dash is only
#    accepted when more digits follow, so a trailing '-' is not included.
a = re.search(r'\d+(?:-\d+)*', text)

In [21]:
# Print the full matched text (group 0).
print(a.group(0))

415-555-1212


In [12]:
# Print the full matched text (group 0).
print(s.group(0))

415-555-1212


In [42]:
# Save the sample sentence to text.txt. The encoding is given explicitly
# so the bytes written do not depend on the platform's default encoding.
with open('text.txt', 'w', encoding='utf-8') as file:
    file.write(text)

In [43]:
# Read text.txt back and show its contents. The encoding is given
# explicitly so decoding does not depend on the platform's default.
with open('text.txt', 'r', encoding='utf-8') as file:
    txt = file.read()
print(txt)

My phone number is 415-555-1212


In [63]:
# Sample string containing numbers of different lengths.
value = 'O 1, t 10, o 100. 100000'

In [69]:
# Every run of one or more ASCII digits, in order of appearance.
sayi = re.findall(pattern='[0-9]+', string=value)

In [70]:
# Display the list of digit runs returned by re.findall.
sayi

['1', '10', '100', '100000']

In [72]:
# \d{2} matches any two consecutive digits, not only standalone two-digit
# numbers: longer runs such as 100000 are consumed in non-overlapping
# pairs, and a lone digit is skipped. To match only whole two-digit
# numbers, use r'\b\d{2}\b' instead.
# Raw string: '\d' in a plain literal is an invalid string escape.
sayi = re.findall(r'\d{2}', value)
sayi

['10', '10', '10', '00', '00']

In [73]:
# Phone number followed by a trailing '#' note, used for the re.sub demos.
phone = '2004-959-559 # This is Phone Number'

In [75]:
# \D matches every character that is not a digit; each one is replaced
# with '.'. Raw string: '\D' in a plain literal is an invalid escape.
output = re.sub(r'\D', '.', phone)
print(output)

2004.959.559.......................


In [77]:
# \d matches every digit; each one is replaced with '~'.
# Raw string: '\d' in a plain literal is an invalid escape.
output = re.sub(r'\d', '~', phone)
print(output)

~~~~-~~~-~~~ # This is Phone Number


### Special Characters
___
``"[]"``	  A set of characters	``"[a-m]"``

``"\"``	      Signals a special sequence (can also be used to escape special characters)

``"."``	      Any character (except newline character)

``"^"``	      Starts with	``"^hello"``

``"$"``	      Ends with	``"world$"``

``"*"``	      Zero or more occurrences

`"+"`	      One or more occurrences

`"{}"`	  Exactly the specified number of occurrences

`"|"`	      Either or	`"falls|stays"`

`"()"`	  Capture and group
___

In [78]:
# Sentence containing two numbers of different lengths.
txt = '1 person against 100 people'

In [81]:
# Every run of ASCII digits in the sentence, written with an explicit
# [0-9] character class.
a = re.findall(pattern='[0-9]+', string=txt)
a

['1', '100']

In [82]:
# Same search using the \d shorthand for a digit.
# Raw string: '\d' in a plain literal is an invalid string escape.
b = re.findall(r'\d+', txt)
b

['1', '100']

In [84]:
txt = 'Hello world'
# Capture the leading run of non-whitespace characters (the first word);
# the trailing .* consumes the rest of the line outside the group.
# Raw string: '\S' in a plain literal is an invalid string escape.
a = re.findall(r'(^\S+).*', txt)
a

['Hello']

In [95]:
# When the pattern contains a capture group, re.findall returns the text
# captured by the group instead of the whole match - here the run of
# non-whitespace characters at the end of the string.
# Raw string: '\s' and '\S' in a plain literal are invalid string escapes.
b = re.findall(r'.*\s(\S+$)', txt)
b

['world']

In [96]:
# Small Series in which the last entry ('d') contains no digit.
s = pd.Series(data=['a3', 'b4', 'c5', 'd'])

In [97]:
# Boolean mask: does each entry contain a digit?
# No capture group is used: str.contains only tests for a match, and pandas
# emits a "pattern has match groups" UserWarning when the pattern has one.
# Raw string: '\d' in a plain literal is an invalid string escape.
s.str.contains(r'\d')

  return func(self, *args, **kwargs)


0     True
1     True
2     True
3    False
dtype: bool

In [98]:
# Display the Series for reference.
s

0    a3
1    b4
2    c5
3     d
dtype: object

In [101]:
# Pull the first run of ASCII digits out of each entry; an entry with no
# digit gives a missing value.
s.str.extract(pat='([0-9]+)')

Unnamed: 0,0
0,3.0
1,4.0
2,5.0
3,


In [102]:
# Pull the first single digit out of each entry.
# Raw string: '\d' in a plain literal is an invalid string escape.
s.str.extract(r'(\d)')

Unnamed: 0,0
0,3.0
1,4.0
2,5.0
3,


In [105]:
# Pull the first word character (letter, digit or underscore) out of
# each entry.
# Raw string: '\w' in a plain literal is an invalid string escape.
s.str.extract(r'(\w)')

Unnamed: 0,0
0,a
1,b
2,c
3,d


In [124]:
# Entries with a single digit followed by trailing letters.
s = pd.Series(data=['a3aa', 'b4aa', 'c5aa'])

In [125]:
# Display the Series for reference.
s

0    a3aa
1    b4aa
2    c5aa
dtype: object

In [127]:
# Capture everything from the first digit through the end of each entry.
s.str.extract(pat='([0-9].*)')

Unnamed: 0,0
0,3aa
1,4aa
2,5aa


In [183]:
# Consumption strings of the form "<amount> <unit>/100 km (comb)".
consumption_texts = [
    '40 l/100 km (comb)',
    '38 l/100 km (comb)',
    '6.4 l/100 km (comb)',
    '8.3 kg/100 km (comb)',
    '5.1 kg/100 km (comb)',
    '5.4 l/100 km (comb)',
    '6.7 l/100 km (comb)',
    '6.2 l/100 km (comb)',
    '7.3 l/100 km (comb)',
    '6.3 l/100 km (comb)',
    '5.7 l/100 km (comb)',
    '6.1 l/100 km (comb)',
    '6.8 l/100 km (comb)',
    '7.5 l/100 km (comb)',
    '7.4 l/100 km (comb)',
    '3.6 kg/100 km (comb)',
    '0 l/100 km (comb)',
    '7.8 l/100 km (comb)',
]
s = pd.Series(consumption_texts)

In [184]:
# Display the Series for reference.
s

0       40 l/100 km (comb)
1       38 l/100 km (comb)
2      6.4 l/100 km (comb)
3     8.3 kg/100 km (comb)
4     5.1 kg/100 km (comb)
5      5.4 l/100 km (comb)
6      6.7 l/100 km (comb)
7      6.2 l/100 km (comb)
8      7.3 l/100 km (comb)
9      6.3 l/100 km (comb)
10     5.7 l/100 km (comb)
11     6.1 l/100 km (comb)
12     6.8 l/100 km (comb)
13     7.5 l/100 km (comb)
14     7.4 l/100 km (comb)
15    3.6 kg/100 km (comb)
16       0 l/100 km (comb)
17     7.8 l/100 km (comb)
dtype: object

In [192]:
# Column 0: the leading whitespace-free token (the amount).
# Column 1: the whitespace-free token right after the last '/' (the greedy
# .* runs as far as it can before the slash).
# Raw string: '\S' in a plain literal is an invalid string escape.
a = s.str.extract(r'(\S+).*/(\S+)')
a

Unnamed: 0,0,1
0,40.0,100
1,38.0,100
2,6.4,100
3,8.3,100
4,5.1,100
5,5.4,100
6,6.7,100
7,6.2,100
8,7.3,100
9,6.3,100


In [185]:
# Stricter version of the same extraction.
# Column 0: an integer or decimal amount at the start of the string. The
#   decimal point is escaped (a bare '.' matches any character) and at
#   least one digit is required, so the match no longer relies on
#   backtracking to succeed.
# Column 1: the digits immediately after the unit and '/'.
# Raw string: '\d' and '\w' in a plain literal are invalid string escapes.
b = s.str.extract(r'^(\d+(?:\.\d+)?) \w*/(\d+)')
b

Unnamed: 0,0,1
0,40.0,100
1,38.0,100
2,6.4,100
3,8.3,100
4,5.1,100
5,5.4,100
6,6.7,100
7,6.2,100
8,7.3,100
9,6.3,100


In [193]:
# Entries of the form "MM/YYYY", two newlines, then a consumption or
# emission figure.
dated_texts = [
    '06/2020\n\n4.9 l/100 km (comb)',
    '11/2020\n\n166 g CO2/km (comb)',
    '10/2019\n\n5.3 l/100 km (comb)',
    '05/2022\n\n6.3 l/100 km (comb)',
    '07/2019\n\n128 g CO2/km (comb)',
    '06/2022\n\n112 g CO2/km (comb)',
    '01/2022\n\n5.8 l/100 km (comb)',
    '11/2020\n\n106 g CO2/km (comb)',
    '04/2019\n\n105 g CO2/km (comb)',
    '08/2020\n\n133 g CO2/km (comb)',
    '04/2022\n\n133 g CO2/km (comb)',
]
s = pd.Series(dated_texts)

In [194]:
# Display the Series for reference.
s

0     06/2020\n\n4.9 l/100 km (comb)
1     11/2020\n\n166 g CO2/km (comb)
2     10/2019\n\n5.3 l/100 km (comb)
3     05/2022\n\n6.3 l/100 km (comb)
4     07/2019\n\n128 g CO2/km (comb)
5     06/2022\n\n112 g CO2/km (comb)
6     01/2022\n\n5.8 l/100 km (comb)
7     11/2020\n\n106 g CO2/km (comb)
8     04/2019\n\n105 g CO2/km (comb)
9     08/2020\n\n133 g CO2/km (comb)
10    04/2022\n\n133 g CO2/km (comb)
dtype: object

In [197]:
# Split the first "token/token" pair of each entry into two columns.
# str.extract keeps only the first match, which here is the leading
# MM/YYYY stamp; \S cannot cross the newlines that follow it.
# Raw string: '\S' in a plain literal is an invalid string escape.
t = s.str.extract(r'(\S+)/(\S+)')
t

Unnamed: 0,0,1
0,6,2020
1,11,2020
2,10,2019
3,5,2022
4,7,2019
5,6,2022
6,1,2022
7,11,2020
8,4,2019
9,8,2020
