## RegEx
#### 1. findall: Returns a list containing all matches
#### 2. search: Returns a Match object if there is a match anywhere in the string
#### 3. split: Returns a list where the string has been split at each match
#### 4. sub: Replaces one or many matches with a string

![image-2.png](attachment:image-2.png)

![image.png](attachment:image.png)

![image-2.png](attachment:image-2.png)

### 1. findall()

In [1]:
import re

# findall() scans the text left to right and returns every
# non-overlapping occurrence of the pattern as a list.
txt = "The rain in Spain"
needle = "ai"
x = re.findall(needle, txt)
print(x)

['ai', 'ai']


In [2]:
# When the pattern never occurs, findall() gives back an empty list.
txt = "The rain in Spain"
x = re.findall(pattern="Portugal", string=txt)
print(x)

[]


### 2. search()

In [3]:
txt = "The rain in Spain"
# Raw string literal: "\s" in a normal string is an invalid escape sequence
# (a warning on current Python versions), so regex patterns that contain
# backslashes are written as r"...".
x = re.search(r"\s", txt)

# start() is the index in txt where the first match begins.
print("The first white-space character is located in position:", x.start())

The first white-space character is located in position: 3


In [4]:
# search() reports "no match anywhere in the string" by returning None.
txt = "The rain in Spain"
x = re.search(pattern="Portugal", string=txt)
print(x)

None


### 3. split()

In [5]:
txt = "The rain in Spain"
# Split at every whitespace character. The pattern is a raw string because
# "\s" in a normal literal is an invalid escape sequence.
x = re.split(r"\s", txt)
print(x)

['The', 'rain', 'in', 'Spain']


In [6]:
txt = "The rain in Spain"
# Split only at the first whitespace character; the rest of the string is
# returned as the last list element.
# - raw string: "\s" in a normal literal is an invalid escape sequence
# - maxsplit is passed by keyword: passing it positionally is deprecated
x = re.split(r"\s", txt, maxsplit=1)
print(x)

['The', 'rain in Spain']


### 4. sub()

In [7]:
txt = "The rain in Spain"
# Replace every whitespace character with "9". The pattern is a raw string
# because "\s" in a normal literal is an invalid escape sequence.
x = re.sub(r"\s", "9", txt)
print(x)

The9rain9in9Spain


In [8]:
txt = "The rain in Spain"
# Replace only the first two whitespace characters with "9".
# - raw string: "\s" in a normal literal is an invalid escape sequence
# - count is passed by keyword: passing it positionally is deprecated
x = re.sub(r"\s", "9", txt, count=2)
print(x)

The9rain9in Spain


### 5. Match Object
![image.png](attachment:image.png)

In [9]:
# Printing the Match object itself shows its repr: the span of the first
# match and the text that matched.
txt = "The rain in Spain"
x = re.search(pattern="ai", string=txt)
print(x)

<re.Match object; span=(5, 7), match='ai'>


In [10]:
import re

# Find the first word that starts with an upper-case "S" and show where it
# sits in the text: span() is the (start, end) pair of the match.
txt = "The rain in Spain"
word_starting_with_s = r"\bS\w+"
x = re.search(word_starting_with_s, txt)
print(x.span())

(12, 17)


In [11]:
# Match.string is the full text that was handed to search(), not just the
# part that matched.
txt = "The rain in Spain"
x = re.search(pattern=r"\bS\w+", string=txt)
print(x.string)

The rain in Spain


In [12]:
# group() returns the part of the text that matched: here, the first word
# beginning with an upper-case "S".
txt = "The rain in Spain"
s_word_pattern = r"\bS\w+"
x = re.search(s_word_pattern, txt)
print(x.group())

Spain


In [13]:
fruits = ['apple', 'mango', 'banana', 'cherry', 'apricot', 'raspberry', 'avocado']

# Keep only the fruits whose name starts with "a".
# filter() returns a one-shot iterator, so it is turned into a list right
# away; otherwise printing it would exhaust it and leave `filtered_fruits`
# empty for any later use.
filtered_fruits = list(filter(lambda fruit: re.match('^a', fruit), fruits))

print(filtered_fruits) # ['apple', 'apricot', 'avocado']

['apple', 'apricot', 'avocado']


In [14]:
fruits2 = ['opple', 'bonono', 'cherry', 'dote', 'berry']

# Replace every "o" with "a" in each name.
# map() returns a one-shot iterator, so it is turned into a list right away;
# otherwise printing it would exhaust it and leave `modified_fruits` empty
# for any later use.
modified_fruits = list(map(lambda fruit: re.sub('o', 'a', fruit), fruits2))

print(modified_fruits) # ['apple', 'banana', 'cherry', 'date', 'berry']

['apple', 'banana', 'cherry', 'date', 'berry']


In [15]:
def vowel_count(word):
    """Return how many lowercase vowels (a, e, i, o, u) occur in `word`."""
    return len(re.findall('[aeiou]', word))


fruits = ['banana', 'fig', 'grapefruit']

# In-place ascending sort: the name with the fewest vowels comes first.
fruits.sort(key=vowel_count)

print(fruits) # ['fig', 'banana', 'grapefruit']

['fig', 'banana', 'grapefruit']


In [16]:
fruits = ['banana', 'fig', 'grapefruit']

# Rank the names by how many lowercase vowels they contain, most first.
vowels = re.compile('[aeiou]')
fruits.sort(key=lambda name: len(vowels.findall(name)), reverse=True)

print(fruits) # ['grapefruit', 'banana', 'fig']

['grapefruit', 'banana', 'fig']


### Example: deriving Department and Status columns from e-mail addresses

In [17]:
import pandas as pd

# Sample employee records; the three lists line up by position.
name = ['John Smith', 'Valarie Green', 'Wiley Anthony', 'Olga Van Dike', 'George Nelson']
age = [29, 34, 54, 37, 28]
email = [
    'john.smith@finance.google.com',
    'valarie.green2@humanresources.google.com',
    'wiley.anthony@design.google.com',
    'olga.dike@technology.vendor.com',
    'george.nelson@technology.vendor.com',
]

emp_df = pd.DataFrame({'Name': name, 'Age': age, 'E-mail': email})

# The run of word characters right after the "@" is taken as the department.
# A capture group picks out just that part, so no strip('@') is needed.
DEPT_PATTERN = r"@(\w+)"

# Addresses whose domain is google.com or a sub-domain of it.
# The dots are escaped and the pattern is anchored at the end: an unescaped
# "." matches any character, so ".google.com" would also accept domains such
# as "notgoogle.com".
GOOGLE_DOMAIN_PATTERN = r"[@.]google\.com$"

# Vectorised string methods replace the row-wise apply(). An address without
# a match produces NaN here instead of raising AttributeError on .group().
dept_list = emp_df['E-mail'].str.extract(DEPT_PATTERN, expand=False)
emp_df['Department'] = dept_list

# google.com addresses are permanent staff; everyone else is a contractor.
status_list = (
    emp_df['E-mail']
    .str.contains(GOOGLE_DOMAIN_PATTERN)
    .map({True: "Permanent", False: "Contract"})
)
emp_df['Status'] = status_list
emp_df

Unnamed: 0,Name,Age,E-mail,Department,Status
0,John Smith,29,john.smith@finance.google.com,finance,Permanent
1,Valarie Green,34,valarie.green2@humanresources.google.com,humanresources,Permanent
2,Wiley Anthony,54,wiley.anthony@design.google.com,design,Permanent
3,Olga Van Dike,37,olga.dike@technology.vendor.com,technology,Contract
4,George Nelson,28,george.nelson@technology.vendor.com,technology,Contract
