# NLP: Regular Expression

### Retrieve Mobile Number

In [7]:
import re

In [8]:
chat1 = "Mr. Devil Coder: You ask lot of question for email id abc_123@gmail.com or a2wQ@abc.com and mobile number 9639055432 and 9899470499"
chat2 = "Mr. Devil Coder: Here it is your mobile number (123)-456-7890 and email id xyz@gmail.com"
chat3 = "Mr. Devil Coder: Your email id qaz@yahoo.io and mobile number 0987654321000"

In [9]:
# Two accepted layouts, joined with `|`:
#   1. exactly ten consecutive digits; the `\b` word boundaries on both sides
#      stop a longer digit run from matching on a 10-digit slice
#   2. the "(123)-456-7890" form with literal parentheses and hyphens
pattern_mobile_number = r"\b\d{10}\b|\(\d{3}\)-\d{3}-\d{4}"

In [10]:
matches_mobile_number = re.findall(pattern_mobile_number, chat1)
print(matches_mobile_number)

['9639055432', '9899470499']


In [11]:
# Search the second chat, which writes the number in the "(123)-456-7890" form.
matches_mobile_number = re.findall(pattern_mobile_number, chat2)
# Print the list that was just computed (the name `matches` is not defined
# at this point and would raise NameError).
print(matches_mobile_number)

['(123)-456-7890']


### Retrieve Email ID

In [12]:
# local-part @ domain . top-level-domain
# Each part uses `+` (one or more) rather than `*`, otherwise a bare "@."
# with nothing around it would also be reported as an e-mail address.
pattern_email_id = r"[a-zA-Z0-9_]+@[a-zA-Z0-9]+\.[a-zA-Z]+"

In [13]:
matches_email_id = re.findall(pattern_email_id, chat1)

In [14]:
matches_email_id

['abc_123@gmail.com', 'a2wQ@abc.com']

In [15]:
matches_email_id = re.findall(pattern_email_id, chat2)

In [16]:
matches_email_id

['xyz@gmail.com']

In [17]:
matches_email_id = re.findall(pattern_email_id, chat3)

In [18]:
matches_email_id

['qaz@yahoo.io']

### Retrieve Order Number

In [39]:
chat1='Mr. Devil Coder: Hello, I am having an issue with my order # 412889912'
chat2='Mr. Devil Coder: I have a problem with my order number 412884123112'
chat3='Mr. Devil Coder: My order 4128899198992 is having an issue, I was charged 300$ when online it says 280$'

In [40]:
# Find the word "order", skip any non-digit characters after it (" # ",
# " number ", ...) and capture the run of digits that follows.
# `\d+` rather than `\d*`: an "order" that is not followed by any number
# then produces no match instead of an empty-string match.
pattern_order_number = r"order[^\d]*(\d+)"

In [41]:
matches_order_number = re.findall(pattern_order_number, chat1)

In [42]:
matches_order_number

['412889912']

In [43]:
matches_order_number = re.findall(pattern_order_number, chat2)

In [44]:
matches_order_number

['412884123112']

In [45]:
matches_order_number = re.findall(pattern_order_number, chat3)

In [46]:
matches_order_number

['4128899198992']

### Information Extraction

In [47]:
text='''
Born	Elon Reeve Musk
June 28, 1971 (age 50)
Pretoria, Transvaal, South Africa
Citizenship	
South Africa (1971–present)
Canada (1971–present)
United States (2002–present)
Education	University of Pennsylvania (BS, BA)
Title	
Founder, CEO and Chief Engineer of SpaceX
CEO and product architect of Tesla, Inc.
Founder of The Boring Company and X.com (now part of PayPal)
Co-founder of Neuralink, OpenAI, and Zip2
Spouse(s)	
Justine Wilson
​
​(m. 2000; div. 2008)​
Talulah Riley
​
​(m. 2010; div. 2012)​
​
​(m. 2013; div. 2016)
'''

In [48]:
def get_pattern_match(pattern, text):
    """Return the first result of ``re.findall(pattern, text)``, or None.

    The shape of the result follows ``re.findall``: the whole match when the
    pattern has no capture group, the group's text when it has exactly one,
    and a tuple of group texts when it has several.
    """
    found = re.findall(pattern, text)
    return found[0] if found else None

In [49]:
get_pattern_match(r'age (\d+)', text)

'50'

In [50]:
get_pattern_match(r'Born(.*)\n', text)

'\tElon Reeve Musk'

In [51]:
get_pattern_match(r'Born(.*)\n', text).strip()

'Elon Reeve Musk'

In [53]:
get_pattern_match(r'Born.*\n(.*)\(age', text).strip()

'June 28, 1971'

In [54]:
get_pattern_match(r'\(age.*\n(.*)', text)

'Pretoria, Transvaal, South Africa'

In [57]:
def extract_personal_information(text):
    """Extract age, name, birth date and birth place from infobox-style text.

    The patterns look for a line starting with ``Born`` that carries the
    name, followed by a line of the form ``<date> (age N)``, followed by a
    line holding the birth place.

    Returns a dict with the keys ``'age'`` (int), ``'name'``,
    ``'birth_date'`` and ``'birth_place'`` (str, surrounding whitespace
    removed). A field whose pattern is not found in ``text`` is None.
    """
    def _clean(value):
        # get_pattern_match gives None when nothing matched; pass that
        # through instead of failing on None.strip().
        return value.strip() if value is not None else None

    age = get_pattern_match(r'age (\d+)', text)
    name = _clean(get_pattern_match(r'Born(.*)\n', text))
    dob = _clean(get_pattern_match(r'Born.*\n(.*)\(age', text))
    # Stripped like the other text fields so trailing spaces do not leak out.
    place = _clean(get_pattern_match(r'\(age.*\n(.*)', text))

    return {
        'age': int(age) if age is not None else None,
        'name': name,
        'birth_date': dob,
        'birth_place': place,
    }

In [58]:
extract_personal_information(text)

{'age': 50,
 'name': 'Elon Reeve Musk',
 'birth_date': 'June 28, 1971',
 'birth_place': 'Pretoria, Transvaal, South Africa'}

In [59]:
text = '''
Born	Mukesh Dhirubhai Ambani
19 April 1957 (age 64)
Aden, Colony of Aden
(present-day Yemen)[1][2]
Nationality	Indian
Alma mater	
St. Xavier's College, Mumbai
Institute of Chemical Technology (B.E.)
Stanford University (drop-out)
Occupation	Chairman and MD, Reliance Industries
Spouse(s)	Nita Ambani ​(m. 1985)​[3]
Children	3
Parent(s)	
Dhirubhai Ambani (father)
Kokilaben Ambani (mother)
Relatives	Anil Ambani (brother)
Tina Ambani (sister-in-law)
'''

In [60]:
extract_personal_information(text)

{'age': 64,
 'name': 'Mukesh Dhirubhai Ambani',
 'birth_date': '19 April 1957',
 'birth_place': 'Aden, Colony of Aden'}