In [1]:
import pandas as pd

In [2]:
# City name -> ZIP code. The codes are stored as strings (note the leading
# zero in Boston's); Miami's entry has only four digits.
zipCodes = pd.Series(data=['02215', '3310'], index=['Boston', 'Miami'])

In [3]:
zipCodes

Boston    02215
Miami      3310
dtype: object

In [5]:
# Check that each value is exactly five digits. str.fullmatch requires the
# whole string to match the pattern; str.match only anchors at the start, so
# it would also accept longer values such as '022159999'.
zipCodes.str.fullmatch(r'\d{5}')

Boston     True
Miami     False
dtype: bool

In [6]:
# Address-like strings in the form "City, ST 12345".
cities = pd.Series(
    [
        'Boston, MA 02215',
        'Miami, FL 33101',
    ]
)

In [7]:
cities

0    Boston, MA 02215
1     Miami, FL 33101
dtype: object

In [8]:
# Flag entries that contain, anywhere in the string, a space followed by two
# uppercase letters.
cities.str.contains(pat=r' [A-Z]{2}', regex=True)

0    True
1    True
dtype: bool

## Transforming the Data
* Here we add column names to the contact data and reformat each phone number from ten plain digits (`9876543456`) to a hyphenated form (`987-654-3456`).

In [9]:
# One row per contact: name, email address, unformatted phone number.
contacts = [
    ['Mike Green', 'demo1@deitel.com', '9876543456'],
    ['Sue Brown', 'demo2@deitel.com', '9615243678'],
]

In [10]:
contacts

[['Mike Green', 'demo1@deitel.com', '9876543456'],
 ['Sue Brown', 'demo2@deitel.com', '9615243678']]

In [14]:
# Wrap the raw rows in a DataFrame and label its three columns.
contactsdf = pd.DataFrame(
    data=contacts,
    columns=['Name', 'Email', 'Phone_Number'],
)

In [15]:
contactsdf

Unnamed: 0,Name,Email,Phone_Number
0,Mike Green,demo1@deitel.com,9876543456
1,Sue Brown,demo2@deitel.com,9615243678


In [16]:
import re

In [17]:
def get_formatted_phone_number(value):
    """Return a 10-digit phone number formatted as ``ddd-ddd-dddd``.

    Args:
        value: The value to format, normally a string of digits.

    Returns:
        The hyphen-separated number when ``value`` is a string consisting of
        exactly ten digits; otherwise ``value`` itself, unchanged.
    """
    # Missing entries arrive as None/NaN when this is mapped over a column;
    # re.fullmatch raises TypeError on non-strings, so pass them through.
    if not isinstance(value, str):
        return value
    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    return '-'.join(result.groups()) if result else value

In [18]:
# Run every phone number through the formatter; the result is a new Series
# and the DataFrame itself is not modified by this step.
formatted_contact_number = (
    contactsdf['Phone_Number']
    .map(get_formatted_phone_number)
)

In [19]:
formatted_contact_number

0    987-654-3456
1    961-524-3678
Name: Phone_Number, dtype: object

In [20]:
# Overwrite the Phone_Number column with the formatted values.
contactsdf['Phone_Number'] = formatted_contact_number

In [21]:
contactsdf

Unnamed: 0,Name,Email,Phone_Number
0,Mike Green,demo1@deitel.com,987-654-3456
1,Sue Brown,demo2@deitel.com,961-524-3678


# Self Check

In [22]:
import pandas as pd

In [23]:
import re

In [24]:
# One row per contact: name, email address, unformatted phone number.
contacts = [
    ['Mike Green', 'apple@fruits.com', '9876456789'],
    ['Sue Brown', 'demo2@deitel.com', '9615243678'],
]

In [27]:
# Build the labelled DataFrame; the bare name on the last line displays it.
contacts_data_frame = pd.DataFrame(
    data=contacts,
    columns=['Name', 'Email', 'Phone'],
)
contacts_data_frame

Unnamed: 0,Name,Email,Phone
0,Mike Green,apple@fruits.com,9876456789
1,Sue Brown,demo2@deitel.com,9615243678


In [26]:
def get_formatted_phone_number(value):
    """Return a 10-digit phone number formatted as ``(ddd) ddd-dddd``.

    Note: this redefines the earlier function of the same name in this
    notebook, which produced the hyphen-only ``ddd-ddd-dddd`` form.

    Args:
        value: The value to format, normally a string of digits.

    Returns:
        The formatted number when ``value`` is a string consisting of exactly
        ten digits; otherwise ``value`` itself, unchanged.
    """
    # Missing entries arrive as None/NaN when this is mapped over a column;
    # re.fullmatch raises TypeError on non-strings, so pass them through.
    if not isinstance(value, str):
        return value
    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    if result is None:
        return value
    # The three capture groups are the 3-, 3- and 4-digit parts, in order.
    return '({}) {}-{}'.format(*result.groups())

In [28]:
# Replace the Phone column with the result of formatting each of its values.
contacts_data_frame['Phone'] = (
    contacts_data_frame['Phone']
    .map(get_formatted_phone_number)
)

In [29]:
contacts_data_frame

Unnamed: 0,Name,Email,Phone
0,Mike Green,apple@fruits.com,(987) 645-6789
1,Sue Brown,demo2@deitel.com,(961) 524-3678
