8.2.1 Formatting Strings - Presentation types: Python assumes the value for a placeholder should be displayed as a string unless another type is specified.

In [2]:
f'{17.489:.2f}' #python supports precision only for floating-point and decimal values. Formatting is type-dependent

'17.49'

In [3]:
# integers: the d presentation type formats integer values as strings
f'{10:d}'

'10'

In [6]:
#characters: the c presentation type formats an integer character code as the corresponding character
f'{65:c} {97:c}'

'A a'

In [7]:
# strings: the s presentation type is the default; if you specify s, the value to format must be a variable that references a string, an expression that produces a string, or a string literal.
# if a type is not specified, non-string values like the integer 7 are converted to strings
# use "" for the nested string literal: before Python 3.12 an f-string expression cannot reuse the quote character that delimits the f-string itself

f'{"hello":s} {7}'

'hello 7'

In [11]:
#floating-point and decimal values. Exponential notation can be used

from decimal import Decimal

In [12]:
f'{Decimal("10000000000000000000000.0"):.3f}'

'10000000000000000000000.000'

In [13]:
f'{Decimal("10000000000000000000000.0"):.3e}'

'1.000e+22'

In [14]:
print(f' {58:c}{45:c}{41:c}')

 :-)


In [18]:
# No placeholders are interpolated here, so a plain string literal is enough.
print('Kim Leach')

Kim Leach


8.2.2 Field widths and alignment: Python right-aligns numbers and left-aligns other values such as strings

In [19]:
f'[{27:10d}]'

'[        27]'

In [20]:
f'[{3.5:10f}]'

'[  3.500000]'

In [21]:
f'[{"hello":10}]'

'[hello     ]'

In [22]:
#explicitly specifying left and right alignment in a field
f'[{27:<15d}]'

'[27             ]'

In [23]:
f'[{3.5:<15f}]'

'[3.500000       ]'

In [24]:
f'[{"hello":>15}]'

'[          hello]'

In [25]:
# center values

f'[{27:^7d}]'

'[  27   ]'

In [26]:
f'[{3.5:^7.1f}]'

'[  3.5  ]'

In [28]:
f'[{"hello":^7}]'

'[ hello ]'

In [33]:
print(f'[{"Kim":>10}]\n[{"KimLeach":^10}]\n[{"Leach":<10}]')

[       Kim]
[ KimLeach ]
[Leach     ]


8.2.3 Numeric Formatting

In [34]:
#formatting positive numbers with signs
f'[{27:+10d}]'

'[       +27]'

In [35]:
f'[{27:+010d}]'

'[+000000027]'

In [36]:
print(f'{27:3}\n{27: d}\n{-27: d}')

 27
 27
-27


In [38]:
#grouping digits, format with thousands separators by using a comma:
f'{12345678:,d}'

'12,345,678'

In [39]:
f'{123456.78:,.2f}'

'123,456.78'

In [41]:
print(f'{10240.473:+10,.2f}\n{-3210.9521:+10,.2f}')

+10,240.47
 -3,210.95


8.2.4 String's format method: 

In [42]:
'{:.2f}'.format(17.489)

'17.49'

In [43]:
#multiple placeholders
'{} {}'.format('Kim','Leach')

'Kim Leach'

In [44]:
'{0} {0} {1}'.format('Happy', 'Birthday')

'Happy Happy Birthday'

In [45]:
'{first} {last}'.format(first='Kim', last='Leach')

'Kim Leach'

In [46]:
'{last} {first}'.format(first='Kim', last='Leach')

'Leach Kim'

In [50]:
print('{:c}{:c}{:c}'.format(58, 45, 41))

:-)


In [51]:
print('[{0:>10}]\n[{0:^10}]\n[{0:<10}]'.format('Kim'))

[       Kim]
[   Kim    ]
[Kim       ]


In [52]:
print('{:+10,.2f}\n{:+10,.2f}'.format(10240.473, -3210.9521))

+10,240.47
 -3,210.95


8.3 Concatenating and repeating strings

In [53]:
s1 = 'Kim'

In [54]:
s2 = 'Leach'

In [55]:
s1 += ' ' + s2

In [56]:
s1

'Kim Leach'

In [57]:
symbol = '>'

In [58]:
symbol *= 5

In [59]:
symbol

'>>>>>'

In [60]:
name = 'Kim'

In [61]:
name += ' Leach'

In [62]:
bar = '*'

In [63]:
bar *= len(name)

In [64]:
print(f'{bar}\n{name}\n{bar}')

*********
Kim Leach
*********


8.4 Stripping whitespace from strings

In [65]:
# removing leading and trailing whitespace
sentence = '\t \n This is a test string. \t\t \n'

In [67]:
sentence.strip()

'This is a test string.'

In [68]:
# removing leading whitespace
sentence.lstrip()

'This is a test string. \t\t \n'

In [69]:
# removing trailing whitespace
sentence.rstrip()

'\t \n This is a test string.'

In [70]:
name = '     Kim Leach     '
name.strip()

'Kim Leach'

In [71]:
name.rstrip()

'     Kim Leach'

In [72]:
name.lstrip()

'Kim Leach     '

8.5 Changing character case: previously used methods .lower and .upper to convert strings to all lowercase or uppercase letters. Can also use .capitalize and .title

In [73]:
'kim leach'.capitalize()

'Kim leach'

In [74]:
'kim leach'.title()

'Kim Leach'

In [77]:
test_string = 'happy new year'

In [78]:
test_string.title()

'Happy New Year'

In [79]:
test_string.capitalize()

'Happy new year'

8.6 Comparison Operators for Strings: recall that strings are compared based on their characters' underlying integer values, so uppercase letters compare as less than lowercase letters, e.g. 'A' is 65 and 'a' is 97

In [80]:
print(f'A: {ord("A")}; a: {ord("a")}')

A: 65; a: 97


In [81]:
'Orange' == 'orange'

False

In [82]:
'Orange' != 'orange'

True

In [83]:
'Orange' < 'orange'

True

In [84]:
'Orange' <= 'orange'

True

In [85]:
'Orange' > 'orange'

False

In [86]:
'Orange' >= 'orange'

False

8.7 Searching for substrings: you can search in a string using one or more adjacent characters known as a substring to count the number of occurrences, determine whether a string contains a substring, or determine the index at which a substring resides in a string

In [87]:
#counting occurrences
sentence = 'to be or not to be that is the question'

In [89]:
sentence.count('to')

2

In [90]:
sentence.count('to', 12)

1

In [91]:
sentence.count('that', 12, 25)

1

In [92]:
#locating a substring in a string
sentence.index('be')

3

In [93]:
# rindex returns the highest index at which the substring is found (i.e. its last occurrence)
sentence.rindex('be')

16

In [94]:
# determining whether a string contains a substring
'that' in sentence

True

In [95]:
'THAT' in sentence

False

In [96]:
'THAT' not in sentence

True

In [97]:
# locating a substring at the beginning or end of a string using startswith and endswith
sentence.startswith('to')

True

In [98]:
sentence.startswith('be')

False

In [99]:
sentence.endswith('question')

True

In [100]:
sentence.endswith('quest')

False

In [101]:
for word in 'to be or not to be that is the question'.split():
    if word.startswith('t'):
        print(word, end=' ')

to to that the 

8.8 replacing substrings

In [102]:
values = '1\t2\t3\t4\t5'

In [103]:
values.replace('\t', ',')

'1,2,3,4,5'

In [104]:
'1 2 3 4 5'.replace(' ', ' --> ')

'1 --> 2 --> 3 --> 4 --> 5'

8.9 Splitting and Joining Strings

In [105]:
# splitting strings
letters = 'A, B, C, D'

In [107]:
letters.split(', ') #.rsplit processes the max number of splits from the end of the string toward the beginning

['A', 'B', 'C', 'D']

In [108]:
# joining strings
letters_list = ['A', 'B', 'C', 'D']

In [109]:
','.join(letters_list)

'A,B,C,D'

In [110]:
','.join([str(i) for i in range(10)])

'0,1,2,3,4,5,6,7,8,9'

In [113]:
# string methods partition and rpartition
'Kim Leach: 89, 97, 92'.partition(': ')

('Kim Leach', ': ', '89, 97, 92')

In [114]:
url = 'http://www.deitel.com/books/PyCDS/table_of_contents.html'

In [116]:
rest_of_url, separator, document = url.rpartition('/')

In [117]:
document

'table_of_contents.html'

In [118]:
rest_of_url

'http://www.deitel.com/books/PyCDS'

In [119]:
#string method splitlines
lines = """This is line 1
    This is line2
    This is line3"""

In [120]:
lines

'This is line 1\n    This is line2\n    This is line3'

In [121]:
lines.splitlines()

['This is line 1', '    This is line2', '    This is line3']

In [122]:
lines.splitlines(True)

['This is line 1\n', '    This is line2\n', '    This is line3']

In [123]:
', '.join(reversed('Kim Leach'.split()))

'Leach, Kim'

In [124]:
url = 'http://www.deitel.com/books/PyCDS/table_of_contents.html'

In [125]:
protocol, separator, rest_of_url = url.partition('://')

In [126]:
host, separator, document_with_path = rest_of_url.partition('/')

In [127]:
host

'www.deitel.com'

In [128]:
path, separator, document = document_with_path.rpartition('/')

In [129]:
path

'books/PyCDS'

8.10 Characters and character-testing methods

In [130]:
'-27'.isdigit()

False

In [131]:
'27'.isdigit()

True

In [132]:
'A9876'.isalnum()

True

In [134]:
'123 Main Street'.isalnum()

False

8.11 Raw strings

In [140]:
file_path = 'C:\\MyFolder\\MySubFolder\\MyFile.txt'

In [137]:
file_path

'C:\\MyFolder\\MySubFolder\\MyFile.txt'

In [141]:
file_path = r'C:\MyFolder\MySubFolder\MyFile.txt'

In [142]:
file_path

'C:\\MyFolder\\MySubFolder\\MyFile.txt'

8.12 Introduction to regular expressions

8.12.1 re module and function fullmatch

In [143]:
import re

In [144]:
pattern = '02215'

In [145]:
'Match' if re.fullmatch(pattern, '02215') else 'No Match'

'Match'

In [146]:
'Match' if re.fullmatch(pattern, '51220') else 'No Match'

'No Match'

In [147]:
#metacharacters, character classes and quantifiers
'Valid' if re.fullmatch(r'\d{5}', '02215') else 'Invalid'

'Valid'

In [148]:
'Valid' if re.fullmatch(r'\d{5}', '9876') else 'Invalid'

'Invalid'

In [149]:
#custom character classes
'Valid' if re.fullmatch('[A-Z][a-z]*', 'Wally') else 'Invalid'

'Valid'

In [150]:
'Valid' if re.fullmatch('[A-Z][a-z]*', 'eva') else 'Invalid'

'Invalid'

In [151]:
'Match' if re.fullmatch('[^a-z]', 'A') else 'No match'

'Match'

In [152]:
'Match' if re.fullmatch('[^a-z]', 'a') else 'No match'

'No match'

In [153]:
'Match' if re.fullmatch('[*+$]', '*') else 'No match'

'Match'

In [154]:
'Match' if re.fullmatch('[*+$]', '!') else 'No match'

'No match'

In [155]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'Wally') else 'Invalid'

'Valid'

In [156]:
'Valid' if re.fullmatch('[A-Z][a-z]+', 'eva') else 'Invalid'

'Invalid'

In [157]:
'Match' if re.fullmatch('labell?ed', 'labelled') else 'No match'

'Match'

In [158]:
'Match' if re.fullmatch('labell?ed', 'labeled') else 'No match'

'Match'

In [159]:
'Match' if re.fullmatch('labell?ed', 'labellled') else 'No match'

'No match'

In [160]:
'Match' if re.fullmatch(r'\d{3,}', '123') else 'No match'

'Match'

In [161]:
'Match' if re.fullmatch(r'\d{3,}', '1234567890') else 'No match'

'Match'

In [162]:
'Match' if re.fullmatch(r'\d{3,}', '12') else 'No match'

'No match'

In [163]:
'Match' if re.fullmatch(r'\d{3,6}', '123') else 'No match'

'Match'

In [164]:
'Match' if re.fullmatch(r'\d{3,6}', '123456') else 'No match'

'Match'

In [165]:
'Match' if re.fullmatch(r'\d{3,6}', '1234567') else 'No match'

'No match'

In [166]:
'Match' if re.fullmatch(r'\d{3,6}', '12') else 'No match'

'No match'

8.12.2 replacing substrings and splitting strings

In [167]:
import re

re.sub takes three required arguments:
    the pattern to match (the tab character, '\t')
    the replacement text (', ')
    the string to be searched ('1\t2\t3\t4')

In [169]:
re.sub(r'\t', ', ', '1\t2\t3\t4')

'1, 2, 3, 4'

In [170]:
re.sub(r'\t', ', ', '1\t2\t3\t4', count=2)

'1, 2, 3\t4'

In [171]:
# function split 
re.split(r',\s*', '1, 2, 3,4,    5,6,7,8')

['1', '2', '3', '4', '5', '6', '7', '8']

In [172]:
re.split(r',\s*', '1, 2, 3,4,    5,6,7,8', maxsplit=3)

['1', '2', '3', '4,    5,6,7,8']

In [173]:
re.sub(r'\t+', ', ', 'A\tB\tC\tD')

'A, B, C, D'

In [174]:
# Raw string: '\$' in a normal string literal is an invalid escape sequence
# (it triggers a warning), while the regex still needs the backslash to match
# a literal dollar sign.
re.split(r'\$+', '123$Main$$Street')

['123', 'Main', 'Street']

8.12.3 other search functions; accessing matches

In [175]:
#function search - finding the first match anywhere in a string
result = re.search('Python', 'Python is fun')

result.group() if result else 'not found'

'Python'

In [178]:
result2 = re.search('fun!', 'Python is fun')
result2.group() if result2 else 'not found'

'not found'

In [179]:
#ignoring case with the optional flags keyword argument
result3 = re.search('Kim', 'KIM LEACH', flags=re.IGNORECASE)
result3.group() if result3 else 'not found'

'KIM'

In [180]:
#metacharacters that restrict matches to the beginning or end of a string
result = re.search('^Python', 'Python is fun')
result.group() if result else 'not found'

'Python'

In [183]:
result = re.search('^fun', 'Python is fun')
result.group() if result else 'not found'

'not found'

In [184]:
result = re.search('Python$', 'Python is fun')
result.group() if result else 'not found'

'not found'

In [185]:
result = re.search('fun$', 'Python is fun')
result.group() if result else 'not found'

'fun'

In [186]:
#function findall and finditer - finding all matches in a string
contact = 'Kim Leach, Home: 555-555-1234, Work: 555-555-4321'

In [189]:
re.findall(r'\d{3}-\d{3}-\d{4}', contact)

['555-555-1234', '555-555-4321']

In [190]:
for phone in re.finditer(r'\d{3}-\d{3}-\d{4}', contact):
    print(phone.group())

555-555-1234
555-555-4321


In [192]:
#capturing substrings in a match
text = 'Kim Leach, email: demo1@deitel.com'
pattern = r'([A-Z][a-z]+ [A-Z][a-z]+), email: (\w+@\w+\.\w{3})'
result = re.search(pattern, text)

In [193]:
result.groups()

('Kim Leach', 'demo1@deitel.com')

In [194]:
result.group()

'Kim Leach, email: demo1@deitel.com'

In [195]:
result.group(1)

'Kim Leach'

In [196]:
result.group(2)

'demo1@deitel.com'

In [197]:
result = re.search(r'(\d+) ([-+*/]) (\d+)', '10 + 5')

In [198]:
result.groups()

('10', '+', '5')

In [200]:
result.group(1)

'10'

In [201]:
result.group(2)

'+'

In [202]:
result.group(3)

'5'

8.13 Intro to data science: pandas, regular expressions and data munging
    * Preparing for data analysis is called data munging or data wrangling
        * two of the most important steps in data munging are data cleaning and transforming data

In [203]:
import pandas as pd

In [204]:
zips = pd.Series({'Boston': '02215', 'Miami': '3310'})
zips

Boston    02215
Miami      3310
dtype: object

In [205]:
zips.str.match(r'\d{5}')

Boston     True
Miami     False
dtype: bool

In [208]:
cities = pd.Series(['Boston, MA 02215', 'Miami, FL 33101'])
cities

0    Boston, MA 02215
1     Miami, FL 33101
dtype: object

In [209]:
cities.str.contains(r' [A-Z]{2} ')

0    True
1    True
dtype: bool

In [210]:
cities.str.match(r' [A-Z]{2} ')

0    False
1    False
dtype: bool

In [212]:
# reformatting your data

contacts = [['Kim Leach', 'demo1@deitel.com', '5555555555'], ['Sue Brown', 'demo2@deitel.com', '5555551234']]

In [213]:
contactsdf = pd.DataFrame(contacts, 
                          columns = ['Name', 'Email', 'Phone'])

In [214]:
contactsdf

Unnamed: 0,Name,Email,Phone
0,Kim Leach,demo1@deitel.com,5555555555
1,Sue Brown,demo2@deitel.com,5555551234


In [215]:
def get_formatted_phone(value):
    """Format a 10-digit phone number string as ###-###-####.

    Args:
        value: The phone entry to format.

    Returns:
        The three digit groups joined with '-' when value is a string made
        of exactly ten digits; otherwise value is returned unchanged.
    """
    # Missing or non-string entries (e.g. None/NaN in a pandas column) would
    # make re.fullmatch raise TypeError, so pass them through untouched.
    if not isinstance(value, str):
        return value
    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    return '-'.join(result.groups()) if result else value

In [216]:
formatted_phone = contactsdf['Phone'].map(get_formatted_phone)

In [217]:
formatted_phone

0    555-555-5555
1    555-555-1234
Name: Phone, dtype: object

In [218]:
contactsdf['Phone'] = formatted_phone
contactsdf

Unnamed: 0,Name,Email,Phone
0,Kim Leach,demo1@deitel.com,555-555-5555
1,Sue Brown,demo2@deitel.com,555-555-1234


In [None]:
def get_formatted_phone(value):
    """Return value as ###-###-#### when it is exactly ten digits, else unchanged."""
    digit_groups = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    if digit_groups is None:
        # Not a run of exactly ten digits: hand the input back as-is.
        return value
    return '-'.join(digit_groups.groups())

In [219]:
contacts = [['Kim Leach', 'demo1@deitel.com', '5555555555'], ['Sue Brown', 'demo2@deitel.com', '5555551234']]

In [220]:
# NOTE(review): this cell sits above the one that rebuilds contactsdf from the
# raw contacts list, so on a top-to-bottom run it maps over the 'Phone' values
# already reformatted earlier in the notebook (e.g. '555-555-5555'); those do
# not match the 10-digit pattern and come back unchanged. formatted_phone is
# not read again in the cells that follow -- confirm whether this cell should
# move below the DataFrame rebuild or be removed.
formatted_phone = contactsdf['Phone'].map(get_formatted_phone)

In [221]:
contactsdf = pd.DataFrame(contacts, 
                          columns = ['Name', 'Email', 'Phone'])

In [222]:
contactsdf

Unnamed: 0,Name,Email,Phone
0,Kim Leach,demo1@deitel.com,5555555555
1,Sue Brown,demo2@deitel.com,5555551234


In [223]:
def get_formatted_phone(value):
    """Format a 10-digit phone number string as (###) ###-####.

    Args:
        value: The phone entry to format.

    Returns:
        '(' + first three digits + ') ' + next three digits + '-' + last
        four digits when value is a string made of exactly ten digits;
        otherwise value is returned unchanged.
    """
    # Missing or non-string entries (e.g. None/NaN in a pandas column) would
    # make re.fullmatch raise TypeError, so pass them through untouched.
    if not isinstance(value, str):
        return value
    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    if result is None:
        return value
    first3, mid3, last4 = result.groups()
    return f'({first3}) {mid3}-{last4}'

In [224]:
contactsdf['Phone'] = contactsdf['Phone'].map(get_formatted_phone)
contactsdf

Unnamed: 0,Name,Email,Phone
0,Kim Leach,demo1@deitel.com,(555) 555-5555
1,Sue Brown,demo2@deitel.com,(555) 555-1234
