# Strings

In [1]:
S = 'Spam'

In [2]:
len(S)

4

In [4]:
S[0]

'S'

In [5]:
S[1]

'p'

In [6]:
S[-1]  # The last item in S (negative offsets count back from the end)

'm'

In [7]:
S[-2]  # The second-to-last item in S

'a'

In [8]:
S[1:3]  # Slice of S from offsets 1 through 2 (not 3)

'pa'

In [9]:
S[:-1]   # Everything but the last item (start defaults to 0, same as S[0:-1])

'Spa'

In [10]:
S[:]   # All of S as a top-level copy (0:len(S))

'Spam'

In [11]:
 S + 'xyz'   # Concatenation

'Spamxyz'

In [12]:
 S   # S is unchanged

'Spam'

In [13]:
S * 3   # Repetition

'SpamSpamSpam'

Strings are immutable in Python - they cannot be changed in place after they are created. For example, you can’t change a string by assigning to one of its positions, but you can always build a new one and assign it to the same name.

In [15]:
# Demonstration: strings are immutable, so item assignment raises TypeError.
# The expected error is caught and printed so this cell does not halt a
# "Restart & Run All" of the notebook.
try:
    S[0] = 'z'   # Immutable objects cannot be changed
except TypeError as exc:
    print('TypeError:', exc)

TypeError: 'str' object does not support item assignment

In [16]:
S = 'shrubbery'

In [17]:
L = list(S)   # Expand to a list: [...]

In [18]:
L

['s', 'h', 'r', 'u', 'b', 'b', 'e', 'r', 'y']

In [19]:
L[1] = 'c'   # Change it in place

In [20]:
''.join(L)   # Join with empty delimiter

'scrubbery'

In [26]:
B = bytearray(b'spam')   # A bytes/list hybrid (ahead)

In [27]:
B.extend(b'eggs')

In [28]:
B   # B[i] = ord(c) works here too

bytearray(b'spameggs')

In [29]:
B.decode()   # Translate to normal string

'spameggs'

The bytearray supports in-place changes for text, but only for text whose characters
are all at most 8 bits wide (e.g., ASCII). All other strings are still immutable — bytearray
is a distinct hybrid of immutable bytes strings and mutable lists (coded and displayed in [ ]).

The cells below demonstrate type-specific string methods, which are not shared by other sequence types such as lists and tuples.


In [30]:
S = 'Spam'

In [31]:
S.find('pa')   # Find the offset of a substring in S

1

In [32]:
S.replace('pa', 'XYZ')   # Replace occurrences of a string in S with another

'SXYZm'

In [33]:
S

'Spam'

In [34]:
line = 'aaa,bbb,ccccc,dd'

In [35]:
line.split(',')    # Split on a delimiter into a list of substrings

['aaa', 'bbb', 'ccccc', 'dd']

In [36]:
S = 'spam'

In [37]:
S.upper()   # Upper and lowercase conversions

'SPAM'

In [38]:
 S.isalpha()   # Content tests: isalpha, isdigit, etc.

True

In [39]:
line = 'aaa,bbb,ccccc,dd\n'

In [40]:
line.rstrip()   # Remove whitespace characters on the right side

'aaa,bbb,ccccc,dd'

In [41]:
line.rstrip().split(',')   # Combine two operations

['aaa', 'bbb', 'ccccc', 'dd']

In [42]:
'%s, eggs, and %s' % ('spam', 'SPAM!')   # Formatting expression (all Python versions)

'spam, eggs, and SPAM!'

In [43]:
'{0}, eggs, and {1}'.format('spam', 'SPAM!')   # Formatting method (2.6+, 3.0+)

'spam, eggs, and SPAM!'

In [44]:
'{}, eggs, and {}'.format('spam', 'SPAM!')   # Numbers optional (2.7+, 3.1+)

'spam, eggs, and SPAM!'

In [45]:
'{:,.2f}'.format(296999.2567)    # Separators, decimal digits

'296,999.26'

In [47]:
'%.2f | %+05d' % (3.14159, -42)   # Digits, padding, signs

'3.14 | -0042'

In [48]:
dir(S)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']


In [49]:
help(S.replace)

Help on built-in function replace:

replace(old, new, count=-1, /) method of builtins.str instance
    Return a copy with all occurrences of substring old replaced by new.
    
      count
        Maximum number of occurrences to replace.
        -1 (the default value) means replace all occurrences.
    
    If the optional argument count is given, only the first count occurrences are
    replaced.



In [50]:
S = 'A\nB\tC'   # \n is end-of-line, \t is tab

In [51]:
len(S)   # Each stands for just one character

5

In [52]:
ord('\n')   # \n is a single character whose code point is 10 (ASCII line feed)

10

In [53]:
S = 'A\0B\0C'   # \0 is a NUL character (code point 0); it does not terminate the string
len(S)   # Counts all five characters, including both NULs

5

In [54]:
S   # Non-printables are displayed as \xNN hex escapes

'A\x00B\x00C'