# 21: Working with Python data types

Author: Greg Wray  
2025-MAR-04  
  
Type code into this notebook during lecture. Run code in the selected cell by clicking the `run` (play button) icon or typing `shift-return`.  
Modify and experiment!! This is the best way to get a feel for how Python works (and any other language).   
Consider adding comments to record notes and findings: `#` starts a comment on a new line or part way through a line (just like R and bash). 

### Pre-defined data structures
Use these throughout class to avoid having to type your own!

In [189]:
mammal_tuple = ('mouse', 'fruit bat', 'chimpanzee', 'leopard')
fern_list = ['maidenhair', 'tree', 'staghorn']
number_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
tiny_str = 'R2D2'
short_str = 'Atlantic Right Whale'
long_str = '\'Twas brillig, and the slithy toves\n   Did gyre and gimble in the wabe:\nAll mimsy were the borogoves,\n   And the mome raths outgrabe.'
set_A = {1, 2, 3, 4, 5, 6}
set_B = {2, 4, 6, 8, 10, 12}
birds = ['parrot', 'quail', 'duck']
genera = ['Forpus', 'Coturnix', 'Anser']
tmnt = {"Leonardo": ["leader", "blue", "katana"], 
        "Raphael" : ["muscle", "red", "sai"],
        "Donatello" : ["brains", "purple", "bo"],
        "Michelangelo" : ["comedian", "orange", "nunchuk"]}

### Assignment

In [38]:
# simultaneous assignment
a, b, c = 'mouse', 'leopard', 'orca'   # assignment in order given; balance items on RHS and LHS!
a = b = c = 'mouse'				       # right-most becomes RHS, rest are LHS

In [39]:
b

'leopard'

In [37]:
# swapping
a, b = b, a

In [40]:
print('a =', a, 'b =', b)

a = mouse b = leopard


In [28]:
# unpacking
mammal_tuple = ['mouse', 'fruit bat', 'chimpanzee', 'leopard']
#i, j = mammal_tuple              # assigns without need for indexing
#i, _ = mammal_tuple                	# _ means ignore; if last all other items
#i, _, j = mammal_tuple                	# _ means ignore; if internal one item
#i, *j  = mammal_tuple                  	# * means remaining items are a list
i, *_, j  = mammal_tuple                # assigns first and last items

i = mouse j= leopard


In [None]:
print('i =', i, 'j=', j)

In [51]:
# augmented assignment
tally = 0
tally += 1
tally *= 2

In [52]:
tally

2

### Working with numbers

The following are useful ways to specify and show the values of numeric data types.

In [63]:
# indicating data type
valA = 27            # no decimal indicates integer
valB = 27.           # decimal indicates float
valC = 2 + 7j        # j indicates complex (engineering convention)

In [64]:
type(valA)

int

In [65]:
# assigning values using scientific notation
valD = 35e3			 # float; mantissa and exponent required
valE = 35E-3		 # upper/lowercase e allowed

In [66]:
valD

35000.0

In [67]:
# indicating values of long numbers
valF = 1_000_395_321				# interpreted as integer
valG = 1,000,395,321				# interpreted as tuple 

In [62]:
valG

(1, 0, 395, 321)

### Data types and casting

In [69]:
# creating data structures
my_list = ['cat', 'dog', 'parrot']	# implicit based on formatting
my_list = list('parrot')			# explicit; class creator takes only 1 arg
my_list = set('parrot')			    # creator functions exist for all classes

In [70]:
my_list

{'a', 'o', 'p', 'r', 't'}

In [71]:
# creating empty data structures; useful for loop operations
empty = ''					# creates an empty string; no space!
empty = [] 				    # creates an empty list
empty = () 					# creates an empty tuple
empty = {} 				    # creates an empty dictionary
empty = set()				# creates an empty set

In [72]:
type(empty)

set

In [76]:
# creating data structures with range() pattern generator
x = list(range(10))				# generates list of: 0, 1, 2 … 9
y = tuple(range(10))			# generates tuple of: 0, 1, 2 … 9
z = set(range(10))				# generates set of: 0, 1, 2 … 9
# with optional second and third arguments (works for all of the above)
z = list(range(15,18))			# generates list of: 15, 16, 17
z = list(range(10, -1, -1))		# generates list of: 10, 9, 8 … 0

In [77]:
x

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [None]:
# casting among atomic data types
my_var = int(42.7)				# converts to int; truncates (not rounds)
my_var = float(42)				# converts to float
my_var = str(42)				# converts to string (works for floats)
my_var = int('42')				# converts to integer (only numerals)
my_var = int('42.7')			# converts to float (numerals+decimal)

In [None]:
type(my_var)

In [None]:
# casting among atomic data structures
my_var = list(mammal_tuple)		# tuple to list
my_var = set(mammal_tuple)		# tuple to set
my_var = tuple(tiny_str)		# string to tuple of individual letters

In [None]:
type(my_var)

### Common functions for working with data types and structures

In [None]:
# these work with all data types and structures
type(mammal_tuple)				# returns data type/structure
isinstance(mammal_tuple, int)	# tests for type integer; returns bool
dir(mammal_tuple)				# returns all valid methods for my_tuple

In [80]:
# these work with all iterables and return numerics
len(long_str)					# returns number of characters for str
len(mammal_tuple)				# returns number of items for containers
min(99, 3, 24)					# returns 3
min('parrot')					# returns 'a'
max(99, 3, 24)					# works the same way as min()
z = list(range(10))				# generate a list of integers	
sum = (z)						# adds elements in iterable	
sum(99, 3, 24)					# TypeError: does not take bare values

TypeError: 'list' object is not callable

In [79]:
# these work with all interables and return bools
my_tuple = [True, False, True]	# generate a tuple of truth values
all(my_tuple)					# returns True if all items are True; else returns False
any(my_tuple)					# returns True if at least 1 item is True; else returns False

True

In [82]:
# testing for membership with any container and return bools
'tiger' in mammal_tuple			# returns True if a match exists; else returns False
'tiger' not in mammal_tuple	    # opposite

True

In [None]:
# making a copy of mutable data structures
new_list = my_list.copy()		# works for set and dictionary, too
new_tuple = my_tuple.copy()		# error, not applicable for immutables; use assignment instead 

### Working with strings
The examples below are just a few of the many built-in string methods. Explore and experiment to become proficient manipulating strings!


In [None]:
# using different quoting methods 
s = 'Hello'					    # convention is to use single quotes
s = "Hello"					    # double quotes works; use to enclose '
s = '''Paul's'''				# triple quotes gives more flexibility	
s = """She said "Hello" to him."""  # triple quotes gives more flexibility	

In [None]:
s

In [None]:
# concatenating strings
s = 'Hello' + ' world'			# creates string 'Hello world'
s = 'Hello' + """ world"""		# mixing quoting among items works

In [None]:
s

In [88]:
# retrieving information about a string
b = 'wabe' in long_str	     		# returns True if substring in string
b = long_str.startswith('Twas')	    # returns True if substring begins string
i = long_str.count('the')			# returns count of substring
i = long_str.find('mimsy')	    	# returns index of first occ of string

In [89]:
b

False

In [97]:
# testing for character properties of a string and returns a bool
b = tiny_str.isalpha()				# True if only alphabetical chars
b = tiny_str.isalnum()				# True if only alphabetical + numerical chars
b = tiny_str.isidentifier()			# True if an identifier (variable name)

In [91]:
b

False

In [108]:
# manipulating strings
s = short_str.upper()				# converts all alphabetical to upper case
s = short_str.strip()				# removes all whitespace
s = '42'.zfill(5)					# left-pad with zeros; returns 00042
s = 'ac_7342'.removeprefix('ac_')	# strips prefix; returns 7342
s = short_str.replace('Atlantic', 'Pacific')	# replaces first occurrence only 
s = short_str.replace('Atlantic', 'Pacific', 2)	# replaces first 2 occurrences

In [109]:
t

'7342'

In [116]:
# splitting stings; these methods return lists of strings
s = long_str.split(',')				# split by defined character or string
s = short_str.split()			    # default delimiter is space
s = long_str.splitlines()			# splits at \n, \r, \nr (and others)
s = long_str.partition('gyre')		     # returns a list of 3 items


In [117]:
s

("'Twas brillig, and the slithy toves\n   Did ",
 'gyre',
 ' and gimble in the wabe:\nAll mimsy were the borogoves,\n   And the mome raths outgrabe.')

In [129]:
# formatted strings: the following are just a few examples
var1, var2, var3, var4 = 800, 2.23333252342424, 2000000000, 2000000.23242352
# examples of formatting numbers
print(f"The number is {var1:+}")	# returns +800; - displayed by default 
print(f"The number is {var1:06}")	# returns 000800; include leading 0 
print(f"The number is {var2:.4f}")	# returns 2.334; rounds; note decimal
print(f"The number is {var3:,}")	# returns 2,000,000,000
print(f"The number is {var3:_}")	# returns 2_000_000_000
print(f"The number is {var3:,.2f}")	# returns 2,000,000,000.00
print(f"The number is {var4:,.4f}")	# returns 2,000,000.2324
print(f'The number is {var3:e}')	# returns 2.000000e+09
# anything in curly braces is evaluated at runtime
var1, var2, var3 = 'Mars', 2, 3390
print(f"The planet {var1} has {var2} moons.")				
print(f"The planet {var1} has a diameter of {var3 * 2} kilometers.")
# date and time formatting: won't cover today but very useful

The number is +800
The number is 000800
The number is 2.2333
The number is 2,000,000,000
The number is 2_000_000_000
The number is 2,000,000,000.00
The number is 2,000,000.2324
The number is 2.000000e+09
The planet Mars has 2 moons.
The planet Mars has a diameter of 6780 kilometers.


### Working with lists
Lists are the most common data structure because they are so versatile. The following examples illustrate some of the things you can do with lists.

In [155]:
# modifying items in a list
fern_list[1] = 'Christmas'		            # replaces single existing value
fern_list[0:2] = ['bracken', 'Christmas']   # slices work; recommend 1:1 values

In [156]:
fern_list

['bracken', 'Christmas', 'staghorn']

In [162]:
# adding items to a list
fern_list.append('bicolor')		    # appends 1 item
fern_list.append(number_list)		# appends 1 item (list, not contents)
fern_list.extend(number_list)		# appends contents

In [None]:
fern_list

In [None]:
# removing items from a list
# remove last item and optionally return the value
fern_list.pop()					    # removes last item; returns if assigned
# remove by position (mutating; does not return items)
del fern_list[2]					# removes third item
del fern_list[:2]					# removes first 3 items
del fern_list[::2]				    # removes even-indexed items
# remove by value (mutating; does not return items)
fern_list.remove('maidenhair')		# only removes first match
# remove duplicates
fern_list = list(set(fern_list))	# converts to set and back to list
# remove all items
fern_list.clear()				    # converts to an empty list

In [166]:
# retrieving values from a list
extracted = number_list[2]	    	# returns 3rd item
extracted = number_list[1::2]		# returns even-indexed items
extracted = number_list.pop()		# returns and deletes last item

In [167]:
extracted

0

In [173]:
# retrieving information about the contents of a list
# built-in functions work as expected: len(), min(), max(), sum(), all(), any()
i = max(fern_list)					# returns tree
# test for membership in a list
b = 'staghorn' in fern_list			# returns True
# count items in a list that match a value
i = fern_list.count('staghorn')		# returns 1
# retrieve the index of the first item in a list that match a value
i = fern_list.index('staghorn')		# returns 1; raises error if no match
if 'stagnhorn' in fern_list:	# to avoid halting execution if not match
    i = fern_list.index('staghorn')	

In [None]:
i

In [178]:
# sorting and reversing a list
fern_list = ['maidenhair', 'tree', 'staghorn']
# sort in-place
fern_list.sort()				# sorts in-place 
fern_list.sort(reverse = True)	# sorts in reverse order
fern_list.sort(key = len)		# sorts by length
# retrieve sorted contents without altering the list
f = sorted(fern_list)			# assigns sorted list  
f = sorted(fern_list, key=len)	# both above options work  
# reverse in-place
fern_list.reverse()				# reverses in-place 
# retrieve reversed contents without altering the list
f = fern_list[::-1]			    # assigns the reversed list 

In [180]:
fern_list
# f

['maidenhair', 'staghorn', 'tree']

### Working with tuples
Tuples are commonly returned by functions and methods; they are also useful as a way to protect data from modification.

In [191]:
# creating a tuple
mammal_tuple = ('mouse', 'fruit bat', 'chimpanzee', 'leopard')
my_tup = ('parrot',)				# comma required if only one item

In [183]:
my_tup

('parrot',)

In [187]:
# retrieving information and contents from a tuple is similar to working with a list
# .count() and .index() are the only methods that work with tupbles; see examples with lists

In [192]:
# retrieving sorted  contents of tuples (direct sorting is not possible with tuples)
m = sorted(mammal_tuple)		    # assigns sorted contents  
m = sorted(mammal_tuple, key=len)	# sorts by length
m = sorted(mammal_tuple, reverse=True)	# sorts in reverse order

NameError: name 'mammmal_tuple' is not defined

### Working with sets

Sets are among the least appreciated and utilized data structures. The examples below illustrate some of the ways you can use their capabilities.

In [None]:
set_A = {1, 2, 3, 4, 5, 6}
set_B = {2, 4, 6, 8, 10, 12}
set_C = {22, 33, 44, 55, 66}

In [193]:
# creating a set
set_C = {'parrot'}				# no comma required for one item
set_C = set()					# creates an empty set

In [194]:
set_C

set()

In [None]:
# removing items from a set
# to remove based on value
set_A.remove(3)					# raises an exception if absent
set_A.discard(3)				# silent if absent
# to remove a random item (useful for looping)
set_A.pop()				    	# returns removed value if assigned
# to remove all items
set_A.clear()				    # becomes an empty set

In [None]:
set_A

In [None]:
# creating a set based on values in multiple sets
# to create a union of sets (transitive)
set_A | set_B					# returns union, removing any duplicates
set_A.union(set_B)				# same as above
set_A | set_B | set_C			# same but union of three sets
set_A.union(set_B, set_C)		# same as above
# to create an intersection of sets (transitive)
set_A & set_B					# returns intersection (common items)
set_A.intersection(set_B)			# same as above
# to create a difference between sets: present in first but not in second (not transitive)
set_A - set_B					# returns difference; order matters
set_A.difference(set_B)			# same as above
# to create a symmetric difference between sets: unique to each (transitive)
set_A ^ set_B					# returns difference; order matters
set_A.symmetric_difference(set_B)	# same as above

In [None]:
# adding items to a set based on set operations
# unlike the above, these are mutating methods; they change 1 set
set_A.update_union(set_B)			# updates set_A to union; set_B unchanged
# parallel update_ methods exist for other set operations

In [None]:
# testing for membership in a set
'tiger' in set_C				# returns bool
'tiger' not in set_C			# returns bool

In [None]:
# testing for outcomes of set operations
# these operations are limited to 2 sets and are not mutating; they return bools  
set_A.is_disjoint(set_B)		# True if no items are in common
set_A.is_subset(set_B)			# True if all items in A are in B
set_A.is_superset(set_B)		# True if all items in B are in A
# alternatively can use operators; this allows the identity testing for all but disjoint

### Working with dictionaries
Dictionaries are commonly used to for a variety of purposes. 

In [None]:
birds = ['parrot', 'quail', 'duck']
genera = ['Forpus', 'Coturnix', 'Anser']
tmnt = {"Leonardo": ["leader", "blue", "katana"], 
        "Raphael" : ["muscle", "red", "sai"],
        "Donatello" : ["brains", "purple", "bo"],
        "Michelangelo" : ["comedian", "orange", "nunchuk"]}

In [None]:
# creating a dictionary
# from scratch
my_dict = {'A':1, 'B':2, 'C':3}	# tedious!
my_dict = {}					# creates an empty dictionary (not set)
# from an iterable with enumerate
new_dict = dict(enumerate(birds))	# create a dictionary from the list
new_dict = dict(enumerate(birds), start=100)  # starts numbering at 100
# from two iterables with zip
new_dict = dict(zip(birds, genus)) # zip returns a list of tuples

In [None]:
new_dict

In [None]:
# updating and adding values in a dictionary
# based on key value
new_dict['parrot'] = 'Psittacus'	        # updates if key exists; else adds pair
# using one or more key:value pairs
new_dict.update({'parrot':'Psittacus'})	# updates; adds if key absent
if 'parrot' in new_dict: update({'parrot':'Psittacus'})	# only if key exists

In [None]:
new_dict

In [None]:
# removing values from a dictionary
# based on key value; pop() and popitem() return tuples
del new_dict['parrot']			# removes if key exists; else error
new_dict.pop('parrot')			# same as above; returns if assigned
new_dict.popitem()				# removes most recent addition
# remove random item
new_dict.pop()					# removes at random; returns if assigned
# remove all items
new_dict.clear()					# becomes an empty dictionary

In [None]:
new_dict

In [None]:
# retrieving information from a dictionary
#   mostly similar to working with a list: the usual functions and operators work
#   however: be aware of the gotchas below
# membership tests only for keys
'parrot' in new_dict				 # returns True iff 'parrot' is a key
# testing for membership in values requires an extra step
'parrot' in new_dict.values()		 # returns True iff 'parrot' is a key
# testing for equality of values directly *always* returns False
new_dict.values() == tmnt.values()	 # returns False
my_dict.values() == my_dict.values() # returns False
# testing for equality of values requires an indirect comparison 
list(my_dict.values()) == list(my_dict.values())   # returns True

In [None]:
# retrieving values from a dictionary
# accessing values based on key
new_dict['parrot']				    # returns value; error if key missing
new_dict.get('parrot')			    # same; returns None if key missing
new_dict.get('parrot', 'no key!')	# same; message if key missing
# accessing contents; these methods all return special data structures; all are iterable
new_dict.items()					# returns a dict items object
new_dict.keys()					    # returns a dict keys object 
new_dict.values()				    # returns a dict values object 
# accessing contents directly
list(new_dict.items())			    # returns a list of key:value tuples
list(new_dict.keys())				# returns a list of keys
list(new_dict.values())			    # returns a list of values
# accessing contents in a loop
for k, v in new_dict.items():				
    curr_key, curr_value = new_dict[k, v]
for k in new_dict.keys():				
		curr_key = new_dict[k]


In [None]:
# sorting items in a dictionary is similar to lists
# however: functions and methods operate on *keys*, not values