## Lists

Dynamic, mutable sequences; the elements usually share the same data type

In [4]:
squares = [1, 4, 9, 16, 25]
squares

[1, 4, 9, 16, 25]

In [5]:
squares[0]

1

In [57]:
squares[-1]  # negative indexes count from the end

25

In [8]:
# slice [:3] -> a new list holding the first three items
squares[:3]

[1, 4, 9]

In [9]:
# slice [-3:] -> a new list holding the last three items
squares[-3:]

[9, 16, 25]

In [10]:
# a full slice returns a shallow copy of the whole list
squares[:]

[1, 4, 9, 16, 25]

In [41]:
# list() also builds a new list (a shallow copy) from any iterable
list(squares)

[1, 4, 9, 16, 25]

In [11]:
# + concatenates the two lists into a new list
squares + [36, 49, 64, 81, 100]

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [24]:
cubes = [1, 8, 27, 65, 125]  # something's wrong here
4 ** 3  # the cube of 4 is 64, not 65!

64

In [40]:
# a bare name evaluates to the object itself: here the built-in list type
list

list

In [25]:
cubes[3] = 64  # replace the wrong value
cubes

[1, 8, 27, 64, 125]

In [26]:
cubes.append(216)  # add the cube of 6
cubes.append(7 ** 3)  # and the cube of 7
cubes

[1, 8, 27, 64, 125, 216, 343]

In [18]:
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
letters

['a', 'b', 'c', 'd', 'e', 'f', 'g']

In [19]:
letters[2:5] = ['C', 'D', 'E']
letters

['a', 'b', 'C', 'D', 'E', 'f', 'g']

In [20]:
letters[2:5] = []  # same as: del letters[2:5]
letters

['a', 'b', 'f', 'g']

In [21]:
letters[:] = []  # same as: letters.clear()
letters

[]

In [27]:
my_words = ['these', 'are', 'mine']
# plain assignment copies nothing: both names now refer to the same list
your_words = my_words
your_words

['these', 'are', 'mine']

In [28]:
# emptying the list in place is visible through the other name as well
my_words[:] = []
your_words

[]

In [29]:
my_words = ['these', 'are', 'mine']
your_words = list(my_words)  # preferred over my_words[:]
my_words[:] = []
your_words

['these', 'are', 'mine']

In [31]:
# builtin function len()
letters = ['a', 'b', 'c', 'd']
len(letters)

4

In [39]:
# in keyword to test membership
'd' in letters

True

In [32]:
a = ['a', 'b', 'c']
n = [1, 2, 3]
x = [a, n]
x

[['a', 'b', 'c'], [1, 2, 3]]

In [33]:
x[0]

['a', 'b', 'c']

In [34]:
x[0][1]

'b'

### ⏱⏱⏱ Performance Matters: Lists

- lists are implemented as dynamic arrays (of object references) in memory
- `L[x]` is O(1)
- `L[x] = ...` is O(1)
- `L.append(e)` is O(1) amortized worst case
- `L.pop()` is O(1) from end
- `len(L)` is O(1)
- most other operations O(n)

## Tuples
Immutable, lightweight, often mixed types & position-significant

In [44]:
# tuple packing: the commas build the tuple, the parentheses are optional here
t = 12345, 54321, 'hello!'
t[0]

12345

In [45]:
t

(12345, 54321, 'hello!')

In [47]:
# tuples may be nested
u = t, (1, 2, 3, 4, 5)
u

((12345, 54321, 'hello!'), (1, 2, 3, 4, 5))

In [48]:
# Tuples are immutable, so item assignment raises TypeError.
# The error is caught and reported so that this demonstration does not abort
# a "Restart Kernel -> Run All" pass over the notebook.
try:
    t[0] = 88888
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: 'tuple' object does not support item assignment

In [50]:
# a tuple is immutable itself, but it can hold mutable objects such as lists
v = ([1, 2, 3], [3, 2, 1])
v

([1, 2, 3], [3, 2, 1])

In [52]:
empty = ()
# the comma, not the parentheses, is what makes a one-item tuple
singleton = 'hello',    # <-- note trailing comma
len(empty)

0

In [53]:
len(singleton)

1

In [54]:
singleton

('hello',)

In [56]:
# sequence unpacking
x, y, z = t
y

54321

### ⏱⏱⏱ Performance Matters: Tuples

- tuples are fixed-size and allocated only once
- prefer tuples for safety, limited memory use and speed

## Dicts
Insertion-ordered (guaranteed since Python 3.7; a CPython implementation detail in 3.6), mutable mapping from any hashable keys → any values

In [29]:
# NOTE: %pprint is a toggle, so re-running this cell switches pretty printing
# back ON. Run it exactly once per kernel session.
%pprint   # IPython "helpfully" sorting our output
tel = {'jack': 4098, 'sape': 4139}
tel['guido'] = 4127
tel

Pretty printing has been turned OFF


{'jack': 4098, 'sape': 4139, 'guido': 4127}

In [30]:
tel['jack']

4098

In [31]:
# del removes a key; a newly inserted key goes to the end of the dict
del tel['sape']
tel['irv'] = 4127
tel

{'jack': 4098, 'guido': 4127, 'irv': 4127}

In [32]:
# iterating a dict yields its keys, in insertion order
list(tel)

['jack', 'guido', 'irv']

In [69]:
for element in tel:
    print(element)

jack
guido
irv


In [33]:
# sorted() returns the keys as a new, sorted list; the dict is unchanged
sorted(tel)

['guido', 'irv', 'jack']

In [72]:
list(tel.items())

[('jack', 4098), ('guido', 4127), ('irv', 4127)]

In [76]:
# Walk the (key, value) pairs and print each one as "name -- number".
for name, number in tel.items():
    print(name, number, sep=' -- ')

jack -- 4098
guido -- 4127
irv -- 4127


In [34]:
'guido' in tel

True

In [35]:
'jack' not in tel

False

In [36]:
# dict() built from a sequence of (key, value) pairs
dict([('sape', 4139), ('guido', 4127), ('jack', 4098)])

{'sape': 4139, 'guido': 4127, 'jack': 4098}

In [37]:
# dict() built from keyword arguments (the keys must be valid identifiers)
dict(sape=4139, guido=4127, jack=4098)

{'sape': 4139, 'guido': 4127, 'jack': 4098}

In [38]:
# order ignored when comparing
{'a': 1, 'b': 2} == {'b': 2, 'a': 1}

True

In [40]:
# repeated keys in a literal raise no error, but only the last value is kept
{'a': 1, 'a': 3}

{'a': 3}

### ⏱⏱⏱ Performance Matters: Dicts

- implemented as compact array + index hash table
- `D[x]` is O(1) on average
- `x in D` is O(1) on average
- `D[x] = ...` is O(1) on average (O(n) in the worst case, e.g. many hash collisions)
- `del D[x]`, `D.pop(x)` are O(1) on average
- `len(D)` is O(1)

A dict is not optimized for holding many records with the same keys; to work with many similar records more efficiently, consider a namedtuple or a pandas DataFrame.

## Sets
Unordered collection of hashable values with no duplicates

In [41]:
basket = {'apple', 'orange', 'apple', 'pear', 'orange', 'banana'}
print(basket)

{'pear', 'banana', 'orange', 'apple'}


In [42]:
'orange' in basket

True

In [43]:
'crabgrass' in basket

False

In [44]:
# difference: items in basket that are not in the other set
basket - {'orange', 'broccoli'}

{'pear', 'apple', 'banana'}

In [45]:
# union: items in either set
basket | {'apple', 'strawberry'}

{'banana', 'strawberry', 'pear', 'orange', 'apple'}

In [46]:
# intersection: items in both sets
basket & {'pear', 'orange', 'broccoli'}

{'pear', 'orange'}

In [47]:
# symmetric difference: items in exactly one of the two sets
basket ^ {'apple', 'broccoli'}

{'banana', 'pear', 'broccoli', 'orange'}

In [49]:
# > tests for a proper superset; any non-empty set is one of the empty set
basket > set()  # Can't use {}, that's a dict

True

In [50]:
# False: 'broccoli' is not in basket, so basket is not a superset
basket > {'apple', 'broccoli'}

False

In [52]:
# True: basket holds 'apple' and other items besides
basket > {'apple'}

True

In [54]:
set(['a', 'b', 'c', 'd'])

{'b', 'c', 'd', 'a'}

In [56]:
set('abcd')

{'b', 'c', 'd', 'a'}

In [59]:
set('typing quotes and commas is very tiring'.split())

{'is', 'very', 'typing', 'commas', 'and', 'tiring', 'quotes'}

In [64]:
for element in set('abcd'):
    print(element)

b
c
d
a


## Frozensets
Immutable unordered collection of hashable values with no duplicates

In [60]:
frozenset('abc')

frozenset({'b', 'c', 'a'})

In [61]:
# frozensets are hashable, so they can serve as dict keys;
# frozenset('cba') has the same elements, so it looks up the same entry
d = {frozenset('abc'): 'found it'}
d[frozenset('cba')]

'found it'

In [63]:
# frozensets can be members of a set; equal ones collapse into a single entry
{frozenset('pqr'), frozenset(), frozenset('rpq'), frozenset('rp'), frozenset()}

{frozenset(), frozenset({'p', 'r'}), frozenset({'r', 'q', 'p'})}

## Files

In [84]:
# Write two lines to 'workfile'. The `with` block closes the file even if
# write() raises, which a bare open()/close() pair does not guarantee.
# `f` stays bound (to the now-closed file object) after the block ends.
with open('workfile', 'w', encoding='utf-8') as f:
    f.write('Line 1\nLine 2\n')

In [87]:
with open('workfile', encoding='utf-8') as f:
    read_data = f.read()
read_data

'Line 1\nLine 2\n'

In [83]:
# Once a file has been closed, reading from it raises ValueError.
# The error is caught and reported so that this demonstration does not abort
# a "Restart Kernel -> Run All" pass over the notebook.
f.close()
try:
    f.read()
except ValueError as exc:
    print(f'{type(exc).__name__}: {exc}')

ValueError: I/O operation on closed file.

In [89]:
# Iterating over a file yields one line at a time. Each line keeps its trailing
# '\n' and print() adds another, hence the blank lines in the output.
with open('workfile', encoding='utf-8') as f:
    for line in f:
        print('---')
        print(line)

---
Line 1

---
Line 2



In [91]:
# Text files accept only str: writing a dict raises TypeError.
# The error is caught and reported so that this demonstration does not abort
# a "Restart Kernel -> Run All" pass over the notebook. Opening in 'w' mode
# still creates (or truncates) the file, so 'important' is left empty.
value = {'the_answer': 42}
with open('important', 'w', encoding='utf-8') as f:
    try:
        f.write(value)
    except TypeError as exc:
        print(f'{type(exc).__name__}: {exc}')

TypeError: write() argument must be str, not dict

In [95]:
import json

# json.dump serialises the dict to JSON text and writes it to the open file.
# With the default ensure_ascii=True, non-ASCII characters such as the emoji
# are written as \uXXXX escapes rather than literally.
value = {'the_answer': 42, 'my_mood': '😺'}
with open('important', 'w', encoding='utf-8') as f:
    json.dump(value, f)

In [99]:
# read the raw JSON text back; the emoji appears in its escaped \uXXXX form
with open('important', encoding='utf-8') as f:
    content = f.read()
content

'{"the_answer": 42, "my_mood": "\\ud83d\\ude3a"}'

In [101]:
# json.loads parses a JSON string back into Python objects (here a dict)
json.loads(content)['the_answer']

42

## String Formatting


In [105]:
year = 2019
rtype = 'Contracts Quarterly'
f'Total {rtype} records for {year}'

'Total Contracts Quarterly records for 2019'

In [109]:
year = 2019
rtype = 'Contracts Quarterly'
'Total {} records for {}'.format(rtype, year)

'Total Contracts Quarterly records for 2019'

In [106]:
total = 1_018_410  # underscores in numeric literals are visual separators only
count = 596_196
percentage = count / total
# {:-9}   -> minimum width 9; '-' (the default) shows a sign only for negatives
# {:2.2%} -> value * 100 with 2 decimals and a trailing '%' (minimum width 2)
'{:-9} records  {:2.2%}'.format(count, percentage)

'   596196 records  58.54%'

In [107]:
total = 1_018_410
count = 596_196
# f-strings take a format spec after ':' and may embed whole expressions
f'{count:-9} records  {count / total:2.2%}'

'   596196 records  58.54%'

In [113]:
# Table of x, x squared and x cubed for x = 0..10, right-aligned in columns
# of width 3, 4 and 5.
for x in range(11):
    print(f'{x:3d} {x ** 2:4d} {x ** 3:5d}')

  0    0     0
  1    1     1
  2    4     8
  3    9    27
  4   16    64
  5   25   125
  6   36   216
  7   49   343
  8   64   512
  9   81   729
 10  100  1000


In [115]:
# classic printf-style formatting with the % operator
# %5.3f -> fixed-point notation, 3 decimals, minimum width 5
import math
'The value of pi is approximately %5.3f.' % math.pi

'The value of pi is approximately 3.142.'