# Chapter 2 — An Array of Sequences

In [1]:
from collections import abc

In [2]:
issubclass(tuple, abc.Sequence)

True

In [None]:
issubclass(list, abc.MutableSequence)

True

## List Comprehensions and Generator Expressions

In [4]:
# Baseline version: collect the Unicode code point of every character in
# `symbols` with an explicit loop and one append per character.
symbols = "$¢£¥€¤"
codes = []

for symbol in symbols:
    codes.append(ord(symbol))
  
codes

[36, 162, 163, 165, 8364, 164]

In [5]:
codes = [ord(symbol) for symbol in symbols]
codes

[36, 162, 163, 165, 8364, 164]

In [6]:
codes = [ord(sym)                    # no need for continuator
         for sym in symbols
        ]
codes

[36, 162, 163, 165, 8364, 164]

In [7]:
x = "ABC"
codes = [ord(x) for x in x]
x

'ABC'

In [8]:
codes

[65, 66, 67]

In [9]:
codes = [last:=ord(c) for c in x]

In [10]:
last

67

In [11]:
symbols = "$¢£¥€¤"
beyond_ascii = [ord(s) for s in symbols if ord(s)>127]
beyond_ascii

[162, 163, 165, 8364, 164]

In [12]:
# Same result built with map/filter: map(ord, symbols) yields the code points
# and the lambda keeps only those greater than 127.
beyond_ascii = list(
    filter(
        lambda c: c>127, map(ord, symbols)
    )
)
beyond_ascii

[162, 163, 165, 8364, 164]

### Cartesian Products

In [13]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
tshirts = [(color, size) for color in colors for size in sizes]
tshirts

[('black', 'S'),
 ('black', 'M'),
 ('black', 'L'),
 ('white', 'S'),
 ('white', 'M'),
 ('white', 'L')]

In [14]:
tshirts = [(color, size) for size in sizes
                         for color in colors
            ]
tshirts

[('black', 'S'),
 ('white', 'S'),
 ('black', 'M'),
 ('white', 'M'),
 ('black', 'L'),
 ('white', 'L')]

### Generator Expressions

In [15]:
symbols = "$¢£¥€¤"
tuple(ord(symbol) for symbol in symbols)

(36, 162, 163, 165, 8364, 164)

In [16]:
import array

In [17]:
array.array('I', (ord(symbol) for symbol in symbols))

array('I', [36, 162, 163, 165, 8364, 164])

In [18]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
for tshirt in (f'{c} {s}' for c in colors for s in sizes):
    print(tshirt)

black S
black M
black L
white S
white M
white L


## Tuples Are Not Just Immutable Lists

### Tuples as Records

In [19]:
# Tuples used as records: the position of each item carries its meaning.
lax_coordinates = (33.9425, -118.408056)
# Unpack a five-field record into individual names in one statement.
city, year, pop, chg, area = ('Tokyo', 2003, 32_450, 0.66, 8014)
# Each entry is a two-item tuple; the second loop below reads the first item as the country.
traveler_ids = [ ('USA', '31195855'), ('BRA', 'CE342567'), ('ESP', 'XDA205856') ]

# The % operator takes the whole tuple and fills both %s slots from it.
for passport in sorted(traveler_ids):
    print('%s/%s' % passport)
    
# `_` is a dummy target for the second item, which is not used here.
for country, _ in traveler_ids:
    print(country)

BRA/CE342567
ESP/XDA205856
USA/31195855
USA
BRA
ESP


### Tuples as Immutable Lists

In [20]:
a = (10, 'alpha', [1,2])
b = (10, 'alpha', [1,2])
a == b

True

In [21]:
b[-1].append(99)
a == b

False

In [22]:
a

(10, 'alpha', [1, 2])

In [23]:
b

(10, 'alpha', [1, 2, 99])

In [24]:
def fixed(o):
    """Tell whether `o` is hashable.

    Returns True when `hash(o)` succeeds and False when it raises TypeError,
    which is what happens for a tuple that holds a list.
    """
    try:
        hash(o)
    except TypeError:
        hashable = False
    else:
        hashable = True
    return hashable

In [25]:
tf = (10, 'alpha', (1,2))
tm = (10, 'alpha', [1,2])
fixed(tf)

True

In [26]:
fixed(tm)

False

## Unpacking Sequences and Iterables

In [27]:
lax_coordinates = (33.9425, -118.408056)
latitude, longitude = lax_coordinates
latitude

33.9425

In [28]:
longitude

-118.408056

In [29]:
a

(10, 'alpha', [1, 2])

In [30]:
b

(10, 'alpha', [1, 2, 99])

In [31]:
a, b = b, a
a

(10, 'alpha', [1, 2, 99])

In [32]:
divmod(20, 8)

(2, 4)

In [33]:
t = (20,8)
divmod(*t)

(2, 4)

In [34]:
quot, rema = divmod(*t)
quot

2

In [35]:
rema

4

In [37]:
import os
_, filename = os.path.split('/hjome/luciano/.ssh/id_rsa.pub')
filename

'id_rsa.pub'

### Using `*` to Grab Excess Items

In [38]:
a, b, *rest = range(5)
a

0

In [39]:
b

1

In [40]:
rest

[2, 3, 4]

In [41]:
a, b, rest

(0, 1, [2, 3, 4])

In [42]:
a, b, *rest = range(3)
a, b, rest

(0, 1, [2])

In [43]:
a, b, *rest = range(2)
a, b, rest

(0, 1, [])

In [44]:
a, *body, c, d = range(5)
a, body, c, d

(0, [1, 2], 3, 4)

### Unpacking with * in Function Calls and Sequence Literals

In [45]:
def fun(a, b, c, d, *rest):
    """Return the four required arguments followed by a tuple of any extras.

    The result always has five items: `a`, `b`, `c`, `d`, and `rest`, where
    `rest` is the (possibly empty) tuple of surplus positional arguments.
    """
    required = (a, b, c, d)
    return (*required, rest)

In [46]:
fun(*[1, 2,], 3, *range(4, 7))

(1, 2, 3, 4, (5, 6))

In [47]:
*range(4), 4

(0, 1, 2, 3, 4)

In [48]:
[*range(4), 4]

[0, 1, 2, 3, 4]

In [49]:
[*range(4), 4, *(5, 6, 7)]

[0, 1, 2, 3, 4, 5, 6, 7]

### Nested Unpacking

In [50]:
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('São Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

In [51]:
def maine():
    """Print a small table of the `metro_areas` entries whose longitude is <= 0.

    Each record is expected to be a 4-item tuple whose last item is a
    (latitude, longitude) pair; the two middle items are ignored.
    """
    print(f'{"":15} | {"latitude":>9} | {"longitude":>9}')
    for record in metro_areas:
        # Nested unpacking: pull the coordinate pair apart in the same statement.
        name, _, _, (lat, lon) = record
        if not lon <= 0:
            continue
        print(f'{name:15} | {lat:9.4f} | {lon:9.4f}')

In [53]:
maine()

                |  latitude | longitude
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
São Paulo       |  -23.5478 |  -46.6358


## Pattern Matching with Sequences

### Pattern Matching Sequences in an Interpreter

## Slicing

### Why Slices and Ranges Exclude the Last Item

In [54]:
l = [10, 20, 30, 40, 50, 60]
l[:2]

[10, 20]

In [55]:
l[2:]

[30, 40, 50, 60]

### Slice Objects

In [56]:
s = 'bicycle'
s[::3]

'bye'

In [57]:
s[::-1]

'elcycib'

In [58]:
s[::-2]

'eccb'

In [60]:
import collections

Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """A deck of `Card` tuples that supports `len()` and indexing/slicing."""

    # '2' through '10', then the four letter ranks.
    ranks = [*map(str, range(2, 11)), *'JQKA']
    suits = 'spades diamonds clubs hearts'.split()

    def __init__(self):
        """Build one card per (suit, rank) pair, suit by suit."""
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        """Return the number of cards held."""
        return len(self._cards)

    def __getitem__(self, position):
        """Delegate integer indexes and slices to the underlying list."""
        return self._cards[position]

In [62]:
deck = FrenchDeck()
len(deck)

52

In [63]:
deck[12::13]

[Card(rank='A', suit='spades'),
 Card(rank='A', suit='diamonds'),
 Card(rank='A', suit='clubs'),
 Card(rank='A', suit='hearts')]

In [64]:
deck[12::13]

[Card(rank='A', suit='spades'),
 Card(rank='A', suit='diamonds'),
 Card(rank='A', suit='clubs'),
 Card(rank='A', suit='hearts')]

In [67]:
invoice = """
0.....6.................................40........52...55........
1909  Pimoroni PiBrella                     $17.50    3    $52.50
1489  6mm Tactile Switch x20                 $4.95    2     $9.90
1510  Panavise Jr. - PV-201                 $28.00    1    $28.00
1601  PiTFT Mini Kit 320x240                $34.95    1    $34.95
"""

In [68]:
# Give each fixed-width column of the invoice text a named slice object, so the
# loop below can refer to columns by name instead of by hard-coded offsets.
SKU = slice(0, 6)
DESCRIPTION = slice(6, 40)
UNIT_PRICE = slice(40, 52)
QUANTITY = slice(52, 55)
ITEM_TOTAL = slice(55, None)

# [2:] drops the empty string before the first newline and the ruler line.
# The text also ends with a newline, so the last item is an empty string.
line_items = invoice.split('\n')[2:]

for item in line_items:
    print(item[UNIT_PRICE], item[DESCRIPTION])

    $17.50   Pimoroni PiBrella                 
     $4.95   6mm Tactile Switch x20            
    $28.00   Panavise Jr. - PV-201             
    $34.95   PiTFT Mini Kit 320x240            
 


### Multidimensional Slicing and Ellipsis

The ellipsis—written with three full stops (`...`), not the single character `…` (Unicode U+2026)—is recognized as a token by the Python parser. It is an alias for the `Ellipsis` object, the single instance of the `ellipsis` class. As such, it can be passed as an argument to functions and used as part of a slice specification, as in `f(a, ..., z)` or `a[i:...]`. NumPy uses `...` as a shortcut when slicing arrays of many dimensions; for example, if `x` is a four-dimensional array, `x[i, ...]` is a shortcut for `x[i, :, :, :,]`.

### Assigning to Slices

In [74]:
l = list(range(10))
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [75]:
l[2:5] = [20, 30]
l

[0, 1, 20, 30, 5, 6, 7, 8, 9]

In [76]:
del l[5:7]
l

[0, 1, 20, 30, 5, 8, 9]

In [77]:
l[3::2] = [11, 22]
l

[0, 1, 20, 11, 5, 22, 9]

In [79]:
# The right-hand side of a slice assignment must be an iterable, even when it
# holds a single item. A bare int raises TypeError, caught here only so the
# message can be displayed.
try:
    l[2:5] = 100
except TypeError as e:
    print(f"{e=}")

e=TypeError('can only assign an iterable')


In [80]:
l[2:5] = [100]
l

[0, 1, 100, 22, 9]

## Using `+` and `*` with Sequences

In [81]:
l = [1, 2, 3]
l * 5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [82]:
5 * 'abcd'

'abcdabcdabcdabcdabcd'

In [83]:
mylist = [[]]*3
mylist

[[], [], []]

In [84]:
mylist[0] = 'R'
mylist

['R', [], []]

In [85]:
mylist = [[]]*3
mylist

[[], [], []]

In [86]:
mylist[0].append("R")
mylist

[['R'], ['R'], ['R']]

### Building Lists of Lists

In [87]:
board = [["_"]*3 for i in range(3)]
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [90]:
board[1][2] = '✘'
board

[['_', '_', '_'], ['_', '_', '✘'], ['_', '_', '_']]

In [91]:
weird_board = [["_"]*3]*3
weird_board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [92]:
weird_board[1][2] = 'Ø'
weird_board

[['_', '_', 'Ø'], ['_', '_', 'Ø'], ['_', '_', 'Ø']]

In [None]:
# The aliasing trap written out step by step: the single `row` list is appended
# three times, so every entry of `board` refers to the same list object.
row = ["_"]*3
board = []
for i in range(3):
    board.append(row)
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [94]:
board[1][2] = '✘'
board

[['_', '_', '✘'], ['_', '_', '✘'], ['_', '_', '✘']]

In [95]:
board = []
for i in range(3):
    board.append(["_"]*3)
board

[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]

In [96]:
board[1][2] = '✘'
board

[['_', '_', '_'], ['_', '_', '✘'], ['_', '_', '_']]

### Augmented Assignment with Sequences

In [97]:
l = [1, 2, 3]
id(l)

139714185688960

In [98]:
l *=2
l

[1, 2, 3, 1, 2, 3]

In [99]:
id(l)

139714185688960

In [100]:
t = (1, 2, 3)
id(t)

139714186376896

In [101]:
t *= 2
id(t)

139714172197312

In [103]:
# Augmented assignment on a mutable item held by a tuple. The list is extended
# in place first, and only then does storing the result back into the tuple
# raise TypeError, so `t[2]` has already grown when the error is reported.
t = (1, 2, [30, 40])
try:
    t[2] += [50, 60]
except TypeError as e:
    print(f"{e=}")

e=TypeError("'tuple' object does not support item assignment")


## `list.sort` Versus the `sorted` Built-In

In [104]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
sorted(fruits)

['apple', 'banana', 'grape', 'raspberry']

In [105]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [106]:
sorted(fruits, reverse=True)

['raspberry', 'grape', 'banana', 'apple']

In [107]:
sorted(fruits, key=len)

['grape', 'apple', 'banana', 'raspberry']

In [108]:
sorted(fruits, reverse=True, key=len)

['raspberry', 'banana', 'grape', 'apple']

In [109]:
fruits

['grape', 'raspberry', 'apple', 'banana']

In [110]:
fruits.sort()

In [111]:
fruits

['apple', 'banana', 'grape', 'raspberry']

## When a List is **Not** the Answer

### Arrays

In [112]:
from array import array
from random import random

floats = array('d', (random() for i in range(10**7)))
floats[-1]

0.3900598535304972

In [113]:
# Write the array's raw machine values to a binary file. The `with` block
# closes the file even if `tofile` raises.
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)

In [114]:
# Read the 10**7 doubles back into a fresh array. The `with` block closes the
# file on exit, so no explicit close() call is needed (or can be forgotten).
floats2 = array('d')
with open('floats.bin', 'rb') as fp:
    floats2.fromfile(fp, 10**7)
floats2[-1]

0.3900598535304972

In [115]:
floats == floats2

True

In [116]:
!ls -l

total 3568040
-rw-r--r--. 1 jovyan users     105666 Apr 10 11:43 '107 DA0101EN-Review-Introduction jupyterlite.ipynb'
-rw-r--r--. 1 jovyan users     203842 Apr 10 12:48 '207 DA0101EN-2-Review-Data-Wrangling jupyterlite.ipynb'
-rw-r--r--. 1 jovyan users    1606671 Apr 24 18:38 '2 DV0101EN-Exercise-Area-Plots-Histograms-and-Bar-Charts.jupyterlite.ipynb'
-rw-r--r--. 1 jovyan users     477147 Apr 10 14:38 '307 DA0101EN-3-Review-Exploratory-Data-Analysis jupyterlite.ipynb'
-rw-r--r--. 1 jovyan users     994635 Apr 25 09:49 '3 DV0101EN-Exercise-Pie-Charts-Box-Plots-Scatter-Plots-and-Bubble-Plots.jupyterlite.ipynb'
-rw-r--r--. 1 jovyan users     525927 Apr 10 15:25 '407 DA0101EN-4-Review-Model-Development.ipynb'
-rw-r--r--. 1 jovyan users    3264149 Apr 25 12:15 '4 DV0101EN-Exercise-Waffle-Charts-Word-Clouds-and-Regression-Plots-py.ipynb'
-rw-r--r--. 1 jovyan users     465649 Apr 10 17:13 '507 DA0101EN-5-Review-Model-Evaluation-and-Refinement.ipynb'
-rw-r--r--. 1 jovyan users    1401916 Apr 2

### Memory Views

In [117]:
from array import array

In [118]:
octets = array('B', range(6))

In [119]:
m1 = memoryview(octets)
m1.tolist()

[0, 1, 2, 3, 4, 5]

In [120]:
m2 = m1.cast('B', [2, 3])
m2.tolist()

[[0, 1, 2], [3, 4, 5]]

In [121]:
m3 = m1.cast('B', [3, 2])
m3.tolist()

[[0, 1], [2, 3], [4, 5]]

In [122]:
m2[1,1] = 22
m3[1,1] = 33
octets

array('B', [0, 1, 2, 33, 22, 5])

In [124]:
numbers = array('h', [-2, -1, 0, 1, 2])
memv = memoryview(numbers)
len(memv)

5

In [125]:
memv[0]

-2

In [127]:
memv_oct = memv.cast('B')
memv_oct.tolist()

[254, 255, 255, 255, 0, 0, 1, 0, 2, 0]

In [128]:
# Byte 5 is the second byte of the third 2-byte item. The byte listing above
# shows the low byte first, so storing 4 there makes that item 4 * 256 = 1024.
memv_oct[5] = 4
numbers

array('h', [-2, -1, 1024, 1, 2])

### NumPy

In [129]:
import numpy as np

In [130]:
a = np.arange(12)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [131]:
type(a)

numpy.ndarray

In [132]:
a.shape

(12,)

In [133]:
a.shape = 3, 4

In [134]:
a 

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [135]:
a[2]

array([ 8,  9, 10, 11])

In [None]:
a[2, 1]

9

In [137]:
a[:, 1]

array([1, 5, 9])

In [138]:
a.transpose()

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [None]:
import numpy as np

### Deques and Other Queues

In [149]:
from collections import deque

In [150]:
dq = deque(range(10), maxlen=10)
dq

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)

In [151]:
dq.rotate(3)
dq

deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6], maxlen=10)

In [152]:
dq.rotate(-4)
dq

deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], maxlen=10)

In [153]:
dq.appendleft(-1)
dq

deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)

In [154]:
dq.extend([11, 22, 33,])
dq

deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33], maxlen=10)

In [155]:
dq.extendleft([10, 20, 30, 40])
dq

deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8], maxlen=10)