# Типы данных в Python

## Списковые включения 

Сравните два примера:

In [1]:
# Build the list of code points with an explicit loop and append().
symbols = '$¢£¥€¤'
codes = []
for symbol in symbols:
    codes.append(ord(symbol))

codes

[36, 162, 163, 165, 8364, 164]

In [2]:
# Build the list of code points with a single list comprehension.
symbols = '$¢£¥€¤'
codes = [ord(symbol) for symbol in symbols]
codes

[36, 162, 163, 165, 8364, 164]

In [4]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
# Cartesian product: the first `for` clause is the outer loop,
# so the pairs come out grouped by color.
tshirts = [(color, size) for color in colors for size in sizes]
print(tshirts)
# The equivalent nested loops produce the pairs in the same order.
for color in colors:
    for size in sizes:
        print((color, size))

# Swapping the `for` clauses groups the pairs by size instead.
tshirts = [(color, size) for size in sizes
                            for color in colors]
print(tshirts)

[('black', 'S'), ('black', 'M'), ('black', 'L'), ('white', 'S'), ('white', 'M'), ('white', 'L')]
('black', 'S')
('black', 'M')
('black', 'L')
('white', 'S')
('white', 'M')
('white', 'L')
[('black', 'S'), ('white', 'S'), ('black', 'M'), ('white', 'M'), ('black', 'L'), ('white', 'L')]


In [1]:
x = 1
# The comprehension's loop variable is local to the comprehension,
# so the outer x is not overwritten.
l = [x for x in 'ABC']
x

1

Списковое включение или filter?

In [4]:
symbols = '$Ф£¥€и'
# List comprehension: transform (ord) and filter (> 127) in one expression.
beyond_ascii = [ord(s) for s in symbols if ord(s) > 127]
print(beyond_ascii)
# The same computation expressed with map() and filter().
beyond_ascii = list(filter(lambda c: c > 127, map(ord, symbols)))
print(beyond_ascii)

[1060, 163, 165, 8364, 1080]
[1060, 163, 165, 8364, 1080]


In [13]:
l = list(map(ord, '123'))
l

[49, 50, 51]

Скорость

## Генераторы

In [18]:
symbols = '$¢£¥€¤'
# The parenthesised expression is a generator expression: nothing is computed
# yet. Putting it inside a list literal stores the generator object itself,
# which is why printing the list shows `<generator object <genexpr> ...>`.
r = [(ord(symbol) for symbol in symbols)]
print(r)

[<generator object <genexpr> at 0x7fac28052660>]


In [19]:
import array
array.array('I', (ord(symbol) for symbol in symbols)) 

array('I', [36, 162, 163, 165, 8364, 164])

In [21]:
colors = ['black', 'white']
sizes = ['S', 'M', 'L']
# The generator expression yields one formatted string at a time;
# the full list of combinations is never built.
for tshirt in (f'{c} - {s}' for c in colors for s in sizes):
    print(tshirt)

black - S
black - M
black - L
white - S
white - M
white - L


на дом range

## Кортежи 

In [23]:
lax_coordinates = (33.9425, -118.408056)
# Tuple unpacking: each field of the record is bound to its own name.
city, year, pop, chg, area = ('Tokyo', 2003, 32450, 0.66, 8014)
traveler_ids = [('USA', '31195855'), ('BRA', 'CE342567')]
# Tuples compare item by item, so sorted() orders them by country code first.
for passport in sorted(traveler_ids):
    # The % operator fills the two placeholders from the two-item tuple.
    print('%s/%s' % passport) 

BRA/CE342567
USA/31195855


In [25]:
for country, _ in traveler_ids:
    print(country)
    
_ = '...'

USA
BRA


In [11]:
lax_coordinates = (33.9425, -118.408056)
latitude, longitude = lax_coordinates # tuple unpacking
print(latitude)
print(longitude)

33.9425
-118.408056


In [13]:
a = 1
b = 2
b, a = a, b

In [4]:
print(divmod(20, 8))
t = (20, 8)
q, r = divmod(*t)
q, r

(2, 4)


(2, 4)

In [5]:
import os
_, filename = os.path.split('./notebook.ipynb')
filename

'notebook.ipynb'

In [10]:
# *rest collects whatever is left after a and b (possibly nothing).
a, b, *rest = range(5)
print(a, b, rest)
a, b, *rest = range(3)
print(a, b, rest)
a, b, *rest = range(2)
print(a, b, rest)
# Only one value is available for the two mandatory targets: raises ValueError.
a, b, *rest = range(1)
print(a, b, rest)

0 1 [2, 3, 4]
0 1 [2]
0 1 []


ValueError: not enough values to unpack (expected at least 2, got 1)

In [26]:
a, *body, c, d = range(5)
print(a, body, c, d)
*head, b, *body = range(5)
print(head, b, body)

SyntaxError: two starred expressions in assignment (cell_name, line 6)

In [12]:
# Each record is (name, country code, population, (latitude, longitude)).
metro_areas = [
    ('Tokyo', 'JP', 36.933, (35.689722, 139.691667)),
    ('Delhi NCR', 'IN', 21.935, (28.613889, 77.208889)),
    ('Mexico City', 'MX', 20.142, (19.433333, -99.133333)),
    ('New York-Newark', 'US', 20.104, (40.808611, -74.020386)),
    ('Sao Paulo', 'BR', 19.649, (-23.547778, -46.635833)),
]

header = '{:15} | {:^9} | {:^9}'.format('', 'lat.', 'long.')
print(header)
fmt = '{:15} | {:9.4f} | {:9.4f}'
# Nested unpacking: the coordinate pair is split into its two parts
# directly in the loop header.
for name, cc, pop, (latitude, longitude) in metro_areas:
    if longitude > 0:
        # Only rows whose longitude is zero or negative are printed.
        continue
    print(fmt.format(name, latitude, longitude))

                |   lat.    |   long.  
Mexico City     |   19.4333 |  -99.1333
New York-Newark |   40.8086 |  -74.0204
Sao Paulo       |  -23.5478 |  -46.6358


In [14]:
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
print(tokyo)
print(tokyo.population)
print(tokyo.coordinates)
print(tokyo[1])

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.689722, 139.691667))
36.933
(35.689722, 139.691667)
JP


In [16]:
print(City._fields)
LatLong = namedtuple('LatLong', 'lat long')
delhi_data = ('Delhi NCR', 'IN', 21.935, LatLong(28.613889, 77.208889))
delhi = City._make(delhi_data)
print(delhi._asdict())
for key, value in delhi._asdict().items():
    print(key + ':', value)

('name', 'country', 'population', 'coordinates')
OrderedDict([('name', 'Delhi NCR'), ('country', 'IN'), ('population', 21.935), ('coordinates', LatLong(lat=28.613889, long=77.208889))])
name: Delhi NCR
country: IN
population: 21.935
coordinates: LatLong(lat=28.613889, long=77.208889)


на дом dataclass

## Срезы

Почему с нуля?
Число элементов
две непересекающиеся части

In [None]:
l = [10, 20, 30, 40, 50, 60]
print(l[:2])
print(l[2:])
print(l[:3])
print(l[3:])

In [None]:
s = 'bicycle'
print(s[::3])
print(s[::-1])
print(s[::-2])

In [21]:
invoice = """
0.....6.................................40.............52...55........
1909  Pimoroni PiBrella                 $17.50         3    $52.50
1489  6mm Tactile Switch x20            $4.95          2    $9.90
1510  Panavise Jr. - PV-201             $28.00         1    $28.00
1601  PiTFT Mini Kit 320x240            $34.95         1    $34.95
"""
# Named slice objects, one per fixed-width column of the invoice text.
SKU = slice(0, 6)
DESCRIPTION = slice(6, 40)
UNIT_PRICE = slice(40, 52)
QUANTITY = slice(52, 55)
ITEM_TOTAL = slice(55, None)
# [2:] skips the empty first line and the column ruler line.
line_items = invoice.split('\n')[2:]
for item in line_items:
    print(item[UNIT_PRICE], item[DESCRIPTION])

$17.50       Pimoroni PiBrella                 
$4.95        6mm Tactile Switch x20            
$28.00       Panavise Jr. - PV-201             
$34.95       PiTFT Mini Kit 320x240            
 


на дом Многомерные срезы

In [22]:
# NOTE: the bare list literals below are pasted REPL output; as statements
# they are evaluated and discarded, so they have no effect.
l = list(range(10))
print(l)
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# Replace three items with two: the list becomes one item shorter.
l[2:5] = [20, 30]
print(l)
[0, 1, 20, 30, 5, 6, 7, 8, 9]
# Delete a slice.
del l[5:7]
print(l)
[0, 1, 20, 30, 5, 8, 9]
# Extended slice (step 2): assigns to positions 3 and 5.
l[3::2] = [11, 22]
print(l)
[0, 1, 20, 11, 5, 22, 9]
# The right-hand side of a slice assignment must be an iterable:
# this line raises TypeError.
l[2:5] = 100

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[0, 1, 20, 30, 5, 6, 7, 8, 9]
[0, 1, 20, 30, 5, 8, 9]
[0, 1, 20, 11, 5, 22, 9]


TypeError: can only assign an iterable

In [23]:
l[2:5] = [100]
l

[0, 1, 100, 22, 9]

## Использование + и * с последовательностями

In [24]:
l = [1, 2, 3]
l * 5

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]

In [25]:
5 * 'abcd'

'abcdabcdabcdabcdabcd'

## Вложенные последовательности

In [26]:
board = [['_'] * 3 for i in range(3)]
print(board)
board[1][2] = 'X'
print(board)


[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', 'X'], ['_', '_', '_']]


In [27]:
# The outer `* 3` repeats three references to the same inner list.
weird_board = [['_'] * 3] * 3
print(weird_board)
# Changing one row therefore shows up in every row.
weird_board[1][2] = 'X'
print(weird_board)


[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', 'X'], ['_', '_', 'X'], ['_', '_', 'X']]


In [28]:
row = ['_'] * 3
board = []
for i in range(3):
    # The same row object is appended three times.
    board.append(row) 

# All three rows are one list, so the change is visible in each of them.
board[0][0] = 'X'
print(board)

[['X', '_', '_'], ['X', '_', '_'], ['X', '_', '_']]


In [29]:
# Build a 3x3 board whose rows are independent lists.
board = []
for i in range(3):
    # A fresh row list is created on every iteration.
    row = ['_'] * 3
    board.append(row)

print(board)
# Only the third row changes, because each row is a distinct object.
board[2][0] = 'X'
print(board)


[['_', '_', '_'], ['_', '_', '_'], ['_', '_', '_']]
[['_', '_', '_'], ['_', '_', '_'], ['X', '_', '_']]


## Комбинированное присваивание с последовательностями

In [31]:
l = [1, 2, 3]
print(id(l))
# In-place repetition: the list object keeps its identity (same id below).
l *= 2
print(l)
# Pasted REPL output; as a statement it has no effect.
[1, 2, 3, 1, 2, 3]
print(id(l))
t = (1, 2, 3)
print(id(t))
# Tuples are immutable: *= builds a new tuple and rebinds t (different id).
t *= 2
print(id(t))

140334126524488
[1, 2, 3, 1, 2, 3]
140334126524488
140334127901360
140334126652872


In [7]:
t = (1, 2, [30, 40])
# The inner list is extended in place first; storing the result back into
# the tuple then raises TypeError. Both effects happen: the error is raised
# and t[2] has already been changed.
t[2] += [50, 60]


TypeError: 'tuple' object does not support item assignment

In [8]:
t

(1, 2, [30, 40, 50, 60])

In [11]:
l = list(t)
l[2] = []
t = tuple(l)
t

(1, 2, [])

## Встроенная функция sorted и list.sort

In [34]:
fruits = ['grape', 'raspberry', 'apple', 'banana']
# sorted() returns a new list; the original list is left unchanged.
print(sorted(fruits))
print(fruits)
print(sorted(fruits, reverse=True))
# key=len orders by string length instead of alphabetically.
print(sorted(fruits, key=len))
print(sorted(fruits, key=len, reverse=True))
print(fruits)
# list.sort() sorts in place, so fruits itself changes here.
fruits.sort()
print(fruits)

['apple', 'banana', 'grape', 'raspberry']
['grape', 'raspberry', 'apple', 'banana']
['raspberry', 'grape', 'banana', 'apple']
['grape', 'apple', 'banana', 'raspberry']
['raspberry', 'banana', 'grape', 'apple']
['grape', 'raspberry', 'apple', 'banana']
['apple', 'banana', 'grape', 'raspberry']


## Bisect

In [35]:
import bisect
import sys
HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]
NEEDLES = [0, 1, 2, 5, 8, 10, 22, 23, 29, 30, 31]
ROW_FMT = '{0:2d} @ {1:2d} {2}{0:<2d}'


def demo(bisect_fn):
    """Print where bisect_fn would insert each needle, largest needle first.

    bisect_fn is called as bisect_fn(HAYSTACK, needle) and its result is
    used as the insertion index. Each printed row shows the needle, the
    index, and the needle again prefixed by that many ' |' markers.
    """
    for needle in NEEDLES[::-1]:
        position = bisect_fn(HAYSTACK, needle)
        print(ROW_FMT.format(needle, position, ' |' * position))


# Run the same demo for both variants; the name of bisect.bisect is
# reported as bisect_right.
for bisect_fn in (bisect.bisect_left, bisect.bisect):
    print('DEMO:', bisect_fn.__name__)
    print('haystack ->', ' '.join('%2d' % n for n in HAYSTACK))
    demo(bisect_fn)


DEMO: bisect_left
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14  | | | | | | | | | | | | | |31
30 @ 13  | | | | | | | | | | | | |30
29 @ 12  | | | | | | | | | | | |29
23 @  9  | | | | | | | | |23
22 @  9  | | | | | | | | |22
10 @  5  | | | | |10
 8 @  4  | | | |8 
 5 @  2  | |5 
 2 @  1  |2 
 1 @  0 1 
 0 @  0 0 
DEMO: bisect_right
haystack ->  1  4  5  6  8 12 15 20 21 23 23 26 29 30
31 @ 14  | | | | | | | | | | | | | |31
30 @ 14  | | | | | | | | | | | | | |30
29 @ 13  | | | | | | | | | | | | |29
23 @ 11  | | | | | | | | | | |23
22 @  9  | | | | | | | | |22
10 @  5  | | | | |10
 8 @  5  | | | | |8 
 5 @  3  | | |5 
 2 @  1  |2 
 1 @  1  |1 
 0 @  0 0 


In [36]:
import bisect
import random

SIZE = 7

# Fixed seed so the sequence of pseudo-random items is reproducible.
random.seed(1729)
my_list = []
for i in range(SIZE):
    new_item = random.randrange(2 * SIZE)
    # insort puts the item at the position that keeps my_list sorted.
    bisect.insort(my_list, new_item)
    print(f'{new_item:2d} ->', my_list)

10 -> [10]
 0 -> [0, 10]
 6 -> [0, 6, 10]
 8 -> [0, 6, 8, 10]
 7 -> [0, 6, 7, 8, 10]
 2 -> [0, 2, 6, 7, 8, 10]
10 -> [0, 2, 6, 7, 8, 10, 10]


## Массивы

In [38]:
from array import array
from random import random

# Write ten million doubles to a binary file and read them back.
floats = array('d', (random() for i in range(10**7)))
print(floats[-1])

# `with` closes the file even if tofile() raises.
with open('floats.bin', 'wb') as fp:
    floats.tofile(fp)

floats2 = array('d')
with open('floats.bin', 'rb') as fp:
    # Read exactly as many items as were written.
    floats2.fromfile(fp, 10**7)
print(floats2[-1])
floats2 == floats

0.1288579230853678
0.1288579230853678


True

In [42]:
numbers = array('h', [-2, -1, 0, 1, 2])
# memoryview exposes the array's buffer; len() counts the five items.
memv = memoryview(numbers)
print(len(memv))

print(memv[0])
# Reinterpret the same memory as unsigned bytes ('B').
memv_oct = memv.cast('B')
print(memv_oct.tolist())
# Writing through the byte view changes the original array: per the recorded
# output the third item becomes 1024 (byte order is platform-dependent;
# the recorded output corresponds to a little-endian layout).
memv_oct[5] = 4
print(numbers)

5
-2
[254, 255, 255, 255, 0, 0, 1, 0, 2, 0]
array('h', [-2, -1, 1024, 1, 2])


## NumPy и SciPy

In [45]:
import numpy
a = numpy.arange(12)
print(a)
print(type(a))
print(a.shape)
(12,)
a.shape = 3, 4
print(a)
print(a[2]) 

[ 0  1  2  3  4  5  6  7  8  9 10 11]
<class 'numpy.ndarray'>
(12,)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[ 8  9 10 11]


In [46]:
print(a[2, 1])
print(a[:, 1])
print(a.transpose()) 

9
[1 5 9]
[[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]


## Очереди

In [47]:
from collections import deque

# maxlen bounds the deque: once full, adding at one end drops items
# from the opposite end.
dq = deque(range(10), maxlen=10)
print(dq)
# Positive rotate moves items from the right end to the left end.
dq.rotate(3)
print(dq)
# Negative rotate moves items from the left end to the right end.
dq.rotate(-4)
print(dq)
# The deque is full, so appending on the left discards the rightmost item.
dq.appendleft(-1)
print(dq)
# Extending on the right discards three items from the left.
dq.extend([11, 22, 33])
print(dq)
# extendleft adds items one by one, so they end up in reverse order.
dq.extendleft([10, 20, 30, 40]) 
print(dq)

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6], maxlen=10)
deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], maxlen=10)
deque([-1, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
deque([3, 4, 5, 6, 7, 8, 9, 11, 22, 33], maxlen=10)
deque([40, 30, 20, 10, 3, 4, 5, 6, 7, 8], maxlen=10)


## Словари

In [4]:
from collections.abc import Mapping

my_dict = {}
isinstance(my_dict, Mapping)

True

In [5]:
# A tuple is hashable only if every item in it is hashable.
tt = (1, 2, (30, 40))
print(hash(tt))

tf = (1, 2, frozenset([30, 40]))
print(hash(tf))

# The list item is unhashable, so hashing this tuple raises TypeError.
tl = (1, 2, [30, 40])
print(hash(tl))

8027212646858338501
985328935373711578


TypeError: unhashable type: 'list'

In [6]:
a = dict(one=1, two=2, three=3)
b = {'one': 1, 'two': 2, 'three': 3}
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))
d = dict([('two', 2), ('one', 1), ('three', 3)])
e = dict({'three': 3, 'one': 1, 'two': 2})
a == b == c == d == e

True

<img src="../img/dict_methods.png">

In [13]:
import this
import codecs

zen_of_python = codecs.encode(this.s, 'rot13').split('\n')

In [14]:
import sys
import re

# Build an index mapping each word to the list of places where it occurs
# in zen_of_python (the list of lines prepared in an earlier cell).
# Raw string: '\w' is not a valid escape sequence in an ordinary string literal.
WORD_RE = re.compile(r'\w+')
index = {}

for line_no, line in enumerate(zen_of_python):
    for match in WORD_RE.finditer(line):
        word = match.group()
        # Columns are reported 1-based; line numbers come from enumerate (0-based).
        column_no = match.start() + 1
        location = (line_no, column_no)
        # not very elegant; improved in the next example
        occurrences = index.get(word, [])
        occurrences.append(location)
        index[word] = occurrences

# Case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])

a [(18, 48), (19, 53)]
Although [(10, 1), (15, 1), (17, 1)]
ambiguity [(13, 16)]
and [(14, 23)]
are [(20, 12)]
aren [(9, 15)]
at [(15, 38)]
bad [(18, 50)]
be [(14, 14), (15, 27), (19, 50)]
beats [(10, 23)]
Beautiful [(2, 1)]
better [(2, 14), (3, 13), (4, 11), (5, 12), (6, 9), (7, 11), (16, 8), (17, 25)]
break [(9, 40)]
by [(0, 20)]
cases [(9, 9)]
complex [(4, 23)]
Complex [(5, 1)]
complicated [(5, 24)]
counts [(8, 13)]
dense [(7, 23)]
do [(14, 64), (20, 48)]
Dutch [(15, 61)]
easy [(19, 26)]
enough [(9, 30)]
Errors [(11, 1)]
explain [(18, 34), (19, 34)]
Explicit [(3, 1)]
explicitly [(12, 8)]
face [(13, 8)]
first [(15, 41)]
Flat [(6, 1)]
good [(19, 55)]
great [(20, 28)]
guess [(13, 52)]
hard [(18, 26)]
honking [(20, 20)]
idea [(18, 54), (19, 60), (20, 34)]
If [(18, 1), (19, 1)]
implementation [(18, 8), (19, 8)]
implicit [(3, 25)]
In [(13, 1)]
is [(2, 11), (3, 10), (4, 8), (5, 9), (6, 6), (7, 8), (16, 5), (17, 16), (18, 23), (19, 23)]
it [(14, 67), (18, 43), (19, 43)]
let [(20, 42)]
may [

In [15]:
import sys
import re

# Build an index mapping each word to the list of places where it occurs
# in zen_of_python (the list of lines prepared in an earlier cell).
# Raw string: '\w' is not a valid escape sequence in an ordinary string literal.
WORD_RE = re.compile(r'\w+')

index = {}
for line_no, line in enumerate(zen_of_python):
    for match in WORD_RE.finditer(line):
        word = match.group()
        # Columns are reported 1-based; line numbers come from enumerate (0-based).
        column_no = match.start() + 1
        location = (line_no, column_no)
        # setdefault returns the list already stored for the word, or stores
        # and returns a new empty one, so a single lookup is enough.
        index.setdefault(word, []).append(location)

# Case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])

a [(18, 48), (19, 53)]
Although [(10, 1), (15, 1), (17, 1)]
ambiguity [(13, 16)]
and [(14, 23)]
are [(20, 12)]
aren [(9, 15)]
at [(15, 38)]
bad [(18, 50)]
be [(14, 14), (15, 27), (19, 50)]
beats [(10, 23)]
Beautiful [(2, 1)]
better [(2, 14), (3, 13), (4, 11), (5, 12), (6, 9), (7, 11), (16, 8), (17, 25)]
break [(9, 40)]
by [(0, 20)]
cases [(9, 9)]
complex [(4, 23)]
Complex [(5, 1)]
complicated [(5, 24)]
counts [(8, 13)]
dense [(7, 23)]
do [(14, 64), (20, 48)]
Dutch [(15, 61)]
easy [(19, 26)]
enough [(9, 30)]
Errors [(11, 1)]
explain [(18, 34), (19, 34)]
Explicit [(3, 1)]
explicitly [(12, 8)]
face [(13, 8)]
first [(15, 41)]
Flat [(6, 1)]
good [(19, 55)]
great [(20, 28)]
guess [(13, 52)]
hard [(18, 26)]
honking [(20, 20)]
idea [(18, 54), (19, 60), (20, 34)]
If [(18, 1), (19, 1)]
implementation [(18, 8), (19, 8)]
implicit [(3, 25)]
In [(13, 1)]
is [(2, 11), (3, 10), (4, 8), (5, 9), (6, 6), (7, 8), (16, 5), (17, 16), (18, 23), (19, 23)]
it [(14, 67), (18, 43), (19, 43)]
let [(20, 42)]
may [

In [16]:
import sys
import re
import collections

# Build an index mapping each word to the list of places where it occurs
# in zen_of_python (the list of lines prepared in an earlier cell).
# Raw string: '\w' is not a valid escape sequence in an ordinary string literal.
WORD_RE = re.compile(r'\w+')

# defaultdict(list) creates an empty list the first time a word is looked up.
index = collections.defaultdict(list)
for line_no, line in enumerate(zen_of_python):
    for match in WORD_RE.finditer(line):
        word = match.group()
        # Columns are reported 1-based; line numbers come from enumerate (0-based).
        column_no = match.start() + 1
        location = (line_no, column_no)
        index[word].append(location)

# Case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])

a [(18, 48), (19, 53)]
Although [(10, 1), (15, 1), (17, 1)]
ambiguity [(13, 16)]
and [(14, 23)]
are [(20, 12)]
aren [(9, 15)]
at [(15, 38)]
bad [(18, 50)]
be [(14, 14), (15, 27), (19, 50)]
beats [(10, 23)]
Beautiful [(2, 1)]
better [(2, 14), (3, 13), (4, 11), (5, 12), (6, 9), (7, 11), (16, 8), (17, 25)]
break [(9, 40)]
by [(0, 20)]
cases [(9, 9)]
complex [(4, 23)]
Complex [(5, 1)]
complicated [(5, 24)]
counts [(8, 13)]
dense [(7, 23)]
do [(14, 64), (20, 48)]
Dutch [(15, 61)]
easy [(19, 26)]
enough [(9, 30)]
Errors [(11, 1)]
explain [(18, 34), (19, 34)]
Explicit [(3, 1)]
explicitly [(12, 8)]
face [(13, 8)]
first [(15, 41)]
Flat [(6, 1)]
good [(19, 55)]
great [(20, 28)]
guess [(13, 52)]
hard [(18, 26)]
honking [(20, 20)]
idea [(18, 54), (19, 60), (20, 34)]
If [(18, 1), (19, 1)]
implementation [(18, 8), (19, 8)]
implicit [(3, 25)]
In [(13, 1)]
is [(2, 11), (3, 10), (4, 8), (5, 9), (6, 6), (7, 8), (16, 5), (17, 16), (18, 23), (19, 23)]
it [(14, 67), (18, 43), (19, 43)]
let [(20, 42)]
may [

In [18]:
 
class StrKeyDict0(dict):
    """dict that retries a failed lookup with the key converted to str."""

    def __missing__(self, key):
        """Handle a key that was not found by d[key].

        A non-string key is retried as str(key); a missing string key
        raises KeyError (this also stops the retry from recursing).
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return self[key], or default when the lookup raises KeyError."""
        try:
            value = self[key]
        except KeyError:
            value = default
        return value

    def __contains__(self, key):
        """Return True if key, or its str() form, is one of the stored keys."""
        stored = self.keys()
        return key in stored or str(key) in stored
    
# All stored keys are strings.
d = StrKeyDict0([('2', 'two'), ('4', 'four')])

# d.get(key)
print(d.get('2'))
# A non-string key is found through its str() form.
print(d.get(4))
print(d.get(1, 'N/A'))

# key in d
print(2 in d)
print(1 in d)

# d['key']
print(d['2'])
print(d[4])
# Neither 1 nor '1' is stored: this lookup raises KeyError.
print(d[1])

two
four
N/A
True
False
two
four


KeyError: '1'

## Множества

In [19]:
l = ['spam', 'spam', 'eggs', 'spam']
print(set(l))
print(list(set(l)))

{'eggs', 'spam'}
['eggs', 'spam']


<img src="../img/set_methods.png">