# 2.6 Python Standard Library
[The Python Standard Library](https://docs.python.org/3/library/)
## Итерация и копирование
- `itertools`
- `copy`
- `enum`
- ...

In [1]:
# обычно импортируются без сокращений
import itertools,operator,copy,enum

In [2]:
# соединение итераторов
it1 = iter([1, 2, 3, 4])
it2 = iter([4, 5, 6, 7])
it = itertools.chain(it1, it2)
list(it), tuple(it)

([1, 2, 3, 4, 4, 5, 6, 7], ())

In [3]:
# декартово произведение (все возможные пары элементов)
prod = itertools.product([1,2],[3,4,5])
list(prod), tuple(prod)

([(1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5)], ())

In [4]:
# все возможные перестановки
perm = itertools.permutations([1, 2, 3])
list(perm), tuple(perm)

([(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)], ())

In [5]:
# варианты сочетаний элементов списка
perm = itertools.permutations([1, 2, 3, 4], 2)
comb = itertools.combinations([1, 2, 3, 4], 2)
comb_re = itertools.combinations_with_replacement(
    [1, 2, 3, 4], 2)
print(list(perm), list(comb), list(comb_re), sep='\n')

[(1, 2), (1, 3), (1, 4), (2, 1), (2, 3), (2, 4), (3, 1), (3, 2), (3, 4), (4, 1), (4, 2), (4, 3)]
[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]
[(1, 1), (1, 2), (1, 3), (1, 4), (2, 2), (2, 3), (2, 4), (3, 3), (3, 4), (4, 4)]


In [6]:
# grouping with a named key function
def smaller3(x):
    """Return True when *x* is less than 4.

    NOTE(review): the name suggests a threshold of 3, but the comparison
    is ``x < 4`` - confirm which one is intended.
    """
    return x < 4


group_obj1 = itertools.groupby(range(10), key=smaller3)
for key, group in group_obj1:
    print(key, list(group))

# grouping with an anonymous key function: groupby only merges *adjacent*
# items, so the key True shows up twice for this unsorted input
group_obj2 = itertools.groupby(
    ["hi", "nice", "hello", "my", "i"],
    key=lambda x: "i" in x,
)
for key, group in group_obj2:
    print(key, list(group))

# grouping after sorting by the same key
persons = {
    '1': {'name': 'Tom', 'age': 25},
    '2': {'name': 'Dan', 'age': 27},
    '3': {'name': 'Lisa', 'age': 27},
    '4': {'name': 'Anna', 'age': 25},
    '5': {'name': 'Mary', 'age': 26},
    '6': {'name': 'Tim', 'age': 28},
}
# list of the person dictionaries ordered by their 'age' field
persons = sorted(persons.values(), key=operator.itemgetter('age'))
group_obj3 = itertools.groupby(persons, key=operator.itemgetter('age'))
for key, group in group_obj3:
    print(key, list(group))

True [0, 1, 2, 3]
False [4, 5, 6, 7, 8, 9]
True ['hi', 'nice']
False ['hello', 'my']
True ['i']
25 [{'name': 'Tom', 'age': 25}, {'name': 'Anna', 'age': 25}]
26 [{'name': 'Mary', 'age': 26}]
27 [{'name': 'Dan', 'age': 27}, {'name': 'Lisa', 'age': 27}]
28 [{'name': 'Tim', 'age': 28}]


In [7]:
# кумулятивные итерации
acc1 = itertools.accumulate(range(10))
acc2 = itertools.accumulate(range(1,9),func=operator.mul)
acc3 = itertools.accumulate([1,5,2,6,3,4,0,9,1],func=max)
for a in (acc1,acc2,acc3):
    print(list(a))

[0, 1, 3, 6, 10, 15, 21, 28, 36, 45]
[1, 2, 6, 24, 120, 720, 5040, 40320]
[1, 5, 5, 6, 6, 6, 6, 9, 9]


In [8]:
# infinite iterators: every loop needs its own explicit stop condition
from itertools import count, cycle, repeat
lst = [[], [], []]
# count(10) yields 10, 11, 12, ... without end
for i in count(10):
    lst[0].append(i)
    if i >= 13:
        break
# cycle() replays 0..9 endlessly; stop once the running total reaches 100.
# The accumulator is called `total` so the built-in sum() is not shadowed.
total = 0
for i in cycle(range(10)):
    lst[1].append(i)
    total += i
    if total >= 100:
        break
# repeat(obj, 3) yields the very same list object three times
for i in repeat(list(range(3)), 3):
    lst[2].append(i)
lst

[[10, 11, 12, 13],
 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4],
 [[0, 1, 2], [0, 1, 2], [0, 1, 2]]]

## Расширенные возможности обработки данных
- `collections`
- `collections.abc`
- `array`
- `weakref`
- `types`
- `graphlib`
- `heapq`
- `...`

In [9]:
# счетчик элементов
from collections import Counter as cc
a = "assaaaabbttbbccwwcdde"
cstr = cc(a)
print(cstr,dict(cstr),cstr.items(),cstr.keys(),cstr.values(),
      sep='\n',end='\n'+100*'='+'\n')
lst = [2, 3, 2, 0, 1, 0, 1, 2, 1, 1, 5, 3, 2, 3, 2, 4]
clist = cc(lst)
print(clist,clist.most_common(1),
      clist.elements(),list(clist.elements()),
      sep='\n',end='\n'+100*'='+'\n')

Counter({'a': 5, 'b': 4, 'c': 3, 's': 2, 't': 2, 'w': 2, 'd': 2, 'e': 1})
{'a': 5, 's': 2, 'b': 4, 't': 2, 'c': 3, 'w': 2, 'd': 2, 'e': 1}
dict_items([('a', 5), ('s', 2), ('b', 4), ('t', 2), ('c', 3), ('w', 2), ('d', 2), ('e', 1)])
dict_keys(['a', 's', 'b', 't', 'c', 'w', 'd', 'e'])
dict_values([5, 2, 4, 2, 3, 2, 2, 1])
Counter({2: 5, 1: 4, 3: 3, 0: 2, 5: 1, 4: 1})
[(2, 5)]
<itertools.chain object at 0x7f7577888310>
[2, 2, 2, 2, 2, 3, 3, 3, 0, 0, 1, 1, 1, 1, 5, 4]


In [10]:
# именованные кортежи
from collections import namedtuple as cnt
point3 = cnt('point','x, y, z')
p = point3(1,-4,2)
print(p,p._fields,type(p),(p.x,p.y,p.z),sep='\n')
student = cnt('student','name age id')
s = student(name='Tom', age=25, id=10027)
print(s,s._fields,type(s),(s.id,s.name,s.age),sep='\n')

point(x=1, y=-4, z=2)
('x', 'y', 'z')
<class '__main__.point'>
(1, -4, 2)
student(name='Tom', age=25, id=10027)
('name', 'age', 'id')
<class '__main__.student'>
(10027, 'Tom', 25)


In [11]:
# словарь с установками по умолчанию
from collections import defaultdict as cdd
d0,d1,d2,d3 = cdd(),cdd(str),cdd(float),cdd(lambda:'@')
d1['1'] = 'a'; d1['2'] = 'b'
d2['a'] = 1.5; d2['b'] = 2.1
d0, d1['3'], d1, d2['c'], d2, d3['1'], d3

(defaultdict(None, {}),
 '',
 defaultdict(str, {'1': 'a', '2': 'b', '3': ''}),
 0.0,
 defaultdict(float, {'a': 1.5, 'b': 2.1, 'c': 0.0}),
 '@',
 defaultdict(<function __main__.<lambda>()>, {'1': '@'}))

In [12]:
# упорядоченный словарь
from collections import OrderedDict as cod
d = cod({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5})
print(d, dict(d), sep='\n')
d.move_to_end('a'); print(d)
d.move_to_end('a',last=False); print(d)
for key, value in reversed(d.items()):
    print(key, '->', value, end = ' || ')
print(); d.popitem(); print(d)
d.sorted_keys = lambda: sorted(d.keys())
d['f'],d['e'] = 7,6; print(d)
cod({k:d[k] for k in d.sorted_keys()})

OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)])
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
OrderedDict([('b', 2), ('c', 3), ('d', 4), ('e', 5), ('a', 1)])
OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 5)])
e -> 5 || d -> 4 || c -> 3 || b -> 2 || a -> 1 || 
OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4)])
OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4), ('f', 7), ('e', 6)])


OrderedDict([('a', 1), ('b', 2), ('c', 3), ('d', 4), ('e', 6), ('f', 7)])

In [13]:
{'a':'A','b':'B'} == {'b':'B','a':'A'},\
cod({'a':'A','b':'B'}) == cod({'b':'B','a':'A'})

(True, False)

In [14]:
from collections import ChainMap as ccm
# последовательность словарей
toys = {'лего':100,'монополия':10}
computers = {'макбук':20,'хромбук':20,'классбук':50}
clothes = {'джинсы':30,'футболки':60}
chain = ccm(toys,computers,clothes)
for el in chain.items(): print(el)
toys.update({'дрон':10,'мяч':10})
print(list(chain))

('джинсы', 30)
('футболки', 60)
('макбук', 20)
('хромбук', 20)
('классбук', 50)
('лего', 100)
('монополия', 10)
['джинсы', 'футболки', 'макбук', 'хромбук', 'классбук', 'лего', 'монополия', 'дрон', 'мяч']


In [15]:
# может содержать одинаковые ключи
anumbers = {"one": 1, "two": 2}
cletters = {"a": "A", "b": "B"}
rnumbers = {"one": "I", "two": "II"}
lletters = {"a": "a", "b": "a"}
dl_chain = ccm(anumbers,rnumbers,cletters,lletters)
dl_chain

ChainMap({'one': 1, 'two': 2}, {'one': 'I', 'two': 'II'}, {'a': 'A', 'b': 'B'}, {'a': 'a', 'b': 'a'})

In [16]:
# объект двустороннего доступа
from collections import deque as cdq
d = cdq([1,2,3,4,5]); print(d)
d.append(6); d.appendleft(0); print(d)
d.extend([7,8]); d.extendleft([-2,-1]); print(d)
d.rotate(2); print(d)
d.rotate(-2); print(d)

deque([1, 2, 3, 4, 5])
deque([0, 1, 2, 3, 4, 5, 6])
deque([-1, -2, 0, 1, 2, 3, 4, 5, 6, 7, 8])
deque([7, 8, -1, -2, 0, 1, 2, 3, 4, 5, 6])
deque([-1, -2, 0, 1, 2, 3, 4, 5, 6, 7, 8])


In [17]:
# compact storage of a numeric sequence whose items all share one type
import array
arr1 = array.array("f", [31, 60, 19, 12])
print(arr1, arr1[0], type(arr1))
arr1.reverse()
# NOTE: despite the .csv suffix the file holds raw binary floats; the name is
# kept because a later cell renames "array.csv".
# The with-statements close each handle even when the I/O call raises.
with open("array.csv", "wb") as f:
    arr1.tofile(f)
arr2 = array.array("f")
with open("array.csv", "rb") as f:
    # read back exactly as many items as were written
    arr2.fromfile(f, len(arr1))
print(arr2, arr2[0], type(arr2))

array('f', [31.0, 60.0, 19.0, 12.0]) 31.0 <class 'array.array'>
array('f', [12.0, 19.0, 60.0, 31.0]) 12.0 <class 'array.array'>


In [18]:
# weak references
import weakref
# trivial set subclass used as the referent of the weak references below
class CreateObj(set): pass
obj = CreateObj("hello")
# a second ordinary (strong) name for the same object
normal_setobj = obj
print(f"обычный объект-множество: {normal_setobj}")
# weakref.ref() builds a callable weak reference; calling it returns the referent
weakref_ref = weakref.ref(obj)
weakref_setobj = weakref_ref()
print(f"объект-множество, созданный со слабой ссылкой: {weakref_setobj}")
# weakref.proxy() builds a proxy that is used like the referent itself
proxy_setobj = weakref.proxy(obj)
print(f"доверенный объект-множество: {proxy_setobj}")
list_setobj = [normal_setobj, weakref_setobj, proxy_setobj]
# number of weak references/proxies pointing at each entry; the recorded output
# is [2, 2, 0] - the first two entries are the same object (referenced by the
# ref and the proxy), while nothing weakly references the proxy itself
[weakref.getweakrefcount(o) for o in list_setobj]

обычный объект-множество: CreateObj({'l', 'h', 'o', 'e'})
объект-множество, созданный со слабой ссылкой: CreateObj({'l', 'h', 'o', 'e'})
доверенный объект-множество: CreateObj({'l', 'h', 'o', 'e'})


[2, 2, 0]

In [19]:
# алгоритм heap queue
import heapq
scores = [51, 33, 64, 87, 91, 75, 15, 49, 33, 82, 45]
heapq.nlargest(5, scores),\
heapq.nsmallest(5, scores)

([91, 87, 82, 75, 64], [15, 33, 33, 45, 49])

## Работа с текстовыми форматами
- `string`
- `re`
- `difflib` 
- `textwrap`
- `unicodedata`
- `stringprep` 
- `readline`
- `rlcompleter`

In [20]:
# обработка строковых данных
import string
s = string.Template('$who likes $what')
s.substitute(who='Tom', what='apples'),\
string.ascii_lowercase,string.punctuation,string.digits

('Tom likes apples',
 'abcdefghijklmnopqrstuvwxyz',
 '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~',
 '0123456789')

In [21]:
from string import Formatter as sf
s = sf()
s.vformat('{x[a]} {1} {0} {2} ... {x[b]}',
          '12@34',{'x':{'a':'aa','b':'bb'}})

'aa 2 1 @ ... bb'

In [22]:
# исследование состава строковых литералов
# метасимволы, последовательности, множества и методы
import re
# начинаются с f, только 1 повторение, старт hello и стоп !!!
re.findall(r'\bf[a-z]*', 'apple, tree, foot, map, final'),\
re.sub(r'(\b[a-z]+) \1', r'\1', 'cat, cat in the the hat hat!'),\
re.search(r'^hello.*!!!$',r'hello, world!!!')

(['foot', 'final'],
 'cat, cat in the hat!',
 <re.Match object; span=(0, 15), match='hello, world!!!'>)

In [23]:
# информация о всех символах Юникод
import unicodedata as ud
print(ud.lookup('ASTERISK'),
      ud.lookup('Latin Capital letter Y'),sep=' || ')
print(ud.name(u'@'),ud.name(u'°'),sep=' || ')
print(ud.decimal(u'6'),ud.numeric(u'9'),sep=' || ')
print(ud.category(u'Б'),ud.category(u'9'),
      ud.category(u'!'),sep=' || ')
print(ud.mirrored(u'['),ud.mirrored(u'<'),sep=' || ')

* || Y
COMMERCIAL AT || DEGREE SIGN
6 || 9.0
Lu || Nd || Po
1 || 1


## Временные измерения
- `time`
- `timeit`
- `datetime`
- `zoneinfo` (Python >= 3.9)
- `calendar`

Во многих средах также установлены дополнительные сторонние пакеты:
- `pytz`
- `dateutil`

In [24]:
from time import asctime
asctime()

'Mon Jan 23 13:59:48 2023'

In [25]:
# дата и точное время
import datetime, pytz
today = datetime.date.today()
now = datetime.datetime.now()
now_tz = datetime.datetime.now(pytz.timezone('Europe/Moscow'))
today, str(today), now, str(now), now_tz, str(now_tz)

(datetime.date(2023, 1, 23),
 '2023-01-23',
 datetime.datetime(2023, 1, 23, 13, 59, 48, 914521),
 '2023-01-23 13:59:48.914521',
 datetime.datetime(2023, 1, 23, 16, 59, 48, 977804, tzinfo=<DstTzInfo 'Europe/Moscow' MSK+3:00:00 STD>),
 '2023-01-23 16:59:48.977804+03:00')

In [26]:
# строка установленного формата
tf = "%d %B %Y %A %H:%M:%S%z"
now.strftime(tf),now_tz.strftime(tf)

('23 January 2023 Monday 13:59:48', '23 January 2023 Monday 16:59:48+0300')

In [27]:
# итерация временного интервала
delta = datetime.timedelta(days=1)
start = datetime.date.today()
end = start + 7 * delta
for d in range((end - start).days):
    print(start + d * delta)

2023-01-23
2023-01-24
2023-01-25
2023-01-26
2023-01-27
2023-01-28
2023-01-29


In [28]:
# часовые пояса
from datetime import timedelta, timezone
MSK = timezone(timedelta(hours=+3))
MSK

datetime.timezone(datetime.timedelta(seconds=10800))

In [29]:
from dateutil import tz
from dateutil.parser import parse
EST,CST,MST,PST = \
(tz.gettz('US/'+el) for el in ('Eastern','Central','Mountain','Pacific'))
us_tzinfo = {'CST': CST,'EST': EST,'MST': MST,'PST': PST}
dt_est = parse('2023-1-1 04:00:00 EST', tzinfos=us_tzinfo)
dt_pst = parse('2022-3-11 16:00:00 PST', tzinfos=us_tzinfo)
print (dt_est, dt_pst, sep='\n')

2023-01-01 04:00:00-05:00
2022-03-11 16:00:00-08:00


In [30]:
# оценка быстродействия программных действий
from timeit import default_timer as timer
alist = 10**7*["a"]
start = timer()
astr = ""
for a in alist: astr += a
print("list => string ||   +   || %.6f"%(timer()-start))
start = timer()
astr = "".join(alist)
print("list => string || join()|| %.6f"%(timer()-start))
del alist,astr

list => string ||   +   || 3.742948
list => string || join()|| 0.141230


## Интернет-доступ и обработка веб-ресурсов
- `urllib`
- `http`
- `html`
- `XML Processing Modules`
- `...`

In [31]:
from urllib.request import urlopen
# print the response headers; the with-statement closes the HTTP connection
# instead of leaving the response object open
with urlopen("http://python.org/") as response:
    print(response.headers)

Connection: close
Content-Length: 50123
Server: nginx
Content-Type: text/html; charset=utf-8
X-Frame-Options: DENY
Via: 1.1 vegur, 1.1 varnish
Accept-Ranges: bytes
Date: Mon, 23 Jan 2023 13:59:53 GMT
Age: 1700
X-Served-By: cache-iad-kiad7000146-IAD
X-Cache: HIT
X-Cache-Hits: 1
X-Timer: S1674482393.297226,VS0,VE1
Vary: Cookie
Strict-Transport-Security: max-age=63072000; includeSubDomains




In [32]:
# first bytes of the web page body; the with-statement closes the HTTP
# connection instead of leaving the response object open
with urlopen("http://python.org/") as response:
    print(response.read()[:82])

b'<!doctype html>\n<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9"> '


In [33]:
# отдельные компоненты информации
url_utc = 'http://worldtimeapi.org/api/timezone/etc/UTC.txt'
with urlopen(url_utc) as response:
    for line in response:
        line = line.decode() # байт-строки в обычные
        if line.startswith('datetime'):
            print(line.rstrip())

datetime: 2023-01-23T13:59:53.573325+00:00


In [34]:
# построение анализатора интернет-страницы
from html.parser import HTMLParser
class BuiltHTMLParser(HTMLParser):
    """HTML parser that tallies start/end tags and collects text fragments."""

    def __init__(self):
        super().__init__()
        # tag name -> number of times it was opened / closed
        self.starts = {}
        self.ends = {}
        # text fragments in the order the parser reports them
        self.text_data = []

    def __str__(self):
        """Return a three-line summary: start tags, end tags, text data."""
        return (f"start tags: {self.starts}\n"
                f"end tags: {self.ends}\n"
                f"data: {self.text_data}")

    def handle_starttag(self, tag, attrs):
        """Count one more opening occurrence of *tag* (attributes are ignored)."""
        self.starts[tag] = self.starts.get(tag, 0) + 1

    def handle_endtag(self, tag):
        """Count one more closing occurrence of *tag*."""
        self.ends[tag] = self.ends.get(tag, 0) + 1

    def handle_data(self, data):
        """Remember a text fragment."""
        self.text_data.append(data)


hp = BuiltHTMLParser()
hp.feed('<html><head><title>Test</title></head>'
        '<body><h1>Header1 #1</h1><h1>Header1 #2</h1>'
        '<h1>Header1 #3</h1></body></html>')
print(hp)

start tags: {'html': 1, 'head': 1, 'title': 1, 'body': 1, 'h1': 3}
end tags: {'title': 1, 'head': 1, 'h1': 3, 'body': 1, 'html': 1}
data: ['Test', 'Header1 #1', 'Header1 #2', 'Header1 #3']


## Вычисления
- `decimal`
- `numbers`
- `math`
- `cmath`
- `fractions`
- `random`
- `statistics`

In [35]:
import numbers,decimal,math,cmath,random,fractions,statistics

In [36]:
# важнейшие статистические показатели
arr = [2.75,1.75,1.25,.75,.25,.5,1.25,3.5]
statistics.mean(arr),statistics.median(arr)

(1.5, 1.25)

In [37]:
# математические функции и константы
math.sqrt(1024),math.pi

(32.0, 3.141592653589793)

In [38]:
# имитация случайного объекта
random.choice(['🍊','🍋','🍎','🍐','🍑']),\
random.randrange(7),random.random(),\
random.sample(list('abcdefghijklmnopqrstuvwxyz'),10)

('🍐',
 4,
 0.6275461245655267,
 ['k', 'j', 'h', 'y', 's', 'b', 'f', 'c', 'q', 'p'])

## Функциональное программирование
- `operator`
- `functools`

In [39]:
# встроенные операторы как функции
import operator
15 // 5, operator.floordiv(15, 5),\
15 // 2, operator.__floordiv__(15, 2)

(3, 3, 7, 7)

In [40]:
# переопределение операторов
class Vector:
    """Two-component vector that supports ``+`` through ``__add__``."""

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        # f-string formatting keeps non-integer components intact; the former
        # '%d' conversion silently truncated them (1.5 was shown as 1)
        return f'vector ({self.x}, {self.y})'

    # str() and repr() intentionally produce the same text
    __str__ = __repr__

    def __add__(self, other):
        """Return the component-wise sum of two vectors as a new Vector."""
        return Vector(self.x + other.x, self.y + other.y)


# operator.add concatenates plain tuples, whereas Vector defines its own "+"
operator.add((2, 15), (5, 1)), Vector(2, 15) + Vector(5, 1)

((2, 15, 5, 1), vector (7, 16))

In [41]:
# run a function several times by means of a decorator
import functools


def n_run(f, n=5):
    """Decorate *f* so that every call executes it *n* times.

    The wrapper returns the result of the last execution, or None when
    *n* is 0.
    """
    @functools.wraps(f)  # keep the wrapped function's name and docstring
    def inner_f(*args, **kwargs):
        result = None
        for _ in range(n):
            result = f(*args, **kwargs)
        return result
    return inner_f


@n_run
def f1(string):
    print(f"| {string} |", end="")


f1('hello')

| hello || hello || hello || hello || hello |

In [42]:
# the same effect with a parameterised decorator
# NOTE: in the notebook namespace this rebinds the name `repeat` that an
# earlier cell imported from itertools.
import functools


def repeat(n):
    """Return a decorator that runs the wrapped function *n* times.

    The wrapper returns the result of the last run, or None when the loop
    body never executes (that case used to raise UnboundLocalError).
    """
    def decorator_repeat(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            result = None
            for _ in range(n):
                result = f(*args, **kwargs)
            return result
        return wrapper
    return decorator_repeat


@repeat(n=5)
def f1(string):
    print(f"| {string} |", end="")


f1('hello')

| hello || hello || hello || hello || hello |

## Сервисы операционной системы и обработки файлов
- `os`
- `io`
- `csv`
- `json`
- `glob`
- `tomllib`
- `argparse`
- `tempfile`
- `...`

In [43]:
# взаимодействие с операционной системой
import os,glob        
os.rename("array.csv","array_os.csv")
glob.glob('*.csv')

['array_os.csv']

In [44]:
# управление интерактивным взаимодействием с файлами
import io
output = io.StringIO()
output.write('id name age\n')
output.write('1015 Lisa 23\n')
print('1002 Tom 27', file=output)
contents = output.getvalue()
output.close()
contents

'id name age\n1015 Lisa 23\n1002 Tom 27\n'

In [45]:
# write a CSV file from a sequence of dictionaries
import csv
with open('dict.csv', 'w', newline='') as csvfile:
    fieldnames = ['id', 'name', 'age']
    dict_rows = ({'id': 1015, 'name': 'Tom', 'age': 27},
                 {'id': 1002, 'name': 'Lisa', 'age': 23},
                 {'id': 1034, 'name': 'Dan', 'age': 21},
                 {'id': 1025, 'name': 'Ann', 'age': 25},
                 {'id': 1045, 'name': 'Alex', 'age': 28})
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    # writerows() writes every dictionary in one call; no throwaway list is
    # built just for the side effect of writing
    writer.writerows(dict_rows)

In [46]:
# read the file row by row
with open('dict.csv', newline='') as f:
    for fields in csv.reader(f, delimiter=','):
        # right-align every cell in a column five characters wide
        line = ' | '.join(cell.rjust(5) for cell in fields)
        print(line, '-' * len(line), sep='\n')

   id |  name |   age
---------------------
 1015 |   Tom |    27
---------------------
 1002 |  Lisa |    23
---------------------
 1034 |   Dan |    21
---------------------
 1025 |   Ann |    25
---------------------
 1045 |  Alex |    28
---------------------


In [47]:
# read every row into a dictionary keyed by the header fields
# (newline='' is the mode the csv module expects for files it reads)
with open('dict.csv', mode='r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    # number of data rows seen so far; stays 0 for an empty file instead of
    # ending up as -1
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            # the field names are reported once, right before the first row
            print(f'названия полей: {", ".join(row)}')
        fstr = ' || '.join(f'{row[name]:5s}' for name in csv_reader.fieldnames)
        print(len(fstr)*'-', fstr, len(fstr)*'-', sep='\n')
        line_count += 1
    print(f'файл содержит {line_count} строк')

названия полей: id, name, age
-----------------------
1015  || Tom   || 27   
-----------------------
-----------------------
1002  || Lisa  || 23   
-----------------------
-----------------------
1034  || Dan   || 21   
-----------------------
-----------------------
1025  || Ann   || 25   
-----------------------
-----------------------
1045  || Alex  || 28   
-----------------------
файл содержит 5 строк


## Сжатие данных и архивация
- `zlib`
- `gzip`
- `bz2`
- `lzma`
- `zipfile`
- `tarfile`

In [48]:
import zipfile, urllib, datetime
def append_zipfile(zip_file, file_):
    """Add the file at path *file_* to the archive *zip_file*.

    The archive is opened in append mode ('a') and closed again before
    the function returns.
    """
    archive = zipfile.ZipFile(zip_file, mode='a')
    with archive:
        archive.write(file_)
def load_file(path, file_):
    """Download ``path + file_`` and save it as *file_* in the working directory."""
    # import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.request` has been loaded
    import urllib.request
    # the context managers close both handles even if the transfer fails midway
    with urllib.request.urlopen(path + file_) as input_file, \
            open(file_, 'wb') as output_file:
        output_file.write(input_file.read())
def create_stream(path, file_list):
    """Download the files one at a time, yielding each name once it is saved.

    Being a generator, it fetches a file only when the consumer asks for
    the next item.
    """
    for name in file_list:
        load_file(path, name)
        yield name
# base URL of the sample CSV files in the pandas repository
path = ('https://raw.githubusercontent.com/pandas-dev/pandas/'
        'main/pandas/tests/io/data/csv/')
file_list = ['banklist.csv', 'tips.csv', 'iris.csv']
# download each file and add it to the archive as soon as it has arrived
for f in create_stream(path, file_list):
    append_zipfile("example.zip", f)

In [49]:
# list every archive member with its timestamp and sizes
with zipfile.ZipFile("example.zip", mode="r") as zf:
    for info in zf.infolist():
        print(f"file: {info.filename}",
              f"modified: {datetime.datetime(*info.date_time)}",
              f"normal size: {info.file_size} bytes",
              f"compressed size: {info.compress_size} bytes",
              "-" * 20, sep="\n")
    # no explicit zf.close(): the with-statement closes the archive on exit

file: banklist.csv
modified: 2023-01-23 13:59:54
normal size: 41417 bytes
compressed size: 41417 bytes
--------------------
file: tips.csv
modified: 2023-01-23 13:59:54
normal size: 7943 bytes
compressed size: 7943 bytes
--------------------
file: iris.csv
modified: 2023-01-23 13:59:54
normal size: 4600 bytes
compressed size: 4600 bytes
--------------------


## Хранение данных
- `pickle`
- `copyreg`
- `shelve`
- `marshal`
- `dbm`
- `sqlite3` 

In [None]:
# база данных в формате словаря
import dbm
with dbm.open('example','c') as db:
    db['name'] = 'Tom'
    db['id'] = '1024'
    db['age'] = '27'
    print(db.get(b'name'))
# with - автоматическое закрытие файла 

b'Tom'


In [None]:
import pickle
class PickleExample:
    """Object holding one attribute (a lambda) that pickle cannot store."""

    def __init__(self):
        self.obj1 = 1001
        self.obj2 = 10.01
        self.obj3 = "string"
        # a lambda cannot be saved by the pickle module
        self.obj4 = lambda x: x ** 3

    def __getstate__(self):
        """Return a copy of the attributes without the unpicklable lambda."""
        state = dict(self.__dict__)
        state.pop('obj4')
        return state

    def __setstate__(self, state):
        """Restore the saved attributes and recreate the lambda."""
        self.__dict__ = state
        self.obj4 = lambda x: x ** 3


pe = PickleExample()
pe_string = pickle.dumps(pe)
pe_obj = pickle.loads(pe_string)
print(*pe_obj.__dict__.items(), sep='\n')

('obj1', 1001)
('obj2', 10.01)
('obj3', 'string')
('obj4', <function PickleExample.__setstate__.<locals>.<lambda> at 0x7f91da915b80>)


In [None]:
# dill - расширение pickle
import dill
cube = lambda x: x ** 3
dpe = dill.dumps(cube)
[dpe[i*30:(i+1)*30] for i in range(8)]

[b'\x80\x04\x95\xcd\x00\x00\x00\x00\x00\x00\x00\x8c\ndill._dill\x94\x8c\x10_cre',
 b'ate_function\x94\x93\x94(h\x00\x8c\x0c_create_co',
 b'de\x94\x93\x94(K\x01K\x00K\x00K\x01K\x02KCC\x08|\x00d\x01\x13\x00S\x00\x94N',
 b'K\x03\x86\x94)\x8c\x01x\x94\x85\x94\x8c\x1f<ipython-input-52',
 b'-e43cfb9f847a>\x94\x8c\x08<lambda>\x94K\x03C\x00',
 b'\x94))t\x94R\x94c__builtin__\n__main__\nh',
 b'\nNNt\x94R\x94}\x94}\x94\x8c\x0f__annotations__\x94}',
 b'\x94s\x86\x94b.']

In [51]:
import sqlite3, os
if os.path.exists('example.db'):
    os.remove('example.db')
def db_connect(dbf):
    """Open the SQLite database file *dbf*.

    Returns the connection object, or None (after printing the error)
    when the database cannot be opened.
    """
    try:
        return sqlite3.connect(dbf)
    except sqlite3.Error as err:
        # connect() itself raised, so no connection object exists that
        # would need closing
        print(err)
        return None
def db_query(connection, query, params=()):
    """Execute *query* on *connection* and return the fetched rows.

    *params* optionally supplies values for ``?`` placeholders in *query*.
    A successful statement is committed. Returns a list of row tuples, or
    None when the statement produced no rows or failed (the error is
    printed).
    """
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query, params)
            print("запрос прошел успешно")
            rows = cursor.fetchall()
            # without a commit, INSERT/UPDATE/DELETE changes stay in an open
            # transaction and are lost when the connection is closed
            connection.commit()
        finally:
            cursor.close()
    except sqlite3.Error as err:
        print(f"ошибка: '{err}'")
        return None
    # an empty result is reported as None
    return rows or None
dbc = db_connect('example.db')

In [52]:
users_table = """
CREATE TABLE IF NOT EXISTS users (
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  name TEXT NOT NULL,
  age INTEGER,
  gender TEXT,
  country TEXT
);
"""
db_query(dbc, users_table)
users = """
INSERT INTO
  users (name, age, gender, country)
VALUES
  ('James', 45, 'male', 'USA'),
  ('Leila', 32, 'female', 'France'),
  ('Brigitte', 35, 'female', 'UK'),
  ('Mike', 40, 'male', 'Germany'),
  ('Mary', 28, 'female', 'Canada');
"""
db_query(dbc, users)
db_query(dbc, """SELECT * FROM users""")

запрос прошел успешно
запрос прошел успешно
запрос прошел успешно


[(1, 'James', 45, 'male', 'USA'),
 (2, 'Leila', 32, 'female', 'France'),
 (3, 'Brigitte', 35, 'female', 'UK'),
 (4, 'Mike', 40, 'male', 'Germany'),
 (5, 'Mary', 28, 'female', 'Canada')]