# Python Progress Bar

## 1.1. tqdm  -- Add a progress meter to your loops in a second
tqdm means "progress" in arabic阿拉伯语 

In [7]:
import time
from tqdm import tqdm

for i in tqdm(range(10)):
    time.sleep(i)
    

100%|██████████| 10/10 [00:45<00:00,  4.50s/it]


In [14]:
# tqdm.py
__all__ = ['tqdm', 'trange']

import sys
import time

def format_interval(t):
    mins, s = divmod(int(t), 60)
    h, m = divmod(mins, 60)
    if h:
        return '%d:%02d:%02d' % (h, m, s)
    else:
        return '%02d:%02d' % (m, s)

def format_meter(n, total, elapsed, n_bars):
    # n - number of finished iterations
    # total - total number of iterations, or None
    # elapsed - number of seconds passed since start
    # N_BARS = bar number
    if n > total:
        total = None
    elapsed_str = format_interval(elapsed)
    rate = '%5.2f' % (n / elapsed) if elapsed else '?'
    
    if total:
        frac = float(n) / total
        N_BARS = n_bars
        bar_length = int(frac * N_BARS)
        bar = '#' * bar_length + '-' * (N_BARS - bar_length)
        
        percentage = '%3d%%' % (frac * 100)
        
        left_str = format_interval(elapsed / n * (total - n)) if n else '?'
        
        return '|%s| %d/%d %s [elapsed: %s left: %s, %s iters/sec]' % (
            bar, n, total, percentage, elapsed_str, left_str, rate)
    
    else:
        return '%d [elapsed: %s, %s iters/sec]' % (n, elapsed_str, rate)


class StatusPrinter(object):
    def __init__(self, file):
        self.file = file
        self.last_printed_len = 0
    
    def print_status(self, s):
        self.file.write('\r' + s + ' ' * max(self.last_printed_len - len(s), 0))
        self.file.flush()
        self.last_printed_len = len(s)


def tqdm(iterable, desc='', total=None, leave=False, file=sys.stderr,
         mininterval=0.5, miniters=1, n_bars=100):
    """
    Get an iterable object, and return an iterator which acts exactly like the
    iterable, but prints a progress meter and updates it every time a value is
    requested.
    'desc' can contain a short string, describing the progress, that is added
    in the beginning of the line.
    'total' can give the number of expected iterations. If not given,
    len(iterable) is used if it is defined.
    'file' can be a file-like object to output the progress message to.
    If leave is False, tqdm deletes its traces from screen after it has
    finished iterating over all elements.
    If less than mininterval seconds or miniters iterations have passed since
    the last progress meter update, it is not updated again.
    """
    if total is None:
        try:
            total = len(iterable)
        except TypeError:
            total = None
    prefix = desc + ': ' if desc else ''
    sp = StatusPrinter(file)
    sp.print_status(prefix + format_meter(0, total, 0, n_bars))
    start_t = last_print_t = time.time()
    last_print_n = 0
    n = 0
    for obj in iterable:
        yield obj
        # Now the object was created and processed, so we can print the meter.
        n += 1
        if n - last_print_n >= miniters:
            # We check the counter first, to reduce the overhead of time.time()
            cur_t = time.time()
            if cur_t - last_print_t >= mininterval:
                sp.print_status(prefix + format_meter(n, total, cur_t - start_t, n_bars))
                last_print_n = n
                last_print_t = cur_t
    
    if not leave:
        sp.print_status('')
        sys.stdout.write('\r')
    else:
        if last_print_n < n:
            cur_t = time.time()
            sp.print_status(prefix + format_meter(n, total, cur_t - start_t, n_bars))
        file.write('\n')


def trange(*args, **kwargs):
    """A shortcut for writing tqdm(range()) on py3 or tqdm(xrange()) on py2"""
    try:
        f = xrange
    except NameError:
        f = range
    return tqdm(f(*args), **kwargs)

if __name__ == '__main__':
    for i in tqdm(range(50),n_bars=50):
        time.sleep(.1)
    

                                                                                                             



# 1.2 Beautiful Soup 4 Requests urllib 
Scrape data from websites.

In [39]:
from bs4 import BeautifulSoup as bs 
import urllib.request 

if __name__ == '__main__':
    list_of_most_popular_websites = 'https://en.wikipedia.org/wiki/List_of_most_popular_websites'
    source = urllib.request.urlopen(list_of_most_popular_websites)
    if source.status == 200:
        content = source.read()
        soup = bs(content, 'lxml')
        print("title of the page:", soup.title)
        print("get attributes:", soup.title.name)
        print("get values:", soup.title.string)
        print("beginning navigation:", soup.title.parent.name)
        #print("getting specific values:", soup.table)
        #print("find them all", soup.find_all('table'))
        for table in soup.find_all('table')[1:]:
            print(table.attrs)
            print(table.string) # NavigableString 对象
            print(table.text) # unicode text 
        # grab links 
        for url in soup.find_all('a'):
            print(url.get('href')) # get the true URL 
        # grab text 
        print(soup.get_text())
        
        print("nav:", soup.nav)
        
        # class_="wiki", which allows us to work with a specific class of tag
        for table in soup.find_all('table', class_='wikitable'):
            print(table.text)
            tbody = table.find('tbody')
            tr_rows = tbody.find_all('tr')
            for tr in tr_rows:
                td = tr.find_all('td')
                row = [i.text for i in td if i == 1]
                print(row)
    else:
        print('404 Not Found')
    

title of the page: <title>List of most popular websites - Wikipedia</title>
get attributes: title
get values: List of most popular websites - Wikipedia
beginning navigation: head
{'style': 'font-size:96%;', 'width': '100%', 'class': ['wikitable', 'sortable']}
None


Site
Domain
Alexa top 100 global websites
(As of April 3, 2017[update])[3]
SimilarWeb top 100 websites
(As of May 2017)[4]
Type

Principal country



Google
google.com
1
1
Internet services and products
 U.S.


YouTube
youtube.com
2
3
Video sharing
 U.S.


Facebook
facebook.com
3
2
Social network
 U.S.


Baidu
baidu.com
4
11
Search engine
 China


Wikipedia
wikipedia.org
5
5
Encyclopedia
 U.S.


Yahoo!
yahoo.com
6
4
Portal and media
 U.S.


Google India
google.co.in
7
8
Search engine
 India


Reddit
reddit.com
8
27
Social news and entertainment
 U.S.


Tencent QQ
qq.com
9
36
Portal
 China


Taobao
taobao.com
10
65
Online shopping
 China


Amazon
amazon.com
11
14
E-commerce and cloud computing
 U.S.


Tmall
tmall.com
12
74
O

AttributeError: 'NoneType' object has no attribute 'find_all'

# 1.3.gPRC  & Protocol Bufferes
gPRC a client application can direcly call methods on a server application on a different machine as if it was a local object.
By default gPRC uses protocol buffers (serializing structured data 序列化结构化数据)

### Install gPRC
    $ python3 -m pip install grpcio
    
    $ python3 -m pip install grpcio-tools # install gPRC tools

### Google's gRPC provides a framework for implementing RPC (Remote Procedure Call) workflows. By layerinmg on top HTTP/2 and using protocol buffers,gRPC promises a lot of benefits over conventional REST+JSON APIs

# Coroutine 协程,微线程,是一种用户态的轻量级线程{协程拥有自己的寄存器上下文和栈,无法利用多核心的资源,协程本质上是单线程,协程需要和进程配合才能运行在多CPU.堵塞Blocking}

In [None]:
# yield 实现协程操作
import time 
import queue 

def consumer(name):
    print("--->starting eating food...")
    while True:
        new_food = yield
        print("[%s] is eating food %s" %(name, new_food))

def producer():
    
    r = con.__next__()
    r = con2.__next__()
    n = 0 
    while n < 5:
        n +=1
        con.send(n)
        con2.send(n)
        print("\033[32;1m[producer]\033[0m is making food %s]]")

# 7 tips to Time Python scripts and control Memory & CPU usage

In [46]:
# Tools to detect the bottlenecks of your code
# 1. Use a decorator to time your functions
import time 
from functools import wraps

def fn_timer(function):
    @wraps(function)
    def function_timer(*args, **kwargs):
        t0 = time.time()
        result = function(*args, **kwargs)
        t1 = time.time()
        print("Total time runing [ {:^20s} ] :: [{:^10s} seconds]".format(function.__name__, str(round(t1-t0,4))))
        return result
    return function_timer 
@fn_timer
def test():
    for i in range(100):
        time.sleep(.1)

if __name__ == '__main__':
        
    test()

Total time runing [         test         ] :: [      10.0595        seconds]


# Python 计算时间差



In [50]:
import datetime 
end = datetime.datetime(2018,1,9)
start = datetime.datetime(2005,4,1)
(end-start).days
(datetime.datetime(2018,1,22) - datetime.datetime(1993,2,1)).days

9121

# Pandas 教程

In [32]:
# Dataframe 数据选取和过滤

import pandas as pd 
import numpy as np 
df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD'))

# 选取 一列或多列
print(df['A'])
print(df[['A','B']])

# 选取一行或几行
print(df.iloc[1,:])
print(df.iloc[1:3,:])

# loc 通过行标签选取数据
print(df.loc[:,'A'])

# iloc, 通过行号获取数据
print(df.iloc[1])

# iat, 获取某一个cell值
print(df.iat[1,2])

# [] 过滤, 筛选D列数据中大于0行; []表示一个boolean表达式
df[df.D>0.5]
print(df[df>1])

# apply 方法构造复杂过滤,实现将返回值为boolean的方法作为过滤条件
print(df[df.apply(lambda x: x['B'] > x['C'], axis=1)])

# & 符号可以实现多条件筛选,"|"实现多条件或
df[(df.D>0) & (df.C<0)]
# 只需要A和B列数据,而D和C列数据都是用于筛选
df[['A','B']][(df.D>0)&(df.C<0)]
# 通过布尔索引
index = (df.D>0) & (df.C<0)
df[index]

0   -0.480369
1   -0.220118
2    0.518165
3    0.787031
4    0.825968
5   -0.099401
Name: A, dtype: float64
          A         B
0 -0.480369  0.657884
1 -0.220118  0.914549
2  0.518165 -1.136548
3  0.787031  0.660821
4  0.825968  0.637076
5 -0.099401 -0.761609
A   -0.220118
B    0.914549
C   -0.505112
D    0.729200
Name: 1, dtype: float64
          A         B         C         D
1 -0.220118  0.914549 -0.505112  0.729200
2  0.518165 -1.136548  0.587791 -0.810353
0   -0.480369
1   -0.220118
2    0.518165
3    0.787031
4    0.825968
5   -0.099401
Name: A, dtype: float64
A   -0.220118
B    0.914549
C   -0.505112
D    0.729200
Name: 1, dtype: float64
-0.5051117572442305
    A   B         C   D
0 NaN NaN       NaN NaN
1 NaN NaN       NaN NaN
2 NaN NaN       NaN NaN
3 NaN NaN       NaN NaN
4 NaN NaN       NaN NaN
5 NaN NaN  2.749369 NaN
          A         B         C         D
0 -0.480369  0.657884 -0.745022  0.819480
1 -0.220118  0.914549 -0.505112  0.729200
3  0.787031  0.660821 -0.84283

Unnamed: 0,A,B,C,D
0,-0.480369,0.657884,-0.745022,0.81948
1,-0.220118,0.914549,-0.505112,0.7292


# 贝叶斯

In [None]:
"""
# 联合概率满足交换律
P(A and B) = P(B and A)

# 联合概率以条件概率展开
P(A and B) = P(A) P(B|A)
P(B and A) = P(B) P(A|B)
P(A) P(B|A) = P(B) P(A|B)

# 变化得到
P(B|A) = P(A|B)P(B)/P(A)

P(B) : 先验概率,得到新数据前某一假设的概率
P(B|A): 后验概率,观察到新数据后计算假设的概率
P(A|B):似然度,既在该假设下得到的这一数据的概率
P(A): 标准化常量,在任何假设下得到这一数据的概率

"""