# 再说几句Python

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## 前情提要
上两集中，我们学习了python的基本语法（数据类型、字符串的处理、循环语句、判断语句、列表、怎么建立函数）和numpy的基本功能（array的概念、加减乘除、array的shape、如何得到array的元素、建立mask array等等）。请先回顾之前的内容。

这次补充几个关于python语言的高级用法，为之后讲爬虫和pandas做一点铺垫。

### `map` and `lambda`

`map(f, seq)` 把函数 `f` 依次作用到 `seq` 的每一个元素上。`lambda` 用来定义匿名函数：如果一个函数只在这一处用到、其他地方用不到，就不需要给它取个阿猫阿狗之类的名字了。

In [4]:
[i / 256 for i in [55, 125, 34]]

[0.21484375, 0.48828125, 0.1328125]

In [2]:
def func(x):
    """Return ``x`` increased by 5 (toy function used to demonstrate ``map``)."""
    offset = 5
    return x + offset

In [3]:
func(4)

9

In [11]:
map(func, [4, 5, 6, 7])

<map at 0x7fe6804a07b8>

In [12]:
list(map(func, [4, 5, 6, 7]))

[9, 10, 11, 12]

In [16]:
list(map(lambda x: x + 5, [4]))

[9]

In [18]:
list(map(lambda x: x + 5, [4, 5, 6, 7]))

[9, 10, 11, 12]

In [10]:
provinceName = ["湖北", "浙江", "江西", "安徽"]

In [11]:
def append_sheng(x):
    """Return ``x`` with the character '省' appended to its end."""
    suffix = '省'
    return x + suffix

In [12]:
list(map(append_sheng, provinceName))

['湖北省', '浙江省', '江西省', '安徽省']

In [13]:
provinceFullName = list(map(lambda x: x + '省', provinceName))
provinceFullName

['湖北省', '浙江省', '江西省', '安徽省']

In [24]:
provinceName = ["湖北", "浙江", "江西省", "安徽"]

In [25]:
provinceFullName = list(map(lambda x: x + '省', provinceName))
provinceFullName

['湖北省', '浙江省', '江西省省', '安徽省']

In [26]:
provinceFullName = list(map(lambda x: x.rstrip('省') + '省', provinceName))
provinceFullName

['湖北省', '浙江省', '江西省', '安徽省']

### `enumerate` and `zip`

In [34]:
infection = [27100, 1075, 740, 779]

In [None]:
for prov in provinceFullName:
    if prov == '浙江省':
        # How many infections does Zhejiang have? Iterating over the names
        # alone gives no index to look the number up in `infection`.
        # `pass` keeps this placeholder cell syntactically valid.
        pass

In [38]:
for i, prov in enumerate(provinceFullName):
    if prov == '浙江省':
        # 浙江有多少人感染？？？
        print('Infections = {}'.format(infection[i]))

Infections = 1075


用两个列表 `provinceFullName` 和 `infection` 来表达疫情，结构性不好：省份和感染人数只能靠下标一一对应。

In [39]:
list(zip(provinceFullName, infection))

[('湖北省', 27100), ('浙江省', 1075), ('江西省', 740), ('安徽省', 779)]

练习1: print "{city} is in {state}" for each combination.

In [40]:
cities = ["Phoenix", "Austin", "San Diego", "New York"]
states = ["Arizona", "Texas", "California", "New York"]

couple = list(zip(cities, states))

In [55]:
for i, city in enumerate(couple):
    print(i, city)

0 ('Phoenix', 'Arizona')
1 ('Austin', 'Texas')
2 ('San Diego', 'California')
3 ('New York', 'New York')


In [47]:
couple

[('Phoenix', 'Arizona'),
 ('Austin', 'Texas'),
 ('San Diego', 'California'),
 ('New York', 'New York')]

In [48]:
['{0} is in {1}'.format(item[0], item[1]) for item in couple]

['Phoenix is in Arizona',
 'Austin is in Texas',
 'San Diego is in California',
 'New York is in New York']

练习2: 湖北每日新增多少确诊病例？

In [49]:
hubei_infection = [270,   444,   444,   729,   761,  1423,  1423,  3554,  4586, 
                   5806,  5806,  9074,  9074, 13522, 16678, 19665, 22112, 24953, 
                   27100, 27100] # from Jan 21

In [50]:
len(hubei_infection)

20

In [57]:
[hubei_infection[i + 1] - num for i, num in enumerate(hubei_infection[:-1])]

[174,
 0,
 285,
 32,
 662,
 0,
 2131,
 1032,
 1220,
 0,
 3268,
 0,
 4448,
 3156,
 2987,
 2447,
 2841,
 2147,
 0]

### 词典 `dict` and `set`

In [59]:
NCP_infection = dict(list(zip(provinceFullName, infection)))
NCP_infection

{'湖北省': 27100, '浙江省': 1075, '江西省': 740, '安徽省': 779}

In [61]:
NCP_infection['湖北省']

27100

In [62]:
NCP_infection['安徽省']

779

In [63]:
NCP_infection['四川省']

KeyError: '四川省'

In [64]:
NCP_infection.keys()

dict_keys(['湖北省', '浙江省', '江西省', '安徽省'])

In [65]:
NCP_infection['北京市'] = 326
NCP_infection

{'湖北省': 27100, '浙江省': 1075, '江西省': 740, '安徽省': 779, '北京市': 326}

In [77]:
list(NCP_infection.values())

[27100, 1075, 740, 779, 326]

In [78]:
list(NCP_infection.keys())

['湖北省', '浙江省', '江西省', '安徽省', '北京市']

In [79]:
province_name = ['湖北省', '浙江省', '江西省', '安徽省', '北京市', '北京市', '安徽省']

In [80]:
set(province_name)

{'北京市', '安徽省', '江西省', '浙江省', '湖北省'}

In [81]:
list(set(province_name))

['江西省', '湖北省', '浙江省', '安徽省', '北京市']

In [85]:
np.unique(np.array(province_name), return_counts=True, return_index=True)

(array(['北京市', '安徽省', '江西省', '浙江省', '湖北省'], dtype='<U3'),
 array([4, 3, 2, 1, 0]),
 array([2, 2, 1, 1, 1]))

### `datetime` 每时每刻

In [86]:
import datetime
now = datetime.datetime.now()
now

datetime.datetime(2020, 2, 21, 18, 20, 34, 154692)

In [87]:
now = now.replace(second=0, microsecond=0)
now

datetime.datetime(2020, 2, 21, 18, 20)

In [88]:
pd.to_datetime(now)

Timestamp('2020-02-21 18:20:00')

怎么把 '2019年10月11日 23:52' 转化成机器能看懂的东西？

In [89]:
crush_time_str = '2019年10月11日 23:52'
crush_time = datetime.datetime.strptime(crush_time_str, '%Y年%m月%d日 %H:%M')
crush_time

datetime.datetime(2019, 10, 11, 23, 52)

In [90]:
pd.to_datetime(crush_time)

Timestamp('2019-10-11 23:52:00')

In [91]:
pd.to_datetime(crush_time + datetime.timedelta(days=1))

Timestamp('2019-10-12 23:52:00')

In [92]:
pd.to_datetime(crush_time + datetime.timedelta(days=365))

Timestamp('2020-10-10 23:52:00')

### 给函数写好注释，做一个负责任的女人
行号

In [93]:
def parse_time(time_str, fmt='%Y年%m月%d日 %H:%M', verbose=True):
    """
    Convert a time string into a ``pd.Timestamp``.

    The second parameter is spelled ``fmt`` rather than ``format`` so that
    the built-in ``format`` function is not shadowed inside the function.

    Parameters:
        time_str (str): text to parse; it has to match ``fmt``.
        fmt (str): ``strptime`` pattern describing ``time_str``.
            Default is '%Y年%m月%d日 %H:%M'.
        verbose (bool): if true, print the parsed time.

    Return:
        time (pd.Timestamp): the parsed time.
    """
    # Imported locally, so the function does not rely on notebook-level imports.
    import datetime
    import pandas as pd

    parsed = datetime.datetime.strptime(time_str, fmt)
    time = pd.to_datetime(parsed)

    if verbose:
        print('The time is {}'.format(time))

    return time

In [97]:
crush_time = parse_time(crush_time_str, verbose=True)

The time is 2019-10-11 23:52:00


In [98]:
crush_time

Timestamp('2019-10-11 23:52:00')

### * 和 **

In [13]:
def parse_time(time_str, fmt='%Y年%m月%d日 %H:%M', verbose=True, **kwds):
    """
    Convert a time string into a ``pd.Timestamp``, with optional extras
    passed as arbitrary keyword arguments.

    The second parameter is spelled ``fmt`` rather than ``format`` so that
    the built-in ``format`` function is not shadowed inside the function.

    Parameters:
        time_str (str): text to parse; it has to match ``fmt``.
        fmt (str): ``strptime`` pattern describing ``time_str``.
            Default is '%Y年%m月%d日 %H:%M'.
        verbose (bool): if true, print the parsed time.
        **kwds: extra keyword arguments. Two keys are looked at:
            ``is_why`` -- when the key is present (whatever its value) an
            extra message is printed;
            ``time_name`` -- label used in the verbose message instead of
            the generic 'Time'.

    Return:
        time (pd.Timestamp): the parsed time.
    """
    # Imported locally, so the function does not rely on notebook-level imports.
    import datetime
    import pandas as pd

    time = pd.to_datetime(datetime.datetime.strptime(time_str, fmt))

    # Only the presence of the key matters here, not its value.
    if 'is_why' in kwds:
        print('WHY is comming!!!!!!!')

    if verbose:
        if 'time_name' in kwds:
            message = '{0} is {1}'.format(kwds['time_name'], time)
        else:
            message = 'Time is {0}'.format(time)
        print(message)

    return time

In [15]:
crush_time_str = '2019年10月11日 23:52'
crush_time = parse_time(crush_time_str, verbose=True, time_name='why')


why is 2019-10-11 23:52:00


In [22]:
def parse_time(time_str, fmt='%Y年%m月%d日 %H:%M', verbose=True, *args):
    """
    Convert a time string into a ``pd.Timestamp``, accepting extra
    positional arguments.

    The second parameter is spelled ``fmt`` rather than ``format`` so that
    the built-in ``format`` function is not shadowed inside the function.

    Parameters:
        time_str (str): text to parse; it has to match ``fmt``.
        fmt (str): ``strptime`` pattern describing ``time_str``.
            Default is '%Y年%m月%d日 %H:%M'.
        verbose (bool): if true, print the parsed time.
        *args: any further positional arguments; they are collected into a
            tuple that is shown as the label of the verbose message.

    Return:
        time (pd.Timestamp): the parsed time.
    """
    # Imported locally, so the function does not rely on notebook-level imports.
    import datetime
    import pandas as pd

    parsed = datetime.datetime.strptime(time_str, fmt)
    time = pd.to_datetime(parsed)

    if verbose:
        # `args` is a tuple, so it is printed in tuple form, e.g. ('Hahaha',).
        label = args
        print('{0} is {1}'.format(label, time))

    return time

In [23]:
crush_time_str = '2019年10月11日 23:52'
crush_time = parse_time(crush_time_str, '%Y年%m月%d日 %H:%M', True, 'Hahaha')

('Hahaha',) is 2019-10-11 23:52:00


In [26]:
def multi_sum(*args):
    """
    Print the received positional arguments and return their sum.

    The arguments arrive as a tuple; with no arguments the result is 0.
    """
    print(args)
    total = 0
    for value in args:
        total += value
    return total

In [28]:
multi_sum(1, 2, 3, 4, 5)

(1, 2, 3, 4, 5)


15

### assert

In [42]:
def parse_time(time_str, fmt='%Y年%m月%d日 %H:%M', verbose=True):
    """
    Convert a time string into a ``pd.Timestamp``, checking the input type
    with ``assert`` first.

    The second parameter is spelled ``fmt`` rather than ``format`` so that
    the built-in ``format`` function is not shadowed inside the function.

    Parameters:
        time_str (str): text to parse; it has to match ``fmt``.
        fmt (str): ``strptime`` pattern describing ``time_str``.
            Default is '%Y年%m月%d日 %H:%M'.
        verbose (bool): if true, print the parsed time.

    Return:
        time (pd.Timestamp): the parsed time.

    Raises:
        AssertionError: if ``time_str`` is not a ``str``.
    """
    # Imported locally, so the function does not rely on notebook-level imports.
    import datetime
    import pandas as pd

    # Stop with a readable message before strptime sees a non-string.
    assert isinstance(time_str, str), '你的数据类型错了！🐧'

    parsed = datetime.datetime.strptime(time_str, fmt)
    time = pd.to_datetime(parsed)

    if verbose:
        print('The time is {}'.format(time))

    return time

In [43]:
crush_time_str = 2019
crush_time = parse_time(crush_time_str)

AssertionError: 你的数据类型错了！🐧

### 面向对象编程
对象：http://astrojacobli.github.io
🐘🐘

以前写程序的办法都是流程式编程，现在要搞对象（object）。

对象：attributes + methods

class -> object

In [89]:
class People:
    """
    A toy person with a few status values and actions that change them.

    Attributes:
        name, gender, alias: stored exactly as passed to the constructor.
        hunger: starts at 0.
        thirsty: starts at 0.
        health: starts at 100.
        love: starts at 10.
        diary: text of the latest diary entry; the attribute only exists
            after `write_diary` has been called.
    """

    def __init__(self, name, gender, alias):
        """Store the identity fields and set the initial status values."""
        self.name = name
        self.gender = gender
        self.alias = alias
        self.hunger = 0
        self.thirsty = 0
        self.health = 100
        self.love = 10

    def eat(self, amount=1):
        """Lower `hunger` by `amount`; do nothing if that would make it negative."""
        if self.hunger - amount >= 0:
            self.hunger -= amount

    def drink(self, amount=1):
        """Lower `thirsty` by `amount`; do nothing if that would make it negative."""
        if self.thirsty - amount >= 0:
            self.thirsty -= amount

    def exercise(self, time=1):
        """
        Change `health` according to the exercise time.

        A `time` above 5 costs 5 health per unit; otherwise every unit
        adds 10 health.
        """
        if time > 5:
            self.health -= 5 * time
        else:
            self.health += 10 * time

    def meowmeow(self):
        """Increase `love` by 1."""
        self.love += 1

    def video_call(self):
        """Increase `love` by 10."""
        self.love += 10

    def quarrel(self, others):
        """
        Quarrel with another person; both sides are affected.

        This person loses 10 love and 20 health and gains 10 thirsty; the
        other one loses 5 love and 10 health and gains 5 thirsty.

        Raises:
            AssertionError: if `others` is not a `People` instance.
        """
        assert(isinstance(others, People)), '你不是人！！！'
        self.love -= 10
        self.health -= 20
        self.thirsty += 10

        others.love -= 5
        others.health -= 10
        others.thirsty += 5

    def check_healthy(self):
        """
        Return 'unhealthy' if `health` is below 50, otherwise 'healthy'.

        Note that this is not a pure check: whenever `thirsty` or `hunger`
        is above 30, 30 is subtracted from `health` before the comparison,
        and that happens on every call.
        """
        if self.thirsty > 30 or self.hunger > 30:
            self.health -= 30
        if self.health < 50:
            return 'unhealthy'
        else:
            return 'healthy'

    def write_diary(self, stuff=None):
        """
        Write today's diary entry into `self.diary`.

        The entry starts with a status line built from `check_healthy()`
        (which may lower `health`, see there). If `stuff` is given it is
        added as a second line.

        Parameters:
            stuff (str or None): extra text for the entry. With the default
                `None` only the status line is written.
        """
        ## Basic status
        healthy = self.check_healthy()
        lines = ["I'm {}".format(healthy) + ' today']
        # Joining with None would raise TypeError, so the default must be skipped.
        if stuff is not None:
            lines.append(stuff)
        self.diary = "\n".join(lines)

    def make_love(self, others):
        """
        Increase `love` of both this person and `others` by 100.

        Raises:
            AssertionError: if `others` is not a `People` instance.
        """
        assert(isinstance(others, People)), '你不是人！！！'
        self.love += 100
        others.love += 100

In [91]:
babyxuan = People(name='LJX', gender='Male', alias='XX')
babyhuan = People(name='WHY', gender='Female', alias='Huanhuan')

In [104]:
params = 'WHY', 'Female', 'Huanhuan'
babyhuan = People(*params)

In [106]:
babyhuan.__dict__

{'name': 'WHY',
 'gender': 'Female',
 'alias': 'Huanhuan',
 'hunger': 0,
 'thirsty': 0,
 'health': 100,
 'love': 10}

In [92]:
babyhuan.make_love(babyxuan)

In [94]:
babyxuan.__dict__

{'name': 'LJX',
 'gender': 'Male',
 'alias': 'XX',
 'hunger': 0,
 'thirsty': 10,
 'health': 80,
 'love': 100}

In [100]:
babyhuan.__dict__

{'name': 'WHY',
 'gender': 'Female',
 'alias': 'Huanhuan',
 'hunger': 0,
 'thirsty': 5,
 'health': 90,
 'love': 105}

In [98]:
babyhuan.eat(10)
babyhuan.drink(30)
babyhuan.check_healthy()

'healthy'

In [55]:
babyhuan.write_diary("I don't read some papers today. ")

In [56]:
print(babyhuan.diary)

I'm healthy today
I don't read some papers today. 


In [57]:
babyhuan.video_call()

In [58]:
babyhuan.love

20

In [68]:
babyhuan.__dict__

{'name': 'WHY',
 'gender': 'Female',
 'alias': 'Huanhuan',
 'hunger': 0,
 'thirsty': 55,
 'health': -40,
 'love': -35,
 'diary': "I'm unhealthy today\nF**K! I quarreled a lot today! "}

In [103]:
"I'm unhealthy today".split(' ')

["I'm", 'unhealthy', 'today']

其实python里一切东西都是对象。比如字符串有method叫做split.