# dict操作

### dict特性

1. 字典的key必须可哈希: list/set/dict 均不可被哈希，int、float、str、tuple：是可以哈希的

注意，浮点数比较很不精确，因此千万不要用浮点数作为key！

### 常见操作

1.	clear(self)	 删除字典内所有的元素
2.	copy(self)	返回一个字典的浅copy ，俗称赋值
3.	fromkeys(*args,**kwargs)	创建一个新字典，以序列seq中元素做字典的键，val为字典所有键对应的初始值
4.	get(self,k,d=None)	 返回指定键的值，如果值不在字典中返回default值
5.	items(self)	以列表返回可遍历的(键, 值) 元组数组
6.	keys(self)	 以列表返回一个字典所有的键
7.	pop(self,k,d=None)	 删除字典给定键 key 所对应的值，返回值为被删除的值。key值必须给出。 否则，返回default值
8.	popitem(self)	 随机返回并删除字典中的一对键和值
9.	setdefault(self,k,d=None)	 和get()类似, 但如果键不存在于字典中，将会添加键并将值设为default
10.	update(self,E=None,**F)	 把self的东西更新到外面字典
11. values(self)	 以列表返回字典中的所有值

In [1]:
d1 = dict({'a': 1, 'b': 2, 'c': 3})
print('d1 id=', id(d1))
d2 = d1.copy()
print('d1 id=', id(d1))

d1.setdefault('d')
print(d1)

print(d1.keys(), type(d1.keys()))
print(d1.values(), type(d1.values()))

d1 id= 2650221138064
d1 id= 2650221138064
{'a': 1, 'b': 2, 'c': 3, 'd': None}
dict_keys(['a', 'b', 'c', 'd']) <class 'dict_keys'>
dict_values([1, 2, 3, None]) <class 'dict_values'>


### 创建dict

In [12]:
d1 = {'a': 1, 'b': 2, 'c': 3}
print(d1)

d2 = dict({'a': 1, 'b': 2, 'c': 3})
print(d2)

d3 = dict([('a', 1), ('b', 2), ('c', 3)])
print(d3)

d4 = dict(a=1, b=2, c=3)
print(d4)

d5 = dict(zip(['a', 'b', 'c'], [1, 2, 3]))  # 这个方法也可以用作将两个列表合并成一个字典
print(d5)

### 创建一个（不）含默认值的字典

In [13]:
kk = ['a', 'b', 'c']
print(dict.fromkeys(kk, 123))
print(dict.fromkeys(kk))

{'a': 123, 'b': 123, 'c': 123}
{'a': None, 'b': None, 'c': None}


### 合并两个字典

In [1]:
# 注意: 此时的两个字典的键必须是字符串,否则报错
dict1 = {'a': 1, 'b': 2, 'c': 3}
dict2 = {'d': 4, 'e': 5, 'f': 6}
dict(dict1, **dict2)

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}

### 应用

In [4]:
KEYS = ['time', 'PID', 'account', 'CPU', 'memory', 'kB_rd/s', 'kB_wr/s', 'CLI', 'md5']
VALUES = [None]*len(KEYS)
data = dict(zip(KEYS, VALUES))

print(data)

data = dict.fromkeys(KEYS, None)

print(data)

{'time': None, 'PID': None, 'account': None, 'CPU': None, 'memory': None, 'kB_rd/s': None, 'kB_wr/s': None, 'CLI': None, 'md5': None}
{'time': None, 'PID': None, 'account': None, 'CPU': None, 'memory': None, 'kB_rd/s': None, 'kB_wr/s': None, 'CLI': None, 'md5': None}


In [11]:
from itertools import zip_longest

KEYS = ['time', 'PID', 'account', 'CPU', 'memory', 'kB_rd/s', 'kB_wr/s', 'CLI', 'md5']
line = b'11:43:15,1,root,0.01,0.05,20.64,4.97,/sbin/init ,249b19aaa268143c3a0b3d6aa9faa070,abc,edc'
details = line.split(b',')
data = {}
for k, v in zip_longest(KEYS, details, fillvalue=0):
    data[k] = v.decode()
    
print(data)

{'time': '11:43:15', 'PID': '1', 'account': 'root', 'CPU': '0.01', 'memory': '0.05', 'kB_rd/s': '20.64', 'kB_wr/s': '4.97', 'CLI': '/sbin/init ', 'md5': '249b19aaa268143c3a0b3d6aa9faa070', 0: 'edc'}


In [23]:
import time
from datetime import datetime, date
from itertools import zip_longest

line = b'11:43:15,1,root,0.01,0.05,20.64,4.97,/sbin/init ,249b19aaa268143c3a0b3d6aa9faa070,abc,edc'

def _handle_process_detail(line):
    data = {}
    KEYS = ['time', 'PID', 'account', 'CPU', 'memory', 'kB_rd/s', 'kB_wr/s', 'CLI', 'md5']
    details = line.split(b',')
    for k, v in zip_longest(KEYS, details, fillvalue='0'):
        try:
            data[k] = v.decode().strip()
        except Exception as e:
            data[k] = v
    print(data)
    result = formatter_data(data)
    print(result)
    return result


def datetime_2_timestamp(str_date_time, date_type='datetime'):
    """字符串datetime/date转时间戳,精确到秒"""
    if date_type == 'datetime':
        date_time = datetime.strptime(str_date_time, "%Y-%m-%d %H:%M:%S")
    else:
        date_time = datetime.strptime(str_date_time, "%Y-%m-%d")
    timestamp = str(int(time.mktime(date_time.timetuple())))
    return timestamp


def formatter_data(data):
    date_time = '{} {}'.format(date.today(), data['time'])
    data['time'] = datetime_2_timestamp(date_time)
    if '0' in data:
        data.pop('0')
    for k, v in data.items():
        try:
            if isinstance(eval(v), int):
                data[k] = int(v)
            elif isinstance(eval(v), float):
                data[k] = float(v)
        except Exception as e:
            data[k] = v
    return data

_handle_process_detail(line)

{'time': '11:43:15', 'PID': '1', 'account': 'root', 'CPU': '0.01', 'memory': '0.05', 'kB_rd/s': '20.64', 'kB_wr/s': '4.97', 'CLI': '/sbin/init', 'md5': '249b19aaa268143c3a0b3d6aa9faa070', '0': 'edc'}
{'time': 1533872595, 'PID': 1, 'account': 'root', 'CPU': 0.01, 'memory': 0.05, 'kB_rd/s': 20.64, 'kB_wr/s': 4.97, 'CLI': '/sbin/init', 'md5': '249b19aaa268143c3a0b3d6aa9faa070'}


{'time': 1533872595,
 'PID': 1,
 'account': 'root',
 'CPU': 0.01,
 'memory': 0.05,
 'kB_rd/s': 20.64,
 'kB_wr/s': 4.97,
 'CLI': '/sbin/init',
 'md5': '249b19aaa268143c3a0b3d6aa9faa070'}

database: asset (资产)

collections:    
* process: 进程信息
* port: 开放端口
* account: 账户信息
* schedule: 定时任务
* boot: 开机启动


### 字典排序

In [3]:
# 调用sorted()排序
dict1 = {"a": "apple", "b": "grape", "c": "orange", "d": "banana"}

# 按照key排序
print(sorted(dict1.items(), key=lambda d: d[0]))

# 按照value排序
print(sorted(dict1.items(), key=lambda d: d[1]))

[('a', 'apple'), ('b', 'grape'), ('c', 'orange'), ('d', 'banana')]
[('a', 'apple'), ('d', 'banana'), ('b', 'grape'), ('c', 'orange')]


### 修改字典的key,不修改value

In [5]:
d = {'a.a': 1, 'b':2, 'c':3, 'd.d': 555}
for k, v in d.items():
    if '.' in k:
        nk = k.replace('.', '_')
        d[nk] = d.pop(k)

print(d)

# 更好的方法:
d = {'a.a': 1, 'b':2, 'c':3, 'd.d': 555}
for k, v in d.items():
    if '.' in k:
        nk = k.replace('.', '_')
        d.update({nk: d.pop(k)})

print(d)        

{'b': 2, 'c': 3, 'a_a': 1, 'd_d': 555}
{'b': 2, 'c': 3, 'a_a': 1, 'd_d': 555}


### 判断两个字典内容是否相等

In [6]:
KEYS = ['A', 'B', 'C']
v1 = [1, 2, 3]
v2 = [1, 2, 3]

d1 = dict(zip(KEYS, v1))
d2 = dict(zip(KEYS, v2))

print(d1 == d2)


KEYS = ['A', 'B', 'C']
v3 = [1, 2, {'a': 10, 'b': 20}]
v4 = [1, 2, {'a': 10, 'b': 20}]

d3 = dict(zip(KEYS, v3))
d4 = dict(zip(KEYS, v4))

print(d3 == d4)

True
True


### 截取部分字典

In [9]:
base = {'A':1, 'B':2, 'C':3, 'D':4, 'E':5}
subkey = ['C', 'E']

{k:v for k,v in base.items() if k in subkey}

{key:base[key] for key in subkey}

dict([(key, base[key]) for key in subkey])

{'C': 3, 'E': 5}