# 一些工具函数

### 使用 Unix Shell 中常用的通配符 (比如 *.py , Dat[0-9]*.csv 等) 去匹配文本字符串

In [2]:
from fnmatch import fnmatch
    
allowed_domains = ['*.baidu.com']

def match_domains(domain):
    global allowed_domains
    for item in allowed_domains:
        if domain == item:
            return True
        else:
            if fnmatch(domain, item):
                return True
    return False

mydomain = 'a.f.baidu.com'

res = match_domains(mydomain)

print(res)

True


### 函数执行耗时装饰器

In [14]:
import math
import time
import logging
from functools import wraps

def time_counter(function):
    @wraps(function)
    def function_timer(*args, **kwargs):
        print('[Function: {} start...]'.format(function.__name__))
        t0 = time.time()
        result = function(*args, **kwargs)
        t1 = time.time()
        print('[Function: {} finished, spent time: {:.5f}s]'.format(function.__name__, t1 - t0))
        return result
    return function_timer

@time_counter
def my_func1():
    for i in range(10000):
        math.sqrt(144)

@time_counter
def my_func2():
    for i in range(10000):
        144**.5

@time_counter
def my_func3():
    for i in range(10000):
        pow(144,.5)
        
@time_counter
def my_func4():
    for i in range(10000):
        math.pow(144,.5)

my_func1()
my_func2()
my_func3()
my_func4()

[Function: my_func1 start...]
[Function: my_func1 finished, spent time: 0.00203s]
[Function: my_func2 start...]
[Function: my_func2 finished, spent time: 0.00000s]
[Function: my_func3 start...]
[Function: my_func3 finished, spent time: 0.00301s]
[Function: my_func4 start...]
[Function: my_func4 finished, spent time: 0.00197s]


### 密码md5加密和SHA1加密

md5生成一个128bit的结果，通常用32位的16进制字符串表示 

In [1]:
import hashlib

def crypto_password(pwd):
    md_5 = hashlib.md5()
    md_5.update(pwd.encode('utf8'))
    return md_5.hexdigest()

print(crypto_password('abc@123'))
print(len(str(crypto_password('abc@123'))))

b24331b1a138cde62aa1f679164fc62f
32


sha1生成一个160bit的结果，通常用40位的16进制字符串表示

In [15]:
import hashlib

def crypto_password(pwd):
    md_5 = hashlib.md5()
    md_5.update(pwd.encode('utf8'))
    md5_pwd = md_5.hexdigest()
    sha1 = hashlib.sha1()
    sha1.update(md5_pwd.encode('utf8'))
    sha1_pwd = sha1.hexdigest()
    return sha1_pwd

print(crypto_password('abc@123'))
print(len(str(crypto_password('abc@123'))))
print(crypto_password('Hector@123456'))
print(crypto_password('Water@123'))

7ba80c3cb4f187c314c43cbf778e009b940ef3ef
40
df2b6a57a71dfdba480bcab9f5f22585f87599ea
58f6e211d77eaac09de13fb839b136363b0e7d42


In [17]:
import requests
from pprint import pprint

url = 'http://2code.top/xss'

req = requests.get(url)

print(req.status_code)
pprint(req.headers)
print('=============')
pprint(req.history)
print('=============')
r1 = req.history[0]
pprint(r1.status_code)
pprint(r1.url)
pprint(r1.text)
print('=============')
r2 = req.history[1]
pprint(r2.status_code)
pprint(r2.url)
pprint(r2.text)

200
{'Date': 'Wed, 11 Apr 2018 07:01:53 GMT', 'Server': 'Apache', 'Vary': 'User-Agent,Accept-Encoding', 'Content-Encoding': 'gzip', 'Content-Length': '1239', 'Keep-Alive': 'timeout=15, max=298', 'Connection': 'Keep-Alive', 'Content-Type': 'text/html'}
[<Response [301]>, <Response [302]>]
301
'http://2code.top/xss'
('<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">\n'
 '<html><head>\n'
 '<title>301 Moved Permanently</title>\n'
 '</head><body>\n'
 '<h1>Moved Permanently</h1>\n'
 '<p>The document has moved <a href="http://2code.top/xss/">here</a>.</p>\n'
 '</body></html>\n')
302
'http://2code.top/xss/'
''


In [19]:
import requests
from pprint import pprint

url = 'http://2code.top/1.php'

req = requests.get(url)

print(req.status_code)
print('=============')
pprint(req.history)
print('=============')
r1 = req.history[0]
pprint(r1.status_code)
pprint(r1.url)
pprint(r1.text)
print('=============')
r2 = req.history[1]
pprint(r2.status_code)
pprint(r2.url)
pprint(r2.text)
print('=============')
r3 = req.history[2]
pprint(r3.status_code)
pprint(r3.url)
pprint(r3.text)
# print('=============')
# r4 = req.history[]
# pprint(r2.status_code)
# pprint(r2.url)
# pprint(r2.text)


200
[<Response [302]>, <Response [302]>, <Response [302]>]
302
'http://2code.top/1.php'
'This is 1.php'
302
'http://2code.top/2.php'
'This is 2.php'
302
'http://2code.top/xss/'
''


### 获取当前函数名称

#### 从外部获取函数名称

In [6]:
def aa():
    pass

print(aa.__name__)
print(getattr(aa, '__name__'))

aa
aa


#### 从函数内部获取当前函数名称

1. 使用sys模块的方法,f_code和co_name可以参考python源码解析的pyc生成和命名空间章节。

In [9]:
import sys

def aa():
    print(sys._getframe().f_code.co_name)
    
aa()

aa


In [19]:
from urllib.parse import urlparse, urljoin

url = "https://www.baidu.com/api/method?item=aa&item2=bb"

res = urlparse(url)

if ":" not in res.netloc:
    netloc = res.netloc+":80"

print(res)
new_url = res.scheme+ "://" + netloc + res.path + res.params + res.query + res.fragment

print(new_url)


ParseResult(scheme='https', netloc='www.baidu.com', path='/api/method', params='', query='item=aa&item2=bb', fragment='')
https://www.baidu.com:80/api/methoditem=aa&item2=bb


### 找出列表中的重复元素并计数

In [1]:
import collections

a = [1, 1, 2, 5, 8, 9, 5, 6, 8, 1]

for item, count in collections.Counter(a).items():
    if count > 1:
        print('item: {}, count: {}'.format(item, count))

item: 1, count: 3
item: 5, count: 2
item: 8, count: 2


### 对一个文件夹的文件按照修改日期排序

In [None]:
# 对一个文件夹的文件按照修改日期排序
import os

def sort_files(file_path):
    file_list = os.listdir(file_path)
    # 将文件按照修改时间排序, 最新修改的排在最前面
    file_list.sort(key=lambda fn: os.path.getmtime(os.path.join(barcode_path, fn))
        if not os.path.isdir(os.path.join(file_path, fn)) else 0)
    return file_list

In [1]:
import hashlib

def crypto_password(pwd):
    md_5 = hashlib.md5()
    md_5.update(pwd.encode('utf8'))
    md5_pwd = md_5.hexdigest()
    sha1 = hashlib.sha1()
    sha1.update(md5_pwd.encode('utf8'))
    sha1_pwd = sha1.hexdigest()
    return sha1_pwd


print(crypto_password('123456'))

10470c3b4b1fed12c3baac014be15fac67c6e815


In [3]:
from operator import itemgetter

ip_list = [{'endtime': None,
  'freq': 15,
  'ip': '192.168.1.76',
  'runtime': 10,
  'starttime': None,
  'tid': 6},
 {'endtime': None,
  'freq': 60,
  'ip': '192.168.1.75',
  'runtime': -1,
  'starttime': None,
  'tid': 7},
 {'endtime': None,
  'freq': 60,
  'ip': '192.168.1.77',
  'runtime': 20,
  'starttime': None,
  'tid': 8},
 {'endtime': None,
  'freq': 30,
  'ip': '192.168.1.78',
  'runtime': -1,
  'starttime': None,
  'tid': 9},
 {'endtime': None,
  'freq': 60,
  'ip': '192.168.1.79',
  'runtime': -1,
  'starttime': None,
  'tid': 10},
 {'endtime': None,
  'freq': 120,
  'ip': '192.168.1.80',
  'runtime': -2,
  'starttime': None,
  'tid': 11},
 {'endtime': None,
  'freq': 180,
  'ip': '192.168.1.80',
  'runtime': -1,
  'starttime': None,
  'tid': 12},
 {'endtime': None,
  'freq': 240,
  'ip': '192.169.1.100',
  'runtime': -1,
  'starttime': None,
  'tid': 13},
 {'endtime': None,
  'freq': 1,
  'ip': '192.168.1.101',
  'runtime': -1,
  'starttime': None,
  'tid': 14}]

ip_list.sort(key=itemgetter('runtime', 'freq'))


from pprint import pprint
pprint(ip_list)

[{'endtime': None,
  'freq': 120,
  'ip': '192.168.1.80',
  'runtime': -2,
  'starttime': None,
  'tid': 11},
 {'endtime': None,
  'freq': 1,
  'ip': '192.168.1.101',
  'runtime': -1,
  'starttime': None,
  'tid': 14},
 {'endtime': None,
  'freq': 30,
  'ip': '192.168.1.78',
  'runtime': -1,
  'starttime': None,
  'tid': 9},
 {'endtime': None,
  'freq': 60,
  'ip': '192.168.1.75',
  'runtime': -1,
  'starttime': None,
  'tid': 7},
 {'endtime': None,
  'freq': 60,
  'ip': '192.168.1.79',
  'runtime': -1,
  'starttime': None,
  'tid': 10},
 {'endtime': None,
  'freq': 180,
  'ip': '192.168.1.80',
  'runtime': -1,
  'starttime': None,
  'tid': 12},
 {'endtime': None,
  'freq': 240,
  'ip': '192.169.1.100',
  'runtime': -1,
  'starttime': None,
  'tid': 13},
 {'endtime': None,
  'freq': 15,
  'ip': '192.168.1.76',
  'runtime': 10,
  'starttime': None,
  'tid': 6},
 {'endtime': None,
  'freq': 60,
  'ip': '192.168.1.77',
  'runtime': 20,
  'starttime': None,
  'tid': 8}]


In [2]:
import  itertools

def gen_token():
    token = itertools.product("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",repeat=16)
    yield token

    token = gen_token()

for i in range(10):
    
    print(next(token))

<itertools.product object at 0x000001472B59D750>


StopIteration: 

In [16]:
import time
import random
import string
import hashlib

def gen_unique_strings(length=32):
    chars = string.ascii_letters + string.digits
    random_chars = ''.join([random.choice(chars) for i in range(length)])
#     print(time.time())
    unique_chars = '{}-{}'.format(int(time.time()*1000),random_chars)
#     print('random_chars=', random_chars)
    print(unique_chars)
    return unique_chars

def gen_token(chars):
    md_5 = hashlib.md5()
    md_5.update(chars.encode('utf8'))
    token = md_5.hexdigest()
    return token
    

for i in range(10):
    chars = gen_unique_strings()
    token = gen_token(chars)
    print(token)
    print(len(token))

1534232350833-W66LJHqRLUPCIrAq9c0j4SuQHJSjUTwt
636273b1264541027ddbd6ed0567edc2
32
1534232350833-R5NMBs3qlZvid07Z2e3N65eiMGlt6Btk
6464bedc3960a6df59ae554e55ffd66b
32
1534232350833-oeyaHWFUFQKQbiCawQxT2vbzM7chzhy9
c333b67ccd82ed47ef6f5d3b3d76b6b8
32
1534232350834-NZrvrDK6OlJLdhZlF7DUhODTbBwJTVKN
d8dd55ce7fc68637c346efe862f53b6a
32
1534232350834-nmriDO6GyOFGMYnIx5cLmIstEtVFUQeR
9fe5ed8dd4b1004749ac7496e9a26930
32
1534232350834-bJ88WXlesKh0Dp9KiuLb6kNZsXF03atR
650ce247492a008403be5f451dac3ae7
32
1534232350834-l2Pw4YNGJ7MkYeO8f2oyRIROmQkdI8ij
535df077af4800e341e7061e7eef42d9
32
1534232350834-lmlrK7VZCX6dfO0464UDMuO0lkF5DTLR
4e86be76097d72790e72870de2233025
32
1534232350834-NM5w9qWTcEhVtciVyX7sNL6SddgYDtmE
a288f0c3153c75472e3e6ebdf4240290
32
1534232350834-52JFT832wOA6oQsZN3RZaK2z98A2A4FS
b348896958ee0e9730db80982cdafd69
32


In [6]:
import string
dir(string)

['Formatter',
 'Template',
 '_ChainMap',
 '_TemplateMetaclass',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_re',
 '_string',
 'ascii_letters',
 'ascii_lowercase',
 'ascii_uppercase',
 'capwords',
 'digits',
 'hexdigits',
 'octdigits',
 'printable',
 'punctuation',
 'whitespace']

In [8]:
import string

print(string.ascii_letters)
print(string.ascii_lowercase)
print(string.ascii_uppercase)
print(string.capwords)
print(string.digits)
print(string.hexdigits)
print(string.printable)
print(string.punctuation)
print(string.whitespace)

abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
<function capwords at 0x000001472927E7B8>
0123456789
0123456789abcdefABCDEF
0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
 	

