|
| 1 | +#coding=utf-8 |
| 2 | +""" |
| 3 | +@author:JianxiongRao |
| 4 | +@date:2018/3/12 |
| 5 | +@version:Python3.6 |
| 6 | +""" |
| 7 | +from requests_html import HTMLSession |
| 8 | +import os |
| 9 | +import time |
| 10 | + |
class MM(object):
    """Download the images listed on the mm131 "qingchun" category pages.

    The crawler walks the category's listing pages, reads the ``src`` and
    ``alt`` attributes of each listed ``<img>`` and saves the image as
    ``<alt>.jpg`` inside the image directory.

    Args:
        imagePath: Directory the images are written to; it is created when
            it does not exist yet.
        timeout: Seconds to wait for each HTTP request before giving up.
    """

    # Listing pages 1 .. __PAGE_LIMIT - 1 are crawled.
    __PAGE_LIMIT = 12
    __FIRST_PAGE_URL = "http://www.mm131.com/qingchun/"
    __PAGE_URL_TEMPLATE = "http://www.mm131.com/qingchun/list_1_{}.html"

    def __init__(self, imagePath=r'D:/Photo/MM', timeout=10):
        self.__page = 1
        self.__url = self.__FIRST_PAGE_URL
        self.__session = HTMLSession()
        # Sent with every image request (see download()).
        self.__headers = {
            'Referer': 'http://www.mm131.com/qingchun/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
        }
        self.__imagePath = imagePath
        self.__timeout = timeout
        self.__confirmPath()

    def __confirmPath(self):
        """Create the image directory if it is missing."""
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.__imagePath, exist_ok=True)

    @staticmethod
    def _safeFileName(title):
        """Return *title* with characters that are invalid in file names replaced.

        Path separators, the characters Windows forbids in file names and
        control characters become ``_``; surrounding spaces and dots are
        stripped. An empty result falls back to ``'untitled'``.
        """
        forbidden = '\\/:*?"<>|'
        cleaned = ''.join(
            '_' if ch in forbidden or ord(ch) < 32 else ch for ch in title
        )
        cleaned = cleaned.strip(' .')
        return cleaned or 'untitled'

    def __pageUrl(self, page):
        """Return the listing URL for *page* (page 1 is the category root)."""
        if page == 1:
            return self.__FIRST_PAGE_URL
        return self.__PAGE_URL_TEMPLATE.format(page)

    def download(self, link, fileName):
        """Fetch *link* and store it as ``<fileName>.jpg`` in the image directory.

        Failures are reported with ``print`` and never raised, so one bad
        image does not stop the crawl.

        Args:
            link: URL of the image.
            fileName: Base name of the target file, without extension; it is
                sanitised before use.
        """
        target = os.path.join(
            self.__imagePath, self._safeFileName(fileName) + '.jpg'
        )
        try:
            # Fetch first so that a failed request never leaves an empty file.
            response = self.__session.request(
                'get',
                link,
                headers=self.__headers,
                allow_redirects=False,
                timeout=self.__timeout,
            )
            # Redirects are not followed, so anything but 200 carries no image.
            if response.status_code != 200:
                print("skip {}: HTTP {}".format(link, response.status_code))
                return
            with open(target, 'wb') as f:
                f.write(response.content)
        except Exception as e:
            # Best-effort boundary: report the problem and keep crawling.
            print(str(e))

    def __crawlPage(self, url):
        """Download every image listed on the listing page at *url*."""
        r = self.__session.get(url, timeout=self.__timeout)
        main = r.html.find(".main", first=True)
        dl = main.find('dl', first=True) if main is not None else None
        if dl is None:
            print("no image list found on {}".format(url))
            return
        # The last <dd> is skipped, as in the original code; presumably it
        # holds the pager rather than an image -- TODO confirm.
        for dd in dl.find('dd')[:-1]:
            img = dd.find('img', first=True)
            if img is None:
                continue
            imageLink = img.attrs.get('src')
            if not imageLink:
                continue
            # Fall back to the URL's base name when the image has no alt text.
            title = img.attrs.get('alt') or os.path.splitext(
                os.path.basename(imageLink)
            )[0]
            self.download(imageLink, title)

    def parseData(self):
        """Crawl the remaining listing pages and print the elapsed time.

        The current page is kept on the instance, so a second call resumes
        where the previous one stopped. A page that cannot be fetched or
        parsed is reported and skipped.
        """
        start = time.time()
        while self.__page < self.__PAGE_LIMIT:
            self.__url = self.__pageUrl(self.__page)
            try:
                self.__crawlPage(self.__url)
            except Exception as e:
                # Same best-effort policy as download(): report and move on.
                print(str(e))
            self.__page += 1
        end = time.time() - start
        print("爬取时间:", end)
| 53 | + |
if __name__ == "__main__":
    # Script entry point: build the crawler and run the full crawl.
    MM().parseData()
| 57 | + |
| 58 | + |
0 commit comments