In [33]:
import requests
import threading
import random
import logging
import json
import time
from queue import Queue
from bs4 import BeautifulSoup

In [60]:
def run_time(func):
    def wrapper(*args, **kw):
        start = time.time()
        func(*args, **kw)
        end = time.time()
        print('running', end-start, 's')
    return wrapper

In [2]:
class UserAgent(object):
    __instance_lock = threading.Lock()
    __init_flag = False
    
    def __init__(self):
        if self.__init_flag is False:
            print('init UserAgent')
            self.__agents_pool = list()
            with open('./UserAgents/useragents.txt','r') as read_ob:
                for line in read_ob.readlines():
                    self.__agents_pool.append(line.strip())
            self.__init_flag = True
    
    def __new__(cls, *args, **kwargs):
        if not hasattr(UserAgent, "_instance"):
            print('new UserAgent')
            with UserAgent.__instance_lock:
                if not hasattr(UserAgent, "_instance"): 
                    UserAgent._instance = object.__new__(cls)
        return UserAgent._instance
    
    def get_useragent_randomly(self):
        return random.choice(self.__agents_pool)
    

In [57]:
class Crawl():

    def __init__(self):
        self._session = None
        self._headers = {
            'User-Agent':UserAgent().get_useragent_randomly(),
            'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language':'zh-cn',
        }
        self._proxies = [
            {"http":"112.111.217.114:9999"},
            {"http":"180.118.128.118:9000"},
            {"http":"171.11.29.217:9999"},
            {"http":"120.83.109.191:9999"}
        ]
        
    def request_get(self, url, **kwargs):
        logging.info('scraping {}...'.format(url))
        try:
            response = requests.get(url, headers=self._headers, proxies=random.choice(self._proxies), **kwargs)
            if response.status_code == 200:
                return response
            logging.error('get invalid status code %s while scraping %s', response.status_code, url)
        except requests.RequestException:
            logging.error('error occurred while scraping %s', url, exc_info=True)
        else:
            logging.info('scraping {} finished'.format(url))
     
    def request_post(self, url, **kwargs):
        logging.info('scraping {}...'.format(url))
        try:
            response = requests.post(url, headers=self._headers, proxies=random.choice(self._proxies), **kwargs)
            if response.status_code == 200:
                return response
            logging.error('get invalid status code %s while scraping %s', response.status_code, url)
        except requests.RequestException:
            logging.error('error occurred while scraping %s', url, exc_info=True)
        else:
            logging.info('scraping {} finished'.format(url))
    
    def session_get(self, url, **kwargs):
        self.check_session()
        logging.info('scraping {}...'.format(url))
        try:
            response = self._session.get(url, headers=self._headers, **kwargs)
            if response.status_code == 200:
                return response
            logging.error('get invalid status code %s while scraping %s', response.status_code, url)
        except requests.RequestException:
            logging.error('error occurred while scraping %s', url, exc_info=True)
        else:
            logging.info('scraping {} finished'.format(url))
    
    def session_post(self, url, **kwargs):
        self.check_session()
        logging.info('scraping {}...'.format(url))
        try:
            response = self._session.post(url, headers=self._headers, **kwargs)
            if response.status_code == 200:
                return response
            logging.error('get invalid status code %s while scraping %s', response.status_code, url)
        except requests.RequestException:
            logging.error('error occurred while scraping %s', url, exc_info=True)
        else:
            logging.info('scraping {} finished'.format(url)) 

    def get_session(self):
        return self._session
    
    def set_session(self, session):
        self._session = session
        return True
    
    def save_cookies(self, user):
        cookieJar = requests.cookies.RequestsCookieJar()
        for cookie in user.cookies:
            cookieJar.set(cookie.name,cookie.value)
        for cookie in user.headers['Set-Cookie'].split(";"):
            key = cookie.split('=')[0]
            value = cookie.split('=')[1]
            cookieJar.set(key,value)
        return cookieJar
    
    def check_session(self):
        if self._session == None:
            self._session = requests.session()
        return
    
    def add_header(self, headers):
        for key, value in headers.items():
            self._headers[key] = value


In [89]:
class DoubanUser(Crawl):
    __instance_lock = threading.Lock()
    __init_flag = False
       
    def __init__(self):
        if self.__init_flag is False:
            super(DoubanUser, self).__init__()
            self.__user = None
    #         self._cookies = None
            self.__login()
            self.__init_flag = True

    def __new__(cls, *args, **kwargs):
        if not hasattr(DoubanUser, "_instance"):
            print('new Douban')
            with DoubanUser.__instance_lock:
                if not hasattr(Douban, "_instance"): 
                    DoubanUser._instance = object.__new__(cls)
        return DoubanUser._instance
    
    def __login(self):
        post_data = {
            'name':'18664678368',
            'password':'Ljc970412',
            'remember':'false'
        }
        self.add_header({
            "Referer":'https://accounts.douban.com/passport/login'
        })
        user = self.session_post('https://accounts.douban.com/j/mobile/login/basic', data=post_data)
        login_detail = json.loads(user.text)
        if login_detail['status'] == 'success':
            print('login success!')
#             self.__cookies = self.save_cookies(user)
        else:
            print('login failed!')
        self.__user =  user.text
    
    def get_user_info(self):
        return self.__user

In [40]:
class Douban(Crawl):
    
    def __init__(self, douban_user):
        super(Douban, self).__init__()
        self.set_session(douban_user.get_session())
    
    def search(self, query, cat=''):
        res = dict()
        params = {
            'q':query,
            'cat':cat
        }
        count=0
        response = self.session_get('https://www.douban.com/search', params=params)
        soup = BeautifulSoup(response.content, 'html.parser')
        results = soup.find(class_='result-list').find_all(class_='result')
        for result in results:
            pic = result.find(class_='pic')
            content = result.find(class_='content')
            img = pic.img.get('src')
            link = content.a.get('href')
            name = content.a.text
            description = content.p.text if content.p else ''
            res[count]={
                'name':name,
                'img':img,
                'link':link,
                'description':description
            }
            count+=1
        return res

In [125]:
class DoubanMovie(Douban):
    
    @run_time
    def get_comments(self, **kwargs):
        
        count = 0
        queue = Queue()
        producer_running = True
        
        def producer(url=None, movie_id=None, page_max=50, page=0):
            nonlocal producer_running
            if producer_running is True and page <= page_max:
                movie_url = None
                if movie_id is not None:
                    movie_url = 'https://movie.douban.com/subject/{}/comments'.format(str(movie_id))
                else:
                    movie_url = url
                print("ÂºÄÂßãÁà¨ÂèñÁ¨¨{0}È°µ***********************************************************************Ôºö".format(page+1))
                params={
                    'start':page*20,
                    'limit':20,
                    'status':'P',
                    'sort':'new_score'
                }
                html = self.session_get(movie_url, params=params)
                soup = BeautifulSoup(html.content, 'html.parser')
                comments = soup.find(id='comments').find_all(class_='comment-item')
                if len(comments) > 1:
                    queue.put(comments)
                    producer(url=movie_url, page=page+1, page_max=page_max)
                else:
                    producer_running=False
            else:
                producer_running=False
            
        def customer():
            nonlocal count
            while producer_running is True or queue.empty() is False:
                comments = queue.get()
                for comment in comments:
                    content = comment.find(class_='comment-content').text
                    user_name = comment.find(class_='comment-info').a.text
                    comment_time = comment.find(class_='comment-info').find(class_='comment-time').attrs['title']
                    print('comment:{}, user_name:{}, comment_time:{}'.format(content,user_name,comment_time))
                    count += 1
                print("count={}".format(count))
                time.sleep(int(random.choice([0.5, 0.2, 0.3])))
                
        threads = list()
        
        thread_p = threading.Thread(target=producer, kwargs=kwargs)
        thread_p.start()
        threads.append(thread_p)
        
        for _ in range(10):
            thread_c = threading.Thread(target=customer)
            thread_c.start()
            threads.append(thread_c)
            
        for thread in threads:
            thread.join(timeout=0.5)

In [75]:
class DoubanMovie(Douban):
    
    @run_time
    def get_comment(self, url=None, movie_id=None, page_max=50):
        movie_url = None
#         if movie_name is not None:
            
        if movie_id is not None:
            movie_url = 'https://movie.douban.com/subject/{}/comments'.format(str(movie_id))
        else:
            movie_url = url
        count = 0
        for page in range(page_max+1):
            print("ÂºÄÂßãÁà¨ÂèñÁ¨¨{0}È°µ***********************************************************************Ôºö".format(page+1))
            params={
                'start':page*20,
                'limit':20,
                'status':'P',
                'sort':'new_score'
            }
            html = self.session_get(movie_url, params=params)
            print(html.url)
            soup = BeautifulSoup(html.content, 'html.parser')
            comments = soup.find(id='comments').find_all(class_='comment-item')
            if len(comments) > 1:
                for comment in comments:
                    content = comment.find(class_='comment-content').text
                    user_name = comment.find(class_='comment-info').a.text
                    comment_time = comment.find(class_='comment-info').find(class_='comment-time').attrs['title']
                    print('comment:{}, user_name:{}, comment_time:{}'.format(content,user_name,comment_time))
                    count += 1
                print("count={}".format(count))
                time.sleep(int(random.choice([0.5, 0.2, 0.3])))
            else:
                print("Â§ßÁ∫¶ÂÖ±{0}È°µËØÑËÆ∫".format(page+1))
                break
            
#     def get_review_by_url(self, url , page_max=50):
#         res = dict()
#         self.set_url(url)
#         count = 0
#         for page in range(page_max+1):
#             print("ÂºÄÂßãÁà¨ÂèñÁ¨¨{0}È°µ***********************************************************************Ôºö".format(page+1))
#             self.set_params({
#                 'start':page*20
#             })
#             html = self.session_get()
#             soup = BeautifulSoup(html.content, 'html.parser')
#             reviews = soup.find(class_='review-list').find_all(class_='review-item')
#             for review in reviews:
#                 user = review.find(class_='main-hd')
#                 user_name = user.find(class_='name').text
#                 user_link = user.find(class_='name').a.get(href)
                
#                 content = review.find(class_='main-bd')
#                 title = content.find('h2').text
#                 link = content.find('h2').a.get('href')
#                 self.set_url(link)
#                 review_response = self.request_get()
#                 review_soup = BeautifulSoup(review_response, 'html.parser')
#                 article = review_soup.find(class_='article')
#                 review_content = article.find(class_='review-content').text
#                 res[title] = {
#                     'user_name':user_name,
#                     'user_link':user_link,
#                     'link':link,
#                     'review_content':review_content
#                 }
#             return res
        
#     def get_review_by_id(self, movie_id, page_max=50):
#         url = 'https://movie.douban.com/subject/{}/reviews'.format(str(movie_id))
#         self.get_review_by_url(url, page_max)
    
    def search(self, movie_name):
        result = super().search(movie_name, cat='1002')
        return result

In [90]:
user = DoubanUser()

new Douban
login success!


In [91]:
user.get_user_info()

'{"status":"success","message":"success","description":"Â§ÑÁêÜÊàêÂäü","payload":{"account_info":{"name":"Ljc970412","weixin_binded":false,"phone":"18664678368","avatar":{"medium":"https://img3.doubanio.com\\/icon\\/user_large.jpg","median":"https://img1.doubanio.com\\/icon\\/user_normal.jpg","large":"https://img3.doubanio.com\\/icon\\/user_large.jpg","raw":"https://img3.doubanio.com\\/icon\\/user_large.jpg","small":"https://img1.doubanio.com\\/icon\\/user_normal.jpg","icon":"https://img3.doubanio.com\\/pics\\/icon\\/user_icon.jpg"},"id":"217184556","uid":"217184556"}}}'

In [126]:
dm_crawl = DoubanMovie(user)

In [127]:
dm_crawl.get_comments(url='https://movie.douban.com/subject/30171424/comments')

Exception in thread Thread-171:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-125-5bdb05a20a49>", line 18, in producer
    if producer_running and page <= page_max:
UnboundLocalError: local variable 'producer_running' referenced before assignment



running 5.028633117675781 s


In [107]:
dm_crawl.get_comments(url='https://movie.douban.com/subject/30171424/comments')

ÂºÄÂßãÁà¨ÂèñÁ¨¨1È°µ***********************************************************************Ôºö
ÂºÄÂßãÁà¨ÂèñÁ¨¨2È°µ***********************************************************************Ôºöcomment:
Âú®ÂΩì‰∏ãËøô‰∏™ËØ≠Â¢ÉÈáåÔºåÊï¢ÊääÊï¥‰∏™È¶ôÊ∏ØÁÇ∏ÁøªÁöÑ‰πüÂè™ÊúâÈÇ±Á§ºÊ∂õ‰∫Ü„ÄÇÂêåÊ†∑ÊòØÈ¶ôÊ∏ØÂØºÊºîÔºåÂêåÊ†∑Â°ëÈÄ†Ëã±ÈõÑÔºåÈÇ±Á§ºÊ∂õÂíåÊûóË∂ÖË¥§ÁÆÄÁõ¥ÂΩ¢Êàê‰∫Ü‰∏§ÁßçÊûÅÁ´ØÔºå‰∏Ä‰∏™Â•èÊåΩÊ≠å‰∏Ä‰∏™Â•èÈ¢ÇÊ≠å„ÄÇ„ÄäÊãÜÂºπ‰∏ìÂÆ∂2„ÄãÊãçÁöÑÊòØË¢´ÈõÜ‰ΩìÂèçÂ§çÂà©Áî®ÂêéÊäõÂºÉÁöÑ‰∏™‰ΩìÔºåÊòØÂà∂Â∫¶ÁöÑÂºÉÂ≠êÔºåÂÖÖÊª°ÊÇ≤ÊÉÖÔºå„ÄäÁ¥ßÊÄ•ÊïëÊè¥„ÄãÊãçÁöÑÊòØÂ§ßÊó†ÁïèÁöÑËã±ÈõÑ‰∏ª‰πâÔºåÊòØ‰∏∫‰∫Ü‰∫∫Ê∞ëÂÆâÂÖ®ÂèØ‰ª•‰∏çÈ°æÂÆ∂‰∫∫ÂíåËá™Â∑±„ÄÅ‰∏ÄËÇ°ËÑëÂÜ≤ÈîãÈô∑ÈòµÁöÑÊïë‰∫∫Êú∫Âô®„ÄÇÈÇ±Á§ºÊ∂õËøòÊòØÈ¶ôÊ∏ØÁîµÂΩ±ÁöÑÊâìÂ∑•‰∫∫Ôºå‰ªçÁÑ∂ËÉΩÂØπ‰∏™‰ΩìÂëΩËøêÊäïÊ≥®ÊÇ≤ÊÇØÂíåÂêåÊÉÖÔºåËÄåÊûóË∂ÖË¥§Â∑≤ÁªèÊ≤âËø∑Âú®‰∏ªÊóãÂæãÂèô‰∫ãÂΩì‰∏≠‰∫ÜÔºåÂè™ËÉΩÁî®‚ÄúÂõΩÊóó‰∏ãÁöÑËÆ≤ËØù‚ÄùÊù•ÂåÖË£ÖÂá∫‰∏Ä‰∏™‰∏™ÊïôÁßë‰π¶ÂºèÁöÑÊ†áÂÖµ„ÄÇ
, user_name:‰∏ÄÂè£Â§ß‰∫ïÂ≠ê, comment_time:2020-12-24 03:06:13
comment:
ÊòØÊôÅÁõñ‰πüÊòØÂÆãÊ±üÔºåÊòØË≠¶ÂØü‰πüÊòØÂå™ÂæíÔºåÊòØËã

ÂºÄÂßãÁà¨ÂèñÁ¨¨4È°µ***********************************************************************Ôºöcomment:
Âú®Ëøô‰∏™ÁóÖÊØíËÇÜËôêÁöÑË¥´Áò†Âπ¥‰ª£ÔºåÊòØÊó∂ÂÄôËØ•‰∏∫ÂèòÊÄÅÁöÑÈÇ±Á§ºÊ∂õÂä†ÂÜï‰∫Ü„ÄÇ‚ÄúÁÇ∏Ë£Ç‚ÄùËøô‰∏™ËØçÁÆÄÁõ¥ÊòØ‰∏∫„ÄäÊãÜÂºπ‰∏ìÂÆ∂2„ÄãÈáèË∫´ÊâìÈÄ†ÁöÑÔºåË∞ÅËÉΩÊÉ≥Âà∞ÔºåÊØÅÂÆåÁ∫¢Á£°ÈößÈÅì„ÄÅ‰∏≠ÁéØÂú∞ÈìÅ‰πãÂêéÔºå‰ªñÂ±ÖÁÑ∂ËøòËÉΩ‰∫§Âá∫Êõ¥Âä†ÈúáÊíºÁöÑÂ§ßÂú∫Èù¢„ÄÇÂú®Ëøô‰ªΩSÁ∫ßÂä®‰ΩúÁâáÊÉ≥Ë±°ÂäõÈù¢ÂâçÔºåÊûóË∂ÖË¥§Ë¢´Ë°¨ÂæóÂÉè‰∏™Âè™‰ºöÂÜôÂÖ´ËÇ°ÊñáÁöÑÂ∞èÂ≠¶Áîü„ÄÇ‚ÄúÊàë‰∏çÊòØÁñØÔºåÊòØÁóõ‚ÄùÔºå‰πüÂè™Êúâ‰ªñÊâçËÉΩÂú®‰∏ÄÈÉ®ÂïÜ‰∏öÂ§ßÂà∂‰ΩúÈáåÔºåËÆ©ÂàòÂæ∑ÂçéÁßíÂèòÈªÑÁßãÁîüÔºåÊõøÂÖ®È¶ôÊ∏ØËØâËØ¥ÂøÉÂ£∞„ÄÇÁæé‰∏≠‰∏çË∂≥ÊòØÂèçËΩ¨Êù•ÂæóËøáÊó©Ôºå‰∏∫ÈùíÈ©¨Â§ßÊ°•ÁàÜÁ†¥ËÆ©ÈÅìÔºåÊ≤°ÊääÂèçÁ§æ‰ºöËøõË°åÂà∞Â∫ïÔºå‰πüÊ≤°ËÉΩÂæÄ‰∫∫ÊÄßÊúÄÂπΩÊöóÂ§ÑÂÜçÊé¢ÂØª„ÄÇ‰ΩÜË∑ü‰ªñÂú®ÂÆ°Êü•Á≥ªÁªüÁúºÁöÆÂ∫ï‰∏ãÂèòÁöÑÂ∏ΩÂ≠êÊàèÊ≥ïÁõ∏ÊØîÔºåËøôÁëïÁñµÊòæÂæóÂæÆ‰∏çË∂≥ÈÅì„ÄÇÊñ∞‰∏ñ‰ª£ÁöÑÈáåÁ®ãÁ¢ëÔºåÊúÄÈÄÇÂêà‰∏∫2020Âπ¥ÂÆöË∞ÉÊî∂Â∞æÁöÑÁîµÂΩ±ÔºåÈ¢ÑË®ÄÂºèÂú∞ÁªôÊâÄÊúâÊó∂‰ª£ÊÉÖÁª™ÊääËÑâÈóÆËØäÔºåÂçé‰ªîÊúÄÂêé‰ªø‰ΩõÂêë„Ää‰ºäÊ≥¢ÊãâÁóÖÊØí„ÄãÊäïÂéªÂáùÈáçÁöÑÂõûÁ

ÂºÄÂßãÁà¨ÂèñÁ¨¨7È°µ***********************************************************************Ôºöcomment:
Â§™ÁáÉÔºÅÂπ¥Â∫¶ÂøÖÁúãÊ∏ØÁâáÔºÅÊó†ËÆ∫‰ªéÂèô‰∫ãËøòÊòØÂú∫Èù¢ÈÉΩÊòØÂØπÁ¨¨‰∏ÄÈõÜÁöÑÂÖ®Èù¢Ë∂ÖË∂ä„ÄÇÊ≤°ÊÉ≥Âà∞ÈÇ±Á§ºÊ∂õÁ´üÁÑ∂ËøòËóèÊúâÂ¶ÇÊ≠§Â§ßÁöÑÊΩúËÉΩÔºåÂÆûÂú®Â§™Á®≥„ÄÇÁàÜÁÇ∏ËøΩÈÄêÂ±ÇÂ±ÇÈÄíËøõÔºåÂú®‚ÄúË∞çÂΩ±ÈáçÈáç‚ÄùÂíå‚ÄúÂèçÊÅê24Â∞èÊó∂‚ÄùÈó¥Êó†ÁºùÂàáÊç¢ÔºåÊó¢ÊòØË∫´‰ªΩËø∑Â±ÄÔºå‰πüÊòØÁàÜÁÇ∏Â§ßÁâá„ÄÇÊØè‰∏ÄÊ¨°ÂõûËø∞ÔºåÂàòÂæ∑ÂçéÈÉΩÁªèÂéÜ‰∏ÄÊ¨°Ë∫´‰ªΩÁöÑÈîô‰ΩçÔºåÂñÑ‰∏éÊÅ∂ÈÉΩÊΩúËóèÂú®Ëøô‰∏™ËßíËâ≤ÁöÑÂÜÖÂøÉ‰πã‰∏≠„ÄÇÂ§©‰Ωø‰∏éÈ≠îÈ¨ºÔºå‰∫âÂ§∫‰∏Ä‰∏™ÁÅµÈ≠ÇÔºåÊòØÊúÄË¥¥ÂàáÁöÑÊΩúÂú®ÊñáÊú¨„ÄÇ
, user_name:Ë•øÂ∏ïÂÖã, comment_time:2020-12-24 17:29:22
comment:
ÈÇ±Á§ºÊ∂õ‰ªéÈ¶ôÊ∏ØÁöÑÁ∫¶Áø∞¬∑Âç°ÊúãÁâπÂèòÊàê‰∫ÜÊâòÂ∞º¬∑ÊñØÁßëÁâπ„ÄÇ
, user_name:ÁöÆÈù©‰∏ö, comment_time:2020-12-26 12:27:33
comment:
#ÈõªÂΩ±Èô¢# ÂÄ™Â¶ÆÁöÑÊ≠£Á¢∫ÊâìÈñãÊñπÂºèÔºÅÂ∑Æ‰∏çÂ§öË∑ü„ÄäÂØíÊà∞2„ÄãÈáåÁöÑÊñáÂíèÁèä‰∏ÄÊ®£È©öËâ∑‰∫ÜÔºåÂ•πÁöÑÂΩ¢Ë±°ÁúüÁöÑÈÅ©ÂêàËÅ∑Ê•≠Á≤æËã±Â•≥ÊÄßÂçÉËê¨‰∏çË¶ÅÂÜçÊºîÂÇªÁôΩÁîú‰∫Ü„ÄÇÂàòÂæ∑ÂçéÁöÑÊ∞îË¥®Áî®Êù•ÊâÆÊºîÈÅìË≤åÂ≤∏ÁÑ∂ÂÜÖÂøÉÊå£

ÂºÄÂßãÁà¨ÂèñÁ¨¨10È°µ***********************************************************************Ôºöcomment:
ËäÇÂ•èÁ¥ßÂáëÔºåÂú∫Èù¢ÂçÅÂàÜÈúáÊíº„ÄÇËôΩÁÑ∂ÊòØÁ¨¨‰∫åÈÉ®Ôºå‰ΩÜÊòØÊòØ‰∏Ä‰∏™ÂÆåÂÖ®Áã¨Á´ãÁöÑÊïÖ‰∫ãÔºåËøòÊúâÊÑèÊÉ≥‰∏çÂà∞ÁöÑÂèçËΩ¨„ÄÇÂñÑÊÅ∂Âè™Âú®‰∏ÄÂøµ‰πãÈó¥ÔºåÂèãÊÉÖ„ÄÅÁà±ÊÉÖ‚Ä¶‚Ä¶ËøòÊúâËÅå‰∏öÁöÑÊìçÂÆà„ÄÇÊúÄÂñúÊ¨¢ÂàòÂæ∑ÂçéÂíåÂàòÈùí‰∫ëÁöÑÂØπÊâãÊàèÔºåËøáÁòæ„ÄÇ
, user_name:Êâ∞Êâ∞, comment_time:2020-12-24 10:08:00

comment:
ÊûúÁÑ∂‰∏çË¥üÊúüÂæÖÔºåÁîµÂΩ±‰∏≠ÁöÑÂ®±‰πêÊÄßÂíåÊîøÊ≤ªÊÄßËûçÂêàÁöÑÊÅ∞Âà∞Â•ΩÂ§ÑÔºå‰∏§‰∏™Â∞èÊó∂ÂÖ®Á®ãË¢´Âê∏Âºï‰ΩèÔºåËøΩÈÄê„ÄÅÊû™Êàò„ÄÅÁàÜÁ†¥Êó†‰∏çÁ≤æÂΩ©Ôºå‰∏ÄÈù¢ÊòØÊó†ÊîøÂ∫ú‰∏ª‰πâÁöÑÊÅêÊÄñÂàÜÂ≠êÔºå‰∏ÄÈù¢ÊòØÈô§Êö¥ÂÆâËâØÁöÑÂèçÊÅêÁ≤æËã±ÔºåÊΩò‰πòÈ£éÁöÑ‚ÄúÂ∑¶Âè≥‚Äù‰∫íÊêèÊó¢Â§çÊùÇÂèàÁúüÂÆûÔºåËë£ÂçìÊñá„ÄÅÂ∫ûÁé≤ÁöÑ‚ÄúÊÉÖ‰∏éÊ≥ï‚ÄùÈÄâÊã©ÂêåÊ†∑Ê∑±ÂàªÔºåÁîüÂä®ÂèØ‰ø°ÂèàÂëàÁé∞Â§öÊ†∑‰∫∫ÊÄß„ÄÇ„ÄÇËÄåË¢´Âä®ÁöÑË∫´‰ªΩÂ§±ÁÑ¶Âíå‰∏ªÂä®ÁöÑÊ∏ÖÈô§ËÆ∞ÂøÜ‰πüÊ≠£Â•ΩÂØπÂ∫îÔºåÊ≤øË¢≠‰∫ÜÈÇ±Á§ºÊ∂õÂØºÊºî‰∏ÄË¥ØÈ£éÊ†º‰∏éÊÄÅÂ∫¶„ÄÇ
, user_name:‰∫≤ÂàáÁöÑÊòäÂ≠ê, comment_time:2020-12-24 20:08:12
comment:
IMAXÂõΩËØ≠Áâà„ÄÇi

running 5.841109991073608 s
ÂºÄÂßãÁà¨ÂèñÁ¨¨13È°µ***********************************************************************Ôºöcomment:
Â∞±Ê≠£Â∏∏Ê∞¥Âπ≥ÂèëÊå•ÂêßÔºÅÁ≤æÂΩ©ÁöÑÈÉΩÂú®Âä®‰ΩúÂú∫Èù¢ÔºåÂ∞¥Â∞¨ÁöÑÈÉΩÂú®ÊñáÊàèÁà±ÊÉÖ„ÄÇÊÑüËßâÁºñÂâßÊòØÊääÂá†‰∏™ÊãÜÂºπÁöÑÊïÖ‰∫ãÊÉ≥ÊòéÁôΩ‰∫ÜÔºåÂÖ∂‰ªñÁöÑ‰∫∫Áâ©Â∞±Â∑•ÂÖ∑‰∫∫ÂæÄÈáåÂ°ûÂêßÔºåÂà´ËÆ≤ÈÄªËæë„ÄÅÊõ¥Âà´Ë∞àÊµÅÁïÖ„ÄÇÂÄ™Â¶ÆÁöÑÊàèÁúüÁöÑÊòØÂ§™ÈöæÁúã‰∫ÜÔºåÂìéÔºåËÑöË∂æÊä†Âá∫‰∏ÄÂ•óÂ§ßÂπ≥Â±Ç„ÄÇËØ¥Áî∑‰∫∫ÊàèÂü∫ÊÉÖÊàèÁöÑÔºå‰∏çÊé•Âèó„ÄÇ
, user_name:ËñáÁæÖÂ∞ºÂç°, comment_time:2020-12-28 00:26:38

comment:
‰∏Ä‰∏™Á≤æÁ•ûÂàÜË£ÇÁöÑÊïÖ‰∫ãÔºåË∫´‰ªΩÔºåÁ´ãÂú∫ÔºåÁ§æ‰ºöÂ∞Ü‰∫∫ÂõõÂàÜ‰∫îË£Ç„ÄÇÈ¶ôÊ∏ØÁîµÂΩ±ÂßãÁªàÂú®Ëá™ÊàëË∫´‰ªΩËÆ§Âêå‰∏≠Êë∏Á¥¢„ÄÇ‰πüÁúãÂà∞ÈöêÂñªÔºåÂè™Êúâ‚ÄúÂ§±ÂøÜ‚ÄùËøôÊ†∑ÁöÑÂ§ñÁïåÂ§ßÂàõ‰º§ÔºåÊâçËÉΩËÆ©‰∏Ä‰∏™ÊÑ§ÊÄíÁöÑ‰∫∫ÂàáÊñ≠ÊÄíÁÅ´ÂõûÂΩí‰∫∫ÊÄßÔºåÁõ∏Â∫îÁöÑÔºåÁ§æ‰ºöÊÄßÁæ§‰ΩìÊÄßÁöÑÈóÆÈ¢òÂíåÂéãÊäëÁßØÁ¥Ø‰∏ãÁöÑÊÑ§ÊÄíÔºåÈô§ÈùûÁ±ª‰ºº‰∫åÊàòËøôÊ†∑ÁöÑÈõÜ‰ΩìÂàõ‰º§ÔºåÊâçËÉΩ‰∏ÄÂàáÈáçÂª∫„ÄÇÂàòÂæ∑ÂçéÁöÑÁî®ÂêéÂç≥ÂºÉÔºåÂàòÈùí‰∫ëÁöÑÊéßËØâËÆ∞ÂøÜÁØ°ÊîπÔºåËøòÊúâËá™ÂÄ™Â¶ÆÂè£‰∏≠ËØ¥Âá∫Ôºå‰ø°ÊàëÊ

ÂºÄÂßãÁà¨ÂèñÁ¨¨16È°µ***********************************************************************Ôºöcomment:
Êª°Â±èCGÁâπÊïàÁúãÁöÑÊúâÁÇπÊôïÔºåÊ∏ØË≠¶Â∑ÖÂ≥∞ËøòÊòØÊó†Èó¥ÈÅìÂïä
, user_name:ÂçäÂ≤õÂ¢®È±ºÂπ≤, comment_time:2020-12-27 14:14:18

comment:
ÂàòÂæ∑Âçé‰ªéÂ§¥Âà∞ËÑöÈÉΩÊòØÊàèÔºåÂ•Ω‰∫∫Âùè‰∫∫‰∏ÄÁû¨Èó¥ÁúºÁ•ûÁöÑÂèòÂåñÁªù‰∫Ü„ÄÇÂÄ™Â¶ÆÁúüÁöÑÂæàÈÄÇÂêàËøôÁßçÈ£íÂ•≥ËßíËâ≤ÔºåÈõÜÁæé‰∏ΩÊô∫ÊÖßÂäõÈáè‰∏Ä‰ΩìÁöÑÂ•≥ÊÄßÁúüÁöÑÂ•ΩÈÄó‰∫∫Áà±ÂïäÔΩû
, user_name:ÊàëÂÇªÁ¨ë‰Ω†ÈöèÊÑè„ÄÇ, comment_time:2020-12-26 11:08:49
comment:
ÊúâÁÇπËøáË™â‰∫Ü„ÄÇ‰∏Ä‰∏™ÂèçÁ§æ‰ºöÁöÑÊÅêÊÄñÂàÜÂ≠êË¢´ÂâçÂ•≥Âèã‰∏ÄÈÄöÂò¥ÁÇÆÂ∞±ÂèçÊ∞¥‰∫ÜÔºüÈÇ£‰ªñÊàêÁ´ãÊÅêÊÄñÁªÑÁªáÁöÑÊÑè‰πâ‰ΩïÂú®ÔºüÊàëÊòØÊ≤°ÊÑüÂèóÂà∞ÊïëËµéÔºåÂèçËÄåÊúâÁßçËÑ±Ë£§Â≠êÊîæÂì™Âï•ÁöÑÊÑüËßâ„ÄÇÂá†‰∏™Êúâ‰ª£Âè∑ÁöÑÂèçÊ¥æÂ∞èÂºüÈÄÅÁöÑÈÉΩÊó¢ÂÆπÊòìÂèàÂÑøÊàè‰∫Ü„ÄÇ
, user_name:Ê†óÂÖàÁîüÊòØ‰∏çÊòØ‰∏Ä, comment_time:2020-12-27 17:04:52
comment:
Ë∂äÂà∞ÂêéÈù¢Ë∂äÊúâÁÇπÊåÇ‰∏ç‰ΩèÔºå‰∏çËøá‰∫∫ÊÄßÂ∞±ÁúüÁöÑÊòØÂñúÊ¨¢ËøôÁßçÁÇ∏ÁÇ∏ÁÇ∏ÁöÑÁîµÂΩ±ÔºåÂÖ®Èù†Èü≥‰πêËΩ∞ËΩ∞ËΩ∞ÔºåË°åÂêß
, user_name:ÈôàË£∏, comment_time:2020-12-28 

ÂºÄÂßãÁà¨ÂèñÁ¨¨19È°µ***********************************************************************Ôºöcomment:
ÊàëËøô‰∏™‰∫∫ÊØîËæÉËÇ§ÊµÖÔºåÊãÜÂºπÁîµÂΩ±ÁÇ∏ÁöÑËøáÁòæÂ∞±ÁªôÈ´òÂàÜÔºÅÊúâËÄÅÁâåÊ∏ØÁâáÁöÑÁ°¨Ê∞îÔºåÊºîÂëò/ÊõøË∫´Ê≠¶ÊàèÂæàÊãºÔºåÊ≤°Êúâ‰∏Ä‰∏™ÊãâËÉØÔºåÂ•ΩËØÑÔºåËßíËâ≤ËÆæÂÆö‰∏äÊòéÊòæÂÄ™Â¶ÆÂ∞±‰∏∫Âπ≥Ë°°Áî∑Â•≥ÊÄßÂà´ÊØî‰æãÔºå‰∏çÂ¶ÇÂ§ßËÉÜ‰∏Ä‰∫õËøô‰∏™ËßíËâ≤‰πüÊâæ‰∏Ä‰ΩçÁî∑ÊºîÂëòÊºî(‚Åé‚ÅçÃ¥Ãõ·¥ó‚ÅçÃ¥Ãõ‚Åé)
, user_name:ÁëæÊúµÊúµ, comment_time:2020-12-26 16:09:42

comment:
‰∏çÊÄé‰πàÊ†∑ÂêßÔºå
, user_name:È©ªÈ©¨Â∫óÂáØÈ≤Å‰∫öÂÖã, comment_time:2020-12-29 08:45:59
comment:
ÁúütmÁàΩÔºÅÂÖâÂá≠Âä®‰ΩúÈÄªËæëÁöÑËÆæËÆ°Â∞±Â∑≤ÁªèÂÆåÂÖ®ÂÄºÂõûÁ•®‰ª∑‰∫ÜÔºåÊúâ‰∫õÁâπÂÜôÂÖ∂ÂÆûÊå∫Âá∫ÊàèÁöÑÔºå‰ΩÜÊó†Â•àÁªôÁöÑÈáèÂ§™Ë∂≥Â§ü‰∫ÜÔºåÂºÄÂú∫Á¨¨‰∏ÄÂú∫ÊàèÂ∞±Áõ¥Êé•Ê†∏ÂºπÊääÊú∫Âú∫ÁªôÁÇ∏‰∫ÜÔºåÁÑ∂ÂêéÂëäËØâ‰Ω†ËøôÁâáÂúüÂú∞‰∏äÊúÄÂ§öÁöÑÊÉÖÁª™Â∞±ÊòØÊÑ§ÊÄíÔºÅ//Ââ™Ëæë‰∏äÊïÖÊÑèÁªôËßÇ‰ºóËê•ÈÄ†‰∏ÄÁßç‰ø°ÊÅØË∂ÖËΩΩÁöÑÊÑüËßâÔºåËøôÁÇπË∑ü„Ää‰ø°Êù°„ÄãÊå∫ÂÉèÁöÑÔºå‰∏çËøáËøôÁâáÁöÑÊïÖ‰∫ãÊ≤°ÈÇ£‰πàÂ§çÊùÇÔºåÊâÄ‰ª•ÂÆåÊàêÂæóËøòÊå∫Â•ΩÁöÑ„ÄÇ//ÈÖç‰πêÁªùÂØπÊä¢ÊàèÔºåÂá†‰πé

ÂºÄÂßãÁà¨ÂèñÁ¨¨22È°µ***********************************************************************Ôºöcomment:
‰∏ä‰∏ÄÈõÜÁÇ∏Ê∏ØÈìÅËøôÈõÜÁÇ∏‰∫ÜÈùíÈ©¨Â§ßÊ°•ÂíåÊú∫Âú∫ÔºåÁáÉÁÇ∏Âú∫Èù¢ËøòÊòØÊå∫Â§öÁöÑÔºåÁ°ÆÂÆûËä±Èí±‰∫Ü„ÄÇ60Â≤ÅÁöÑÂàòÂæ∑ÂçéÂÅöÂêÑÁßçÂç±Èô©Âä®‰ΩúÔºå20Âπ¥ÂâçÂíå‰ªñÂäøÂùáÂäõÊïåÁöÑÂàòÈùí‰∫ëÁªô‰ªñÂΩìÈÖç„ÄÇ40Â≤ÅÁöÑÂê¥ÂçìÁæ≤ÂíåÂàòÊµ©ÈæôÂéªÂΩìÈÖçËßíÂΩìÂ∞èÂñΩÂï∞ÁúãËÄÅÂì•Âì•‰ª¨Áé©ÂëΩ„ÄÇÊñ∞‰∫∫ÈÉΩÊ≤°Êúâ„ÄÇË∞¢ÂêõË±™ËøòÊòØÂæàÂ∏Ö„ÄÇÂÄ™Â¶ÆËøòÊòØÁæéÁöÑÔºåÂè™ÊòØÂèØÊÉúÊòØ‰∏™Â∑•ÂÖ∑‰∫∫„ÄÇ
, user_name:ÁéâÊú®Â§ßÊ≤≥, comment_time:2020-12-24 22:37:43

comment:
ÂÄ™Â¶ÆÂ§™Áæé‰∫Ü Â§ßÁæé‰∫∫ÔºÅÔºÅÔºÅ
, user_name:„Éê„Ç´„É¢„Éé, comment_time:2020-12-24 21:35:48
comment:
Ê≤°ÂäûÊ≥ïÔºåÂÖ•‰∏ç‰∫ÜÊàèÔºåÂàòÂæ∑ÂçéÁöÑÊïÖ‰ΩúÂùöÈüßÔºåÊùØÂÖ∑ÂçßÂ∫ïËøòÊòØËÑ±‰∏çÂºÄÊó†Èó¥ÈÅìÁöÑÂΩ±Â≠êÔºåÂæàÂ§öËÆæÂÆöÂíåÂèçËΩ¨ËøòÊòØÁÆÄÂçïÂ•óË∑Ø‰∫ÜÁÇπÔºåÈÇ£‰∫õÁàÜÁÇ∏ÁâπÊïà‰πüÂ∑Æ‰∫ÜÁÇπÔºåËøôÂú®Â•ΩËé±Âùû‰πüÂ∞±ÊòØBÁ∫ßÁâáÁöÑÊ∞¥Âπ≥„ÄÇËøôÁÆóÊòØÈ¶ôÊ∏ØÈÅ≠ÂèóÊ†∏ÂºπÊîªÂáªÁöÑÁ¨¨‰∏ÄÊ¨°Èì∂ÂπïÂåñÂëàÁé∞ÂêßÔºü
, user_name:Â£∞Ëâ≤ÁîªÊä•, comment_time:2020-12-26 10:13:16
commen

ÂºÄÂßãÁà¨ÂèñÁ¨¨26È°µ***********************************************************************Ôºöcomment:
Ë∫≤ÁöÑ‰∫ÜÂ≠êÂºπÔºåË∫≤‰∏ç‰∫ÜÁÇ∏ÂºπÂïä„ÄÇ
, user_name:ÁßÅÊàøÊ≠å, comment_time:2020-12-25 11:22:41
comment:
‚Äú‰ø°ÊàëÂ∞±ÊòØË≠¶ÂØüÔºå‰∏ç‰ø°ÊàëÂ∞±ÊòØÊÅêÊÄñÂàÜÂ≠êÔºÅ‚ÄùÈÇ±Á§ºÊ∂õÁúüÁâõÈÄºÔºåÂ∞Ü‰∏ÄË∂≥Êú¨ÁöÑÊ∞¥Êµí‰º†ËûçËøõ‰∫ÜÈ¶ôÊ∏ØÊñ∞ÂçÉÂπ¥ÂêéÁöÑÂéÜÂè≤ÔºåÂπ∂Áî®ÊΩò‰πòÈ£éÊäïËØöÂêéÁöÑÈôÖÈÅáÂíåÂßøÊÄÅÈöêÂñª‰∫ÜÈ¶ôÊ∏ØÊú™Êù•Âá†ÂçÅÂπ¥ÂèØËÉΩÁöÑÈÅ≠ÈÅá„ÄÇÁúãÈÇ±Á§ºÊ∂õÁöÑÁîµÂΩ±ÊòØÂÖÖÊª°Èó¥Á¶ªÊÑüÁöÑÔºå‰ªñÊÑè‰∏çÂú®ËÆ≤ÊïÖ‰∫ãÔºåËÄåÊòØÂú®ÂÜôËÆ∫Êñá„ÄÇËÉΩÂ∑ßÂ¶ôÊääÊìçÁ∫µËÆ∞ÂøÜËøôÊ†∑ÁöÑÊîªÂ£≥Âºè‰∏ªÈ¢ò‰∏éÈ¶ôÊ∏ØËøëÂá†Âπ¥ÂíåÊú™Êù•Âá†Âπ¥ÂèØËÉΩÁöÑÈÅ≠ÈÅáÊó†ÁºùËûçÂêàÔºåÁúüÊòØÈ¨ºÊâç„ÄÇËØ¥Âà∞Â§ßÂú∫Èù¢ÂíåÂä®‰ΩúÂú∫Èù¢ÁöÑËÆæËÆ°ÔºåÊÑüËßâÈÇ±Á§ºÊ∂õ‰∏çÂéªÂ•ΩËé±ÂùûÊãçÂ§ßÁâáÁúüÁöÑÂ§™ÂèØÊÉú‰∫Ü
, user_name:Á¥¢Â∞î‰πãÈî§, comment_time:2020-12-26 17:30:27
comment:
Âπ¥Â∫¶ÊúÄ‰Ω≥
, user_name:ÂçóÊ≥â, comment_time:2020-12-28 10:55:05
comment:
ÁºñÂâßËøòÁúüÊòØËÄÅÂ•óÔºåÂ•ΩÂ§öÂØπÁôΩÁõ¥Êé•ÂæóÂè™‰∏∫Êé®Âä®ÂâßÊÉÖÊúçÂä°ÔºåÊÑüÊÉÖÊàèÂæàÁÉÇÔºåÂÄ™Â¶Æ‰∏éÂçé

In [125]:
post_data = {
    'name':'18664678368',
    'password':'LJC970412',
    'remember':'false'
}

In [127]:
crawl.add_header({
    "Referer":'https://accounts.douban.com/passport/login'
})


In [128]:
user = crawl.session_post('https://accounts.douban.com/j/mobile/login/basic',data=post_data)

In [129]:
user.text

'{"status":"success","message":"success","description":"Â§ÑÁêÜÊàêÂäü","payload":{"account_info":{"name":"Ljc970412","weixin_binded":false,"phone":"18664678368","avatar":{"medium":"https://img3.doubanio.com\\/icon\\/user_large.jpg","median":"https://img1.doubanio.com\\/icon\\/user_normal.jpg","large":"https://img3.doubanio.com\\/icon\\/user_large.jpg","raw":"https://img3.doubanio.com\\/icon\\/user_large.jpg","small":"https://img1.doubanio.com\\/icon\\/user_normal.jpg","icon":"https://img3.doubanio.com\\/pics\\/icon\\/user_icon.jpg"},"id":"217184556","uid":"217184556"}}}'

In [111]:
params = {
            'q':'Á¥ßÊÄ•ÊïëÊè¥',
            'cat':''
        }

In [140]:
user.text

'{"status":"success","message":"success","description":"Â§ÑÁêÜÊàêÂäü","payload":{"account_info":{"name":"Ljc970412","weixin_binded":false,"phone":"18664678368","avatar":{"medium":"https://img3.doubanio.com\\/icon\\/user_large.jpg","median":"https://img1.doubanio.com\\/icon\\/user_normal.jpg","large":"https://img3.doubanio.com\\/icon\\/user_large.jpg","raw":"https://img3.doubanio.com\\/icon\\/user_large.jpg","small":"https://img1.doubanio.com\\/icon\\/user_normal.jpg","icon":"https://img3.doubanio.com\\/pics\\/icon\\/user_icon.jpg"},"id":"217184556","uid":"217184556"}}}'

In [133]:
res = dict()

In [134]:
from bs4 import BeautifulSoup

In [137]:
soup = BeautifulSoup(response.content, 'html.parser')
results = soup.find(class_='result-list').find_all(class_='result')
for result in results:
    pic = result.find(class_='pic')
    content = result.find(class_='content')
    img = pic.img.get('src')
    link = content.a.get('href')
    name = content.a.text
    description = content.p.text if content.p else ''
    res[name]={
        'img':img,
        'link':link,
        'description':description
    }