In [1]:
import os
import requests
from bs4 import BeautifulSoup
import re
    

In [2]:
class Episode:
    """One episode row of a webtoon, as collected from the series list page.

    Attributes:
        webtoon_id: ``titleId`` of the series this episode belongs to.
        no: Episode number used in the detail-page query string.
        url_thumbnail: Thumbnail image URL for the episode.
        title: Episode title.
        rating: Rating value as stored by the caller.
        created_date: Registration date as stored by the caller.
        image_list: Starts empty; filled later by whoever crawls the episode.
    """

    def __init__(self, webtoon_id, no, url_thumbnail, title, rating, created_date):
        self.webtoon_id, self.no = webtoon_id, no
        self.url_thumbnail, self.title = url_thumbnail, title
        self.rating, self.created_date = rating, created_date
        self.image_list = []

    @property
    def info(self):
        """Return the URL of this episode's detail page."""
        from urllib.parse import urlencode

        query_string = urlencode({'titleId': self.webtoon_id, 'no': self.no})
        return 'http://comic.naver.com/webtoon/detail.nhn?' + query_string


In [3]:
class Webtoon:
    """Metadata and episode list of one Naver webtoon, scraped from its list page.

    The list page is downloaded once and cached as ``data/<webtoon_id>.html``;
    later calls parse the cached file instead of hitting the network again.

    Attributes:
        webtoon_id: ``titleId`` query parameter of the series.
        title, author, description: ``None`` until :meth:`rework` is called.
        episode_list: ``Episode`` objects; empty until :meth:`update` is called.
    """

    def __init__(self, webtoon_id):
        self.webtoon_id = webtoon_id
        self.title = None
        self.author = None
        self.description = None
        self.episode_list = []

    def _load_html(self):
        """Return the list-page HTML, downloading and caching it on first use.

        Raises:
            requests.HTTPError: if the download returns an error status.
        """
        cache_path = 'data/{}.html'.format(self.webtoon_id)
        if not os.path.exists(cache_path):
            response = requests.get(
                'http://comic.naver.com/webtoon/list.nhn',
                params={'titleId': self.webtoon_id},
            )
            # Fail loudly instead of caching an error page as the listing.
            response.raise_for_status()
            os.makedirs('data', exist_ok=True)
            # Explicit UTF-8 so the Korean text does not depend on the
            # platform's default encoding.
            with open(cache_path, 'wt', encoding='utf-8') as f:
                f.write(response.text)

        with open(cache_path, 'rt', encoding='utf-8') as f:
            return f.read()

    def rework(self):
        """Fill in ``title``, ``author`` and ``description`` from the list page."""
        soup = BeautifulSoup(self._load_html(), 'lxml')

        self.title = soup.select('div.detail > h2')[0].contents[0].strip()
        # Plain class selector: quoting the class name is not valid CSS.
        self.author = soup.select('span.wrt_nm')[0].string.strip()
        # NOTE(review): this takes the first <p> of the whole document and
        # yields None when that <p> has more than one child node -- confirm
        # that it really is the series description.
        self.description = soup.p.string

    def update(self):
        """Rebuild ``episode_list`` from the list page.

        The list is replaced in place, so calling this repeatedly (for
        example by re-running a notebook cell) does not duplicate episodes.

        Raises:
            IndexError: if the scraped columns have different lengths or an
                episode link carries no ``no`` query parameter.
        """
        soup = BeautifulSoup(self._load_html(), 'lxml')

        # Thumbnail images (every linked <img> that has a src attribute).
        thumbnails = soup.select('a > img[src]')
        # Title of each episode.
        titles = soup.select('td.title > a')
        # Star ratings.
        ratings = soup.select('div.rating_type > strong')
        # Registration dates.
        dates = soup.select('td.num')

        # Episode numbers, taken from the ``no`` query parameter of each
        # episode link. The pattern also matches when ``no`` is the last
        # parameter of the URL.
        episode_numbers = [
            re.findall(r'[?&]no=([^&#]*)', link.get('href'))[0]
            for link in soup.select('td.title > a[href]')
        ]

        episodes = []
        for i, title_tag in enumerate(titles):
            episodes.append(Episode(
                self.webtoon_id,
                episode_numbers[i],
                # Offset by one: presumably the first linked image on the
                # page is not an episode thumbnail -- TODO confirm.
                thumbnails[i + 1].get('src'),
                title_tag.string,
                ratings[i].string,
                dates[i].string,
            ))
        self.episode_list[:] = episodes
            


        
        
        

In [4]:
class EpisodeImage:
    """Image URLs of one episode detail page.

    Attributes:
        episode: Label or object identifying what this belongs to; stored
            as given and not interpreted here.
        url: URL of the episode detail page to download.
        image_list: Image URLs; filled on the copy that
            :meth:`image_crawler` attaches to the episode.
    """

    def __init__(self, episode, url):
        self.episode = episode
        self.url = url
        self.image_list = []

    def image_crawler(self, episode_user):
        """Collect the image URLs of ``self.url`` and attach them to an episode.

        The detail page is cached as ``data/<webtoon_id>_<no>.html``. The key
        is built from the episode's identifiers rather than from the object
        itself, whose default string form contains a memory address and so
        would never match again in a later session.

        Args:
            episode_user: Object with ``webtoon_id``, ``no`` and a mutable
                ``image_list``. A new ``EpisodeImage`` holding the collected
                URLs is appended to ``episode_user.image_list``.

        Raises:
            requests.HTTPError: if the download returns an error status.
        """
        cache_path = 'data/{}_{}.html'.format(episode_user.webtoon_id, episode_user.no)
        if not os.path.exists(cache_path):
            response = requests.get(self.url)
            # Fail loudly instead of caching an error page.
            response.raise_for_status()
            os.makedirs('data', exist_ok=True)
            # Explicit UTF-8 so the page text does not depend on the
            # platform's default encoding.
            with open(cache_path, 'wt', encoding='utf-8') as f:
                f.write(response.text)

        with open(cache_path, 'rt', encoding='utf-8') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'lxml')

        # The result is stored on a fresh instance (self stays untouched),
        # which is what existing callers read via episode.image_list[0].
        crawled = EpisodeImage(self.episode, self.url)
        crawled.image_list.extend(
            img['src'] for img in soup.select('div.wt_viewer > img')
        )
        episode_user.image_list.append(crawled)
        
        

In [5]:
# Create the Webtoon handle for titleId 651673; nothing is fetched yet.
yumi = Webtoon(651673)

In [6]:
# Check the id stored by the constructor.
yumi.webtoon_id

651673

In [7]:
# Load the list page (from data/651673.html if cached) and fill title/author/description.
yumi.rework()

In [8]:
# Title parsed by rework().
yumi.title

'유미의 세포들'

In [9]:
# Author parsed by rework().
yumi.author

'이동건'

In [10]:
# Parse the episode rows of the list page into yumi.episode_list.
yumi.update()

In [11]:
# Episode defines no __repr__, so this prints the default object representations.
print(yumi.episode_list)

[<__main__.Episode object at 0x109aa9e48>, <__main__.Episode object at 0x109aa9e80>, <__main__.Episode object at 0x109aa9eb8>, <__main__.Episode object at 0x109aa9ef0>, <__main__.Episode object at 0x109aa9f28>, <__main__.Episode object at 0x109aa9f60>, <__main__.Episode object at 0x109aa9f98>, <__main__.Episode object at 0x109aa9fd0>, <__main__.Episode object at 0x109aad048>, <__main__.Episode object at 0x109aad080>]


In [12]:
# For every episode: print its detail-page URL, then crawl that page.
# image_crawler appends a new EpisodeImage (holding the image URLs) to
# episode.image_list, so re-running this cell appends another entry per episode.
for episode in yumi.episode_list:
    print(episode.info)
    # The first argument is only stored as a label on the EpisodeImage.
    koko = EpisodeImage('yumi',episode.info)
    koko.image_crawler(episode)
    

http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=310
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=309
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=308
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=307
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=306
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=305
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=304
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=303
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=302
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=301


In [14]:
# Show the first EpisodeImage attached to each episode by the crawl above.
for i in range(len(yumi.episode_list)):
    print(yumi.episode_list[i].image_list[0])

<__main__.EpisodeImage object at 0x10a46e860>
<__main__.EpisodeImage object at 0x109b2d128>
<__main__.EpisodeImage object at 0x10a570160>
<__main__.EpisodeImage object at 0x10a66c710>
<__main__.EpisodeImage object at 0x10a49b710>
<__main__.EpisodeImage object at 0x10a769a90>
<__main__.EpisodeImage object at 0x10a860f98>
<__main__.EpisodeImage object at 0x10a820668>
<__main__.EpisodeImage object at 0x10a769c50>
<__main__.EpisodeImage object at 0x10a7520f0>


In [15]:
# Print the list of image URLs collected for each episode.
for i in range(len(yumi.episode_list)):
    print(yumi.episode_list[i].image_list[0].image_list)

['http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_1.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_2.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_3.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_4.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_5.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_6.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_7.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_8.jpg', 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_9.jpg', 'http://imgcomic.n