In [22]:
import os
import requests
from bs4 import BeautifulSoup
import re
    

In [23]:
class Episode:
    """One episode of a webtoon.

    Stores the constructor arguments unchanged and starts with an empty
    ``image_list``, which ``EpisodeImage.image_crawler`` appends to.
    """

    def __init__(self, webtoon_id, no, url_thumbnail, title, rating, created_date):
        # Identity of the episode: which webtoon it belongs to and its number.
        self.webtoon_id, self.no = webtoon_id, no
        # Listing metadata, kept exactly as received.
        self.url_thumbnail, self.title = url_thumbnail, title
        self.rating, self.created_date = rating, created_date
        # Empty until something appends crawled image holders to it.
        self.image_list = []

    @property
    def info(self):
        """Return the detail-page URL for this episode.

        The URL carries ``titleId`` and ``no`` as URL-encoded query
        parameters, in that order.
        """
        from urllib.parse import urlencode

        query = urlencode([('titleId', self.webtoon_id), ('no', self.no)])
        return 'http://comic.naver.com/webtoon/detail.nhn?' + query


In [24]:
class Webtoon:
    """A Naver webtoon identified by its ``titleId``.

    Attributes:
        webtoon_id: The ``titleId`` value used in Naver comic URLs.
        title, author, description: ``None`` until :meth:`rework` fills them
            in from the webtoon's list page.
        episode_list: ``Episode`` objects built by :meth:`update`.

    Downloaded pages are cached as HTML files under ``data/``. Once a cache
    file exists it is reused and the page is not fetched again.
    """

    def __init__(self, webtoon_id):
        self.webtoon_id = webtoon_id
        self.title = None
        self.author = None
        self.description = None
        self.episode_list = []

    @staticmethod
    def _cached_html(path, url, params=None):
        """Return the HTML stored at ``path``, downloading ``url`` first if needed.

        Args:
            path: Cache file location.
            url: Page to fetch when the cache file does not exist yet.
            params: Optional query parameters passed to ``requests.get``.

        Raises:
            requests.HTTPError: If the download returns an error status, so
                that an error page is never written to the cache.
        """
        if not os.path.exists(path):
            response = requests.get(url, params=params)
            response.raise_for_status()
            # The cache directory may not exist on a fresh checkout.
            os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
            # Explicit UTF-8: the pages contain Korean text and the platform
            # default encoding is not guaranteed to handle it.
            with open(path, 'wt', encoding='utf-8') as f:
                f.write(response.text)

        with open(path, 'rt', encoding='utf-8') as f:
            return f.read()

    @staticmethod
    def _query_value(href, key):
        """Return the first value of query parameter ``key`` in ``href``, or None.

        Unlike a ``key=(.*?)&`` regex, this also works when the parameter is
        the last one in the URL.
        """
        from urllib.parse import parse_qs, urlparse

        values = parse_qs(urlparse(href).query).get(key)
        return values[0] if values else None

    def _list_page_html(self):
        """Return the (cached) HTML of this webtoon's episode list page."""
        return self._cached_html(
            'data/{}.html'.format(self.webtoon_id),
            'http://comic.naver.com/webtoon/list.nhn',
            params={'titleId': self.webtoon_id},
        )

    def rework(self):
        """Fill in ``title``, ``author`` and ``description`` from the list page.

        Raises:
            IndexError: If the title or author element is not found.
        """
        soup = BeautifulSoup(self._list_page_html(), 'lxml')

        title_list = soup.select('div.detail > h2')
        # The first child of the <h2> is the title text; later children are ignored.
        self.title = title_list[0].contents[0].strip()

        # Standard CSS class selector; the quoted form `span."wrt_nm"` is
        # rejected by current BeautifulSoup releases.
        auth_list = soup.select('span.wrt_nm')
        self.author = auth_list[0].string.strip()

        # First <p> element in the document.
        # NOTE(review): presumably the synopsis paragraph; confirm against the page markup.
        self.description = soup.p.string

    def update(self):
        """Rebuild ``episode_list`` from the list page.

        The list is replaced rather than extended, so calling this method
        repeatedly does not produce duplicate episodes.

        Raises:
            IndexError: If the page has fewer thumbnails, ratings or dates
                than episode titles.
        """
        soup = BeautifulSoup(self._list_page_html(), 'lxml')

        # Thumbnail images (any <img> with a src directly inside an <a>).
        thumbnails = soup.select('a > img[src]')
        # Title link of each episode.
        title_links = soup.select('td.title > a')
        # Rating of each episode.
        ratings = soup.select('div.rating_type > strong')
        # Registration date of each episode.
        dates = soup.select('td.num')

        episodes = []
        for index, link in enumerate(title_links):
            episodes.append(Episode(
                self.webtoon_id,
                # Episode number comes from the `no` query parameter of the link.
                self._query_value(link.get('href', ''), 'no'),
                # NOTE(review): the + 1 offset skips the first `a > img` match,
                # presumably a non-episode image earlier on the page; confirm.
                thumbnails[index + 1].get('src'),
                link.string,
                ratings[index].string,
                dates[index].string,
            ))
        self.episode_list = episodes

    @classmethod
    def search_webtoon(cls, webtoon_name):
        """Interactively pick a webtoon whose title contains ``webtoon_name``.

        Prints the matching titles as a numbered menu and prompts until a
        valid number is entered.

        Returns:
            A new instance for the chosen webtoon (``webtoon_id`` is the
            ``titleId`` string taken from the link), or ``None`` when no
            title matches.
        """
        html = cls._cached_html(
            'data/all_webtoon.html',
            'http://comic.naver.com/webtoon/weekday.nhn',
        )
        soup = BeautifulSoup(html, 'lxml')

        # Collect (title, titleId) pairs for matching titles. A title can be
        # listed more than once; only its first occurrence is kept, and page
        # order is preserved so the menu is stable.
        matches = []
        seen_titles = set()
        for link in soup.select('div.col_inner > ul > li > a'):
            title = link.string
            title_id = cls._query_value(link.get('href', ''), 'titleId')
            if title is None or title_id is None:
                continue  # not a plain-text title link with a titleId
            if webtoon_name in title and title not in seen_titles:
                seen_titles.add(title)
                matches.append((title, title_id))

        if not matches:
            return None

        while True:
            for index, (title, _) in enumerate(matches):
                print('{}. {}'.format(index + 1, title))

            user_input = input('선택: ')
            try:
                choice = int(user_input)
            except ValueError:
                continue  # not a number: show the menu again
            # Reject 0 and negatives, which would otherwise index from the end.
            if 1 <= choice <= len(matches):
                return cls(matches[choice - 1][1])

In [25]:
class EpisodeImage:
    """The image URLs found on one episode's viewer page.

    Attributes:
        episode: Value supplied by the caller to identify the episode.
        url: URL of the episode's viewer page.
        image_url_list: ``src`` of every image in the viewer; empty until
            :meth:`image_crawler` has run.
    """

    def __init__(self, episode, url):
        self.episode = episode
        self.url = url
        self.image_url_list = []

    @staticmethod
    def _cache_path(episode_user):
        """Return the cache file path for ``episode_user``'s viewer page.

        Built from the episode's ``webtoon_id`` and ``no`` so the name is the
        same on every run. Formatting the object itself would embed its
        memory address in the file name and defeat the cache.
        """
        return 'data/{}_{}.html'.format(episode_user.webtoon_id, episode_user.no)

    def image_crawler(self, episode_user):
        """Collect the viewer image URLs and attach this object to the episode.

        Downloads ``self.url`` unless a cached copy exists, stores the image
        URLs in ``self.image_url_list`` (replacing any previous content) and
        appends ``self`` to ``episode_user.image_list``.

        Args:
            episode_user: Object with ``webtoon_id``, ``no`` and
                ``image_list`` attributes (an ``Episode``).

        Raises:
            requests.HTTPError: If the download returns an error status.
        """
        path = self._cache_path(episode_user)

        if not os.path.exists(path):
            response = requests.get(self.url)
            response.raise_for_status()
            # The cache directory may not exist on a fresh checkout.
            os.makedirs('data', exist_ok=True)
            with open(path, 'wt', encoding='utf-8') as f:
                f.write(response.text)

        with open(path, 'rt', encoding='utf-8') as f:
            html = f.read()

        soup = BeautifulSoup(html, 'lxml')
        self.image_url_list = [img['src'] for img in soup.select('div.wt_viewer > img')]

        episode_user.image_list.append(self)
        
        

In [26]:
# Create a Webtoon for titleId 651673. The constructor only stores the id;
# no page is fetched or parsed yet.
yumi = Webtoon(651673)

In [27]:
yumi.webtoon_id

651673

In [28]:
# Read the webtoon's list page (downloaded only if data/651673.html does not
# exist yet) and fill in yumi.title, yumi.author and yumi.description.
yumi.rework()

In [29]:
yumi.title

'유미의 세포들'

In [30]:
yumi.author

'이동건'

In [31]:
# Parse the same list page into Episode objects stored in yumi.episode_list.
yumi.update()

In [32]:
print(yumi.episode_list)

[<__main__.Episode object at 0x10fe48be0>, <__main__.Episode object at 0x10fe48c18>, <__main__.Episode object at 0x10fe48c50>, <__main__.Episode object at 0x10fe48c88>, <__main__.Episode object at 0x10fe48cc0>, <__main__.Episode object at 0x10fe48cf8>, <__main__.Episode object at 0x10fe48d30>, <__main__.Episode object at 0x10fe48d68>, <__main__.Episode object at 0x10fe48da0>, <__main__.Episode object at 0x10fe48dd8>]


In [33]:
# Crawl the viewer page of every listed episode. Each crawl appends an
# EpisodeImage holding the page's image URLs to that episode's image_list.
for episode in yumi.episode_list:
    print(episode.info)
    # Pass the episode itself (not a placeholder string) so the EpisodeImage
    # records which episode it belongs to.
    koko = EpisodeImage(episode, episode.info)
    koko.image_crawler(episode)
    

http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=310
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=309
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=308
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=307
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=306
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=305
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=304
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=303
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=302
http://comic.naver.com/webtoon/detail.nhn?titleId=651673&no=301


In [34]:
# Print the image_list collected for each episode, one line per episode,
# in the same order as yumi.episode_list.
for current_episode in yumi.episode_list:
    print(current_episode.image_list)

[<__main__.EpisodeImage object at 0x1101a9d68>]
[<__main__.EpisodeImage object at 0x1104585f8>]
[<__main__.EpisodeImage object at 0x10dae8588>]
[<__main__.EpisodeImage object at 0x110510a58>]
[<__main__.EpisodeImage object at 0x10fd42a58>]
[<__main__.EpisodeImage object at 0x11060acf8>]
[<__main__.EpisodeImage object at 0x110544f28>]
[<__main__.EpisodeImage object at 0x1105ea588>]
[<__main__.EpisodeImage object at 0x11078f828>]
[<__main__.EpisodeImage object at 0x10d1d5a58>]


In [35]:
yumi.episode_list[0].image_list[0]

<__main__.EpisodeImage at 0x1101a9d68>

In [36]:
yumi.episode_list[0].image_list[0].image_url_list

['http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_1.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_2.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_3.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_4.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_5.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_6.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_7.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_8.jpg',
 'http://imgcomic.naver.net/webtoon/651673/310/20180529183300_9e5b6418e84912e27232e998ae93797e_IMAG01_9.jpg',
 'http://i

In [37]:
# Interactive: prints a numbered menu of webtoons whose title contains the
# search text, reads the choice from stdin and returns a Webtoon for it.
webtoon = Webtoon.search_webtoon('탑')

1. 신의 탑
선택: 1


In [38]:
print(webtoon)

<__main__.Webtoon object at 0x11015b518>


In [39]:
webtoon.webtoon_id

'183559'

In [40]:
# Fill in title, author and description for the webtoon chosen in the search.
webtoon.rework()

In [41]:
webtoon.title

'신의 탑'

In [42]:
webtoon.author

'SIU'