#### Wavve

---

In [2]:
import requests
import pandas as pd
import json
from bs4 import BeautifulSoup

#### 1. 프로젝트 생성

In [4]:
!scrapy startproject wavve

New Scrapy project 'wavve', using template directory '/home/ubuntu/.pyenv/versions/3.6.9/envs/python3/lib/python3.6/site-packages/scrapy/templates/project', created in:
    /home/ubuntu/python3/notebook/crawling/wavve

You can start your first spider with:
    cd wavve
    scrapy genspider example example.com


In [6]:
!tree wavve

/bin/sh: 1: tree: not found


#### 2. item.py 수정

In [10]:
%%writefile wavve/wavve/items.py
import scrapy

class WavveItem(scrapy.Item):
    title = scrapy.Field()
    episode = scrapy.Field()
    date = scrapy.Field()

Overwriting wavve/wavve/items.py


#### 3. spider.py 생성

In [32]:
%%writefile ./wavve/wavve/spiders/spider.py

import scrapy
import json

from wavve.items import WavveItem

class Spider(scrapy.Spider):
    
    name = 'Wavve'
    
    def __init__(self, **kwargs):
        self.base_url = "https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre={}".format(kwargs["category"])
        self.start_urls = []
        for page in range(1,11):
            offset = (page - 1) *20
            
            self.start_urls.append(self.base_url + f"&limit=20&offset={offset}&orderby=viewtime&page={page}&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto")

        super().__init__(**kwargs)
        
        
    def start_requests(self):
        for url in self.start_urls:
            yield scrapy.Request(url = url, callback=self.get_content)
            
    def get_content(self, response):
        for data in json.loads(response.body_as_unicode())['cell_toplist']['celllist']:
            title = data['title_list'][0]['text']
            count = data['title_list'][1]['text'].split('$')[0]
            try :
                date = data['title_list'][1]['text'].split('$')[2]
            except:
                count = 0
                date = data['title_list'][1]['text'].split('$')[0]
            
            item = WavveItem()
            item['title'] = title
            item['episode'] = count
            item['date'] = date
            
            yield item
        

Overwriting ./wavve/wavve/spiders/spider.py


In [39]:
%%writefile wavve/run.sh
cd wavve
scrapy crawl Wavve -o wavve_all.csv -a category=all
scrapy crawl Wavve -o wavve_drama.csv -a category=01
scrapy crawl Wavve -o wavve_ent.csv -a category=02

Overwriting wavve/run.sh


In [14]:
!chmod 764 wavve/run.sh

In [15]:
!ls -al wavve/run.sh

-rwxrw-r-- 1 ubuntu ubuntu 165 Mar 18 12:08 wavve/run.sh


In [40]:
%%writefile ./wavve/wavve/mongodb.py
import pymongo

client = pymongo.MongoClient('mongodb://test:testpw@15.165.136.173:27017')
db = client.wavve_server
collection = db.items

Overwriting ./wavve/wavve/mongodb.py


In [41]:
%%writefile ./wavve/wavve/pipelines.py
from .mongodb import collection

class WavvePipeline(object):
    def process_item(self, item, spider):
        data = {
            
            'title' : item['title'],
            'count' : item['episode'],
            'date' : item['date'],

        }
        
        collection.insert(data)
        
        return item

Overwriting ./wavve/wavve/pipelines.py


In [23]:
!echo "ITEM_PIPELINES = {\n" >> wavve/wavve/settings.py '\n wavve.pipelines.WavvePipeline' : 300, "}" >> wavve/wavve/settings.py

In [25]:
!tail -n 5 wavve/wavve/settings.py

#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
ITEM_PIPELINES = {
 'wavve.pipelines.WavvePipeline' : 300, }


In [53]:
!./wavve/run.sh

2020-03-18 12:17:46 [scrapy.utils.log] INFO: Scrapy 1.8.0 started (bot: wavve)
2020-03-18 12:17:46 [scrapy.utils.log] INFO: Versions: lxml 4.5.0.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 19.10.0, Python 3.6.9 (default, Jan 17 2020, 13:17:41) - [GCC 7.4.0], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d  10 Sep 2019), cryptography 2.8, Platform Linux-4.15.0-1060-aws-x86_64-with-debian-buster-sid
2020-03-18 12:17:46 [scrapy.crawler] INFO: Overridden settings: {'BOT_NAME': 'wavve', 'FEED_FORMAT': 'csv', 'FEED_URI': 'wavve_all.csv', 'NEWSPIDER_MODULE': 'wavve.spiders', 'SPIDER_MODULES': ['wavve.spiders']}
2020-03-18 12:17:46 [scrapy.extensions.telnet] INFO: Telnet Password: 93d7d4fe7fcdb769
2020-03-18 12:17:46 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2020-03-

2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=20&orderby=viewtime&page=2&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-17(화)', 'episode': '101회 ', 'title': '꽃길만 걸어요'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-14(토)', 'episode': '324회 ', 'title': '강적들'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq

2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=160&orderby=viewtime&page=9&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2013-06-26(수)', 'episode': '7회 ', 'title': '너의 목소리가 들려'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=100&orderby=viewtime&page=6&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2016-02-21(일)', 'episode': '49회 ', 'title': '내딸, 금사월'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://api

2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-15(일)', 'episode': '430회 ', 'title': '이제 만나러 갑니다'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=60&orderby=viewtime&page=4&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2013-12-18(수)', 'episode': '1회 ', 'title': '별에서 온 그대'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://api

2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=100&orderby=viewtime&page=6&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2017-03-08(수)', 'episode': '13회 ', 'title': '김과장'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=100&orderby=viewtime&page=6&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2016-11-24(목)', 'episode': '4회 ', 'title': '역도요정 김복주'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq

2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': '2020-03-17(화)', 'episode': 0, 'title': 'KBS 뉴스9'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=60&orderby=viewtime&page=4&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-15(일)', 'episode': '246회 ', 'title': '복면가왕'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr

2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=180&orderby=viewtime&page=10&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2016-08-21(일)', 'episode': '54회 ', 'title': '아이가 다섯'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=all&limit=20&offset=180&orderby=viewtime&page=10&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2019-11-27(수)', 'episode': '1-2회 ', 'title': '하자있는 인간들'}
2020-03-18 12:17:47 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.p

2020-03-18 12:17:48 [scrapy.middleware] INFO: Enabled item pipelines:
['wavve.pipelines.WavvePipeline']
2020-03-18 12:17:48 [scrapy.core.engine] INFO: Spider opened
2020-03-18 12:17:48 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2020-03-18 12:17:48 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
2020-03-18 12:17:49 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=20&orderby=viewtime&page=2&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto> (referer: None)
2020-03-18 12:17:49 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=80&orderby=v

2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2019-11-27(수)', 'episode': '1-2회 ', 'title': '하자있는 인간들'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2019-03-16(토)', 'episode': '103-104회 ', 'title': '주말 드라마 하나뿐인 내편'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 htt

2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=140&orderby=viewtime&page=8&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2011-02-02(수)', 'episode': '9회 ', 'title': '싸인'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=140&orderby=viewtime&page=8&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2010-06-20(일)', 'episode': '7회 ', 'title': '김수로'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf

2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=40&orderby=viewtime&page=3&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-01-25(토)', 'episode': '11회 ', 'title': '간택-여인들의 전쟁'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=40&orderby=viewtime&page=3&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2011-11-24(목)', 'episode': '16회 ', 'title': '뿌리깊은 나무'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.p

2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2018-07-25(수)', 'episode': '1회 ', 'title': '친애하는 판사님께'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2017-11-20(월)', 'episode': '35회 ', 'title': '사랑의 온도'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq

2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=40&orderby=viewtime&page=3&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2010-04-06(화)', 'episode': '6회 ', 'title': '동이'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=0&orderby=viewtime&page=1&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2017-06-13(화)', 'episode': '8회 ', 'title': '쌈, 마이웨이'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/c

2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=180&orderby=viewtime&page=10&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2019-03-25(월)', 'episode': '25-26회 ', 'title': '아이템'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=01&limit=20&offset=100&orderby=viewtime&page=6&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2019-05-22(수)', 'episode': '1-2회 ', 'title': '수목 드라마 단, 하나의 사랑'}
2020-03-18 12:17:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https

2020-03-18 12:17:50 [scrapy.utils.log] INFO: Scrapy 1.8.0 started (bot: wavve)
2020-03-18 12:17:50 [scrapy.utils.log] INFO: Versions: lxml 4.5.0.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.5.2, w3lib 1.21.0, Twisted 19.10.0, Python 3.6.9 (default, Jan 17 2020, 13:17:41) - [GCC 7.4.0], pyOpenSSL 19.1.0 (OpenSSL 1.1.1d  10 Sep 2019), cryptography 2.8, Platform Linux-4.15.0-1060-aws-x86_64-with-debian-buster-sid
2020-03-18 12:17:50 [scrapy.crawler] INFO: Overridden settings: {'BOT_NAME': 'wavve', 'FEED_FORMAT': 'csv', 'FEED_URI': 'wavve_ent.csv', 'NEWSPIDER_MODULE': 'wavve.spiders', 'SPIDER_MODULES': ['wavve.spiders']}
2020-03-18 12:17:50 [scrapy.extensions.telnet] INFO: Telnet Password: 74c08681d0d9aa66
2020-03-18 12:17:50 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2020-03-

2020-03-18 12:17:50 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=40&orderby=viewtime&page=3&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-13(금)', 'episode': '18회 ', 'title': '편애중계'}
2020-03-18 12:17:50 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=40&orderby=viewtime&page=3&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-17(화)', 'episode': '271회 ', 'title': '나는 몸신이다'}
2020-03-18 12:17:50 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.c

2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=60&orderby=viewtime&page=4&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2012-01-14(토)', 'episode': '3회 ', 'title': '<무한도전 스페셜> 무한상사 모음'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=60&orderby=viewtime&page=4&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-15(일)', 'episode': '230회 ', 'title': '모란봉 클럽'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https:/

2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=100&orderby=viewtime&page=6&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2016-01-31(일)', 'episode': '2회 ', 'title': '신비한 TV 서프라이즈 레전드 다시보기'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=160&orderby=viewtime&page=9&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2016-04-02(토)', 'episode': '1회 ', 'title': '꽃미남브로맨스'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 htt

2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=80&orderby=viewtime&page=5&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2011-12-03(토)', 'episode': '3회 ', 'title': '<무한도전 레전드> 캐릭터쇼'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=180&orderby=viewtime&page=10&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2018-03-13(화)', 'episode': '3회 ', 'title': '하룻밤만 재워줘'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://

2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=20&orderby=viewtime&page=2&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-16(월)', 'episode': '4회 ', 'title': '오지GO'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=20&orderby=viewtime&page=2&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2020-03-14(토)', 'episode': '32회 ', 'title': '자연스럽게'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr

2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=140&orderby=viewtime&page=8&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2012-11-27(화)', 'episode': '139회 ', 'title': '김승우의 승승장구'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://apis.pooq.co.kr/cf/vod/popularcontents?WeekDay=all&broadcastid=6339&came=broadcast&contenttype=vod&genre=02&limit=20&offset=100&orderby=viewtime&page=6&uiparent=GN2-VN2&uirank=2&uitype=VN2&apikey=E5F3E0D30947AA5440556471321BB6D9&credential=none&device=pc&drm=wm&partner=pooq&pooqzone=none&region=kor&targetage=auto>
{'date': ' 2013-01-26(토)', 'episode': '1회 ', 'title': '인간의 조건 - 집으로'}
2020-03-18 12:17:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://

In [54]:
categories = {'all' : 'all' , '01' : 'drama' ,'02' : 'ent'}

In [55]:
dfs = [pd.read_csv("wavve/wavve_{}.csv".format(category)) for category in categories.values()]

In [56]:
[(category, len(df)) for category, df in zip(categories.values(), dfs)]

[('all', 200), ('drama', 200), ('ent', 200)]

In [57]:
result_df = pd.concat(dfs, ignore_index=True)
result_df.tail(2)

Unnamed: 0,date,episode,title
598,2004-01-17(토),11회,X맨 1
599,2020-01-08(수),11회,팔로우 미 시즌12


In [58]:
import pymongo

client = pymongo.MongoClient('mongodb://test:testpw@15.165.136.173:27017')
db = client.wavve_server
collection = db.items

In [59]:
datas = collection.find({}, {"_id": False})

In [60]:
pd.DataFrame(datas).tail(2)

Unnamed: 0,count,date,title
598,11회,2004-01-17(토),X맨 1
599,11회,2020-01-08(수),팔로우 미 시즌12
