In [None]:
!scrapy startproject stock

In [2]:
%%writefile stock/stock/items.py
import scrapy


class StockItem(scrapy.Item):
    title = scrapy.Field()
    news = scrapy.Field()
    news_link = scrapy.Field()
    date = scrapy.Field()
    

Overwriting stock/stock/items.py


In [7]:
%%writefile stock/stock/spiders/spider.py
import scrapy
import re 
from stock.items import StockItem

class StockSpider(scrapy.Spider):
    name = "Stock"
    
    def start_requests(self):
        codes = ["060310", "095570"]
        urls = [f"https://finance.naver.com/item/news_news.nhn?code={code}&page=&sm=title_entity_id.basic" for code in codes]
        for url in urls:
            yield scrapy.Request(url, callback=self.parse)
            
    def parse(self, response):
        page_links = response.xpath('/html/body/div/table[2]/tr/td/a/@href').extract()
        last_page = re.findall('page=([0-9]{1,4})', page_links[-1])[0]
        stock_url = str(response.url)
        for page in range(1,int(last_page)+1):
            url = stock_url[:-25] + str(page) + stock_url[-25:]
            yield scrapy.Request(url, callback=self.parse_content1)  
    
    def parse_content1(self, response):
        links = response.xpath('/html/body/div/table[1]/tbody/tr/td[1]/a/@href').extract()
        for link in links:
            yield scrapy.Request("https://finance.naver.com/" + link, callback=self.parse_content2)
        
    def parse_content2(self, response):
        item = StockItem()
        item["title"] = response.xpath('//*[@id="content"]/div[2]/table/tbody/tr[1]/th/strong/text()').extract()
        item["news"] = response.xpath('//*[@id="content"]/div[2]/table/tbody/tr[2]/th/span/text()').extract()
        item["date"] = response.xpath('//*[@id="content"]/div[2]/table/tbody/tr[2]/th/span/span/text()').extract()
        item["news_link"] = response.url
        yield item    

Overwriting stock/stock/spiders/spider.py


In [8]:
%%writefile run.sh
cd stock
rm stock.csv
scrapy crawl Stock -o stock.csv

Overwriting run.sh


In [9]:
!source run.sh

2021-06-03 02:47:16 [scrapy.utils.log] INFO: Scrapy 2.5.0 started (bot: stock)
2021-06-03 02:47:16 [scrapy.utils.log] INFO: Versions: lxml 4.6.3.0, libxml2 2.9.10, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 21.2.0, Python 3.8.5 (default, Mar 30 2021, 06:19:28) - [GCC 7.5.0], pyOpenSSL 20.0.1 (OpenSSL 1.1.1k  25 Mar 2021), cryptography 3.4.7, Platform Linux-5.4.0-1048-aws-x86_64-with-glibc2.27
2021-06-03 02:47:16 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.epollreactor.EPollReactor
2021-06-03 02:47:16 [scrapy.crawler] INFO: Overridden settings:
{'BOT_NAME': 'stock',
 'NEWSPIDER_MODULE': 'stock.spiders',
 'SPIDER_MODULES': ['stock.spiders']}
2021-06-03 02:47:16 [scrapy.extensions.telnet] INFO: Telnet Password: 932b77f401ecddd4
2021-06-03 02:47:16 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.extensions.feedexport.FeedExporter',
 

2021-06-03 02:47:17 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0003796047&office_id=011&code=060310&page=1&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code=060310&page=1&sm=title_entity_id.basic)
2021-06-03 02:47:17 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004381756&office_id=015&code=060310&page=2&sm=title_entity_id.basic>
{'date': [' 2020.07.16 13:37'],
 'news': ['한국경제 '],
 'news_link': 'https://finance.naver.com//item/news_read.nhn?article_id=0004381756&office_id=015&code=060310&page=2&sm=title_entity_id.basic',
 'title': ["'3S' 10% 이상 상승, 전일 외국인 대량 순매수"]}
2021-06-03 02:47:17 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0004868856&office_id=018&code=060310&page=1&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code=060310&page=1&sm=t

2021-06-03 02:47:17 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0004750533&office_id=009&code=095570&page=2&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code=095570&page=2&sm=title_entity_id.basic)
2021-06-03 02:47:17 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004538913&office_id=014&code=095570&page=2&sm=title_entity_id.basic>
{'date': [' 2020.12.01 18:08'],
 'news': ['파이낸셜뉴스 '],
 'news_link': 'https://finance.naver.com//item/news_read.nhn?article_id=0004538913&office_id=014&code=095570&page=2&sm=title_entity_id.basic',
 'title': ['[공시]AJ네트웍스, 단기차입금 500억 증가']}
2021-06-03 02:47:17 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004823112&office_id=277&code=095570&page=2&sm=title_entity_id.basic>
{'date': [' 2021.01.05 17:08'],
 'news': ['아시아경제 '],
 'news_link': 'https://fina

2021-06-03 02:47:17 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004373056&office_id=015&code=095570&page=4&sm=title_entity_id.basic>
{'date': [' 2020.07.03 09:38'],
 'news': ['한국경제 '],
 'news_link': 'https://finance.naver.com//item/news_read.nhn?article_id=0004373056&office_id=015&code=095570&page=4&sm=title_entity_id.basic',
 'title': ["'AJ네트웍스' 20% 이상 상승, 주가 60일 이평선 상회, 단기·중기 이평선 역배열"]}
2021-06-03 02:47:17 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0004508917&office_id=015&code=095570&page=1&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code=095570&page=1&sm=title_entity_id.basic)
2021-06-03 02:47:17 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0004595845&office_id=014&code=095570&page=1&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code

2021-06-03 02:47:18 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004887793&office_id=018&code=095570&page=1&sm=title_entity_id.basic>
{'date': [' 2021.03.29 18:24'],
 'news': ['이데일리 '],
 'news_link': 'https://finance.naver.com//item/news_read.nhn?article_id=0004887793&office_id=018&code=095570&page=1&sm=title_entity_id.basic',
 'title': ['AJ네트웍스, 완전자회사 AJ엠·AJ이앤에스 흡수합병']}
2021-06-03 02:47:18 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0004498341&office_id=014&code=095570&page=3&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code=095570&page=3&sm=title_entity_id.basic)
2021-06-03 02:47:18 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://finance.naver.com//item/news_read.nhn?article_id=0004776844&office_id=018&code=095570&page=3&sm=title_entity_id.basic> (referer: https://finance.naver.com/item/news_news.nhn?code=095570&page=3&sm=t

2021-06-03 02:47:18 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004776844&office_id=018&code=095570&page=3&sm=title_entity_id.basic>
{'date': [' 2020.11.04 09:26'],
 'news': ['이데일리 '],
 'news_link': 'https://finance.naver.com//item/news_read.nhn?article_id=0004776844&office_id=018&code=095570&page=3&sm=title_entity_id.basic',
 'title': ['[특징주]AJ네트웍스, 파렛트 사업부 인적분할에 ‘급등’']}
2021-06-03 02:47:18 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=0004688106&office_id=009&code=095570&page=3&sm=title_entity_id.basic>
{'date': [' 2020.11.03 18:13'],
 'news': ['매일경제 '],
 'news_link': 'https://finance.naver.com//item/news_read.nhn?article_id=0004688106&office_id=009&code=095570&page=3&sm=title_entity_id.basic',
 'title': ['AJ네트웍스, 파렛트 사업부 인적분할…전문성 강화']}
2021-06-03 02:47:18 [scrapy.core.scraper] DEBUG: Scraped from <200 https://finance.naver.com//item/news_read.nhn?article_id=00044383

In [10]:
pd.read_csv("stock/stock.csv")

Unnamed: 0,date,news,news_link,title
0,2020.06.16 18:25,이데일리,https://finance.naver.com//item/news_read.nhn?...,"3S, 26억 전기車 배터리 성능시험장치 공급 계약"
1,2020.07.16 13:37,한국경제,https://finance.naver.com//item/news_read.nhn?...,"'3S' 10% 이상 상승, 전일 외국인 대량 순매수"
2,2020.06.16 15:55,아시아경제,https://finance.naver.com//item/news_read.nhn?...,"3S, 26억원 규모 전기차 배터리 성능시험장치 공급계약"
3,2020.06.04 18:23,이데일리,https://finance.naver.com//item/news_read.nhn?...,3S “박모 대표 2심 판결에서 징역 2년 선고돼”
4,2020.06.16 15:41,한국경제,https://finance.naver.com//item/news_read.nhn?...,3S 수주공시 - 전기자동차용 배터리 성능시험장치 26.4억원 (매출액대비 7.38%)
...,...,...,...,...
61,2020.07.01 14:14,한국경제,https://finance.naver.com//item/news_read.nhn?...,"1년새 주가 반토막 난 AJ네트웍스, 회사채 투자자 관심 붙들까"
62,2020.07.03 09:20,이데일리,https://finance.naver.com//item/news_read.nhn?...,"[특징주]AJ네트웍스, AJ셀카 매각설에 급등"
63,2020.06.26 09:24,헤럴드경제,https://finance.naver.com//item/news_read.nhn?...,A급 현대일렉트릭·BBB급 AJ네트웍스 회사채 발행 추진
64,2020.07.03 09:09,매일경제,https://finance.naver.com//item/news_read.nhn?...,"[특징주] AJ네트웍스, AJ셀카 매각설에 급등"
