In [28]:

from urllib.request import urlopen
from bs4 import BeautifulSoup 
from selenium import webdriver  
from selenium.webdriver.common.action_chains import ActionChains  
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from bs4.element import NavigableString
from selenium.webdriver.chrome.service import Service
import time
import re
import requests
import gc

import pandas as pd 
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt

### **1. IPO Data**
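* Create one IPO dataframe for each four-month period (Jan–Apr, May–Aug, Sep–Dec) of every year from 2006 to 2018
* IPO data processing:
    * create the date range used for the news search (from one month before the IPO date to the day before it)
    * drop foreign companies (names containing "KY")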

In [15]:
# Load the raw IPO table, then keep only the rows whose ipo_date is 2006-01-01 or later.
_ipo_raw = pd.read_excel('IPO_RawData.xlsx')
_from_2006 = _ipo_raw['ipo_date'] >= '2006-01-01'
IPO = _ipo_raw[_from_2006].reset_index(drop=True)


       name  tse_ind tse_ind_name underwrite_date underwrite_announcement  \
0   1815 富喬       28  M2328 電子零組件      2006-01-17              2006-01-05   
1  3130 一零四       30  M2330 資訊服務業      2006-02-13              2006-01-12   
2  3221 台嘉碩       27  M2327 通信網路業      2006-02-16              2006-02-09   
3  3388 崇越電       28  M2328 電子零組件      2006-02-16              2006-02-09   
4   3390 旭軟       28  M2328 電子零組件      2006-02-15              2006-02-08   

    ipo_date otc_ipo_date tse_ipo_date establish_date ipo_market  ...  \
0 2006-01-23   2006-01-23          NaT     1999-01-14        OTC  ...   
1 2006-02-17          NaT   2006-02-17     1993-10-30        TSE  ...   
2 2006-02-22   2006-02-22          NaT     1997-11-10        OTC  ...   
3 2006-02-22   2006-02-22          NaT     1994-02-21        OTC  ...   
4 2006-02-22   2006-02-22          NaT     1998-10-12        OTC  ...   

  trading_volume(k)_2 close_price_3 trading_volume(k)_3 close_price_4  \
0              1348.0    

In [16]:
def clean(df):
    """Add the derived IPO columns to ``df`` in place.

    Columns written:
      * ``stock_code`` - first four characters of the original ``name``.
      * ``name``       - original ``name`` from index 5 onward (code and separator removed).
      * ``age``        - ``ipo_date - establish_date`` expressed in 365-day years.
      * ``ipo_year`` / ``ipo_month`` - calendar year and month of ``ipo_date``.
      * ``allot``      - original ``allot`` multiplied by 0.01.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``name``, ``ipo_date``, ``establish_date`` (both datetime)
        and ``allot``.

    Returns
    -------
    None. The frame is modified in place.

    Notes
    -----
    Not idempotent: a second call slices ``name`` again and rescales ``allot``
    again, so call it exactly once per freshly loaded frame.
    """
    full_name = df['name']
    df['stock_code'] = full_name.str[:4]
    df['name'] = full_name.str[5:]
    # Use the frame that was passed in. The previous version read
    # IPO['establish_date'] from the global table, which only worked when
    # ``df`` happened to be that very object.
    df['age'] = (df['ipo_date'] - df['establish_date']) / timedelta(days=365)
    df['ipo_year'] = df['ipo_date'].dt.year
    df['allot'] = df['allot']*0.01
    df['ipo_month'] = df['ipo_date'].dt.month

# Derive stock_code / age / ipo_year / ipo_month on the loaded table (in place).
clean(IPO)

# KY flag: 1 when the company name contains "KY", otherwise 0.
IPO['KY'] = 0
_has_ky = IPO['name'].str.contains('KY')
IPO.loc[_has_ky, 'KY'] = 1

# Return of the close price relative to the offer price.
IPO['underprice'] = IPO['close_price'] / IPO['offer_price'] - 1

# Oversubscription is the reciprocal of the allotment ratio.
IPO['oversub'] = 1 / IPO['allot']

# News search window for each IPO: one month before the IPO date ...
s = [(ipo_day - relativedelta(months=1)).strftime("%Y-%m-%d") for ipo_day in IPO['ipo_date']]
# ... up to the day before the IPO date.
e = [(ipo_day - relativedelta(days=1)).strftime("%Y-%m-%d") for ipo_day in IPO['ipo_date']]

IPO['start_date'] = s
IPO['end_date'] = e

# Working set: rows without the KY flag, re-indexed from 0.
_not_ky = IPO['KY'] == 0
IPO_USE = IPO[_not_ky].reset_index(drop = True)

In [18]:
def ipo_group(i,j):
    """Select the rows of ``IPO_USE`` listed in year ``i`` during the months in ``j``.

    Parameters
    ----------
    i : int
        Value matched against the ``ipo_year`` column.
    j : list of int
        Month numbers matched against the ``ipo_month`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows with a fresh 0-based index.
    """
    year_matches = IPO_USE['ipo_year'] == i
    month_matches = IPO_USE['ipo_month'].isin(j)
    selected = IPO_USE[year_matches & month_matches]
    return selected.reset_index(drop = True)

def _split_year(year):
    """Return the (Jan-Apr, May-Aug, Sep-Dec) IPO frames of ``year`` as a 3-tuple."""
    return tuple(ipo_group(year, list(range(first, first + 4))) for first in (1, 5, 9))

# Name suffix = two-digit year followed by the last month of the four-month window.
IPO_0604, IPO_0608, IPO_0612 = _split_year(2006)
IPO_0704, IPO_0708, IPO_0712 = _split_year(2007)
IPO_0804, IPO_0808, IPO_0812 = _split_year(2008)
IPO_0904, IPO_0908, IPO_0912 = _split_year(2009)
IPO_1004, IPO_1008, IPO_1012 = _split_year(2010)
IPO_1104, IPO_1108, IPO_1112 = _split_year(2011)
IPO_1204, IPO_1208, IPO_1212 = _split_year(2012)
IPO_1304, IPO_1308, IPO_1312 = _split_year(2013)
IPO_1404, IPO_1408, IPO_1412 = _split_year(2014)
IPO_1504, IPO_1508, IPO_1512 = _split_year(2015)
IPO_1604, IPO_1608, IPO_1612 = _split_year(2016)
IPO_1704, IPO_1708, IPO_1712 = _split_year(2017)
IPO_1804, IPO_1808, IPO_1812 = _split_year(2018)

### **2. WebCrawler Function**
* open the news search page
* collect the news title, date, and company name for each company
* collect the news content for each company
* save the results as an Excel file

In [19]:
# function to open infotimes webpage
def open_infotimes():
    """Prepare the global ``driver`` on the Infotimes news search page.

    Sets a 10-second implicit wait, maximizes the window, loads the search
    page, clicks the login link and then clicks the fourth source checkbox
    (Want Daily) to deselect it.

    Uses the module-level ``driver``; returns None.
    """
    driver.implicitly_wait(10)
    # maximize window
    driver.maximize_window()
    # connect to news search page of infotimes
    driver.get('http://kmw.chinatimes.com/News/NewsSearch.aspx?searchkind=a&keyword=')
    # login -- find_element(By.XPATH, ...) replaces find_element_by_xpath,
    # which no longer exists in current Selenium 4 releases
    driver.find_element(By.XPATH, "/html/body/form/div[3]/div[1]/div[2]/p[2]/a").click()
    # deselect Want Daily
    driver.find_element(By.XPATH, "/html/body/form/div[5]/div[1]/table/tbody/tr[1]/td[2]/span/label[4]/input").click()

In [20]:
# get news_title, date, name; use in get_news() function
def get_title_date_name(x,y):
    """Append the titles, dates and company name of one result page to the global lists.

    Parameters
    ----------
    x : object exposing ``find_all(class_=...)``
        The parsed result page (a BeautifulSoup object in this notebook).
    y : str
        Company name; recorded once for every title found on the page.

    Side effects
    ------------
    Extends the module-level ``title_list``, ``date_list`` and ``name_list``.
    Returns None.

    Raises
    ------
    AttributeError
        If a "temp-gvList-row" element contains no ``YYYY/MM/DD`` date.
    """
    # "NewsContetn" is spelled exactly as the lookup requires; do not "correct" it.
    titles = x.find_all(class_ = "NewsContetn")
    # title, append to title list
    for title in titles:
        title_list.append(title.text.replace("\n",""))
    # date, append to date list
    # Raw string: in a plain literal "\d" is an invalid escape sequence.
    date_pattern = re.compile(r'[\d]{4}/[\d]{2}/[\d]{2}')
    for date in x.find_all(class_ = "temp-gvList-row"):
        row_text = date.text.replace("\n","").replace(" ","")
        date_list.append(date_pattern.search(row_text).group(0))
    # name, append to name list (one entry per title)
    name_list.extend([y] * len(titles))

In [21]:
# get news content; use in get_news()
def get_content(i):
    """Open the article linked from result-table row ``i`` and store its text.

    The link in ``tr[i]/td[5]`` is Ctrl+clicked so that it opens in a second
    tab; the ``<article id="dvContainer">`` element of that tab is stripped of
    its first ``<style>`` and ``<table>`` descendants (when present), reduced
    to text without newlines, tabs, ideographic spaces and blanks, and
    appended to the module-level ``content_list``.

    Parameters
    ----------
    i : int
        1-based ``tr`` index inside the result table.

    Returns
    -------
    None.

    Raises
    ------
    AttributeError
        If the article container is not found on the opened page. The extra
        tab is still closed and focus returns to the result list first.
    """
    # new tab
    new_tab = driver.find_element(By.XPATH, "/html/body/form/div[5]/div[3]/div/table/tbody/tr["+str(i)+"]/td[5]/a")
    action = ActionChains(driver)
    action.move_to_element(new_tab).key_down(Keys.CONTROL).click(new_tab).key_up(Keys.CONTROL).perform() # new tab
    time.sleep(3) # wait for new tab
    driver.switch_to.window(driver.window_handles[1]) # switch to new tab
    try:
        time.sleep(1)

        # get content of new tab, append to content list
        bsobj = BeautifulSoup(driver.page_source,"lxml")
        content = bsobj.find("article", id = "dvContainer")
        # remove unwanted blocks; an article without one of them is still usable
        for tag_name in ("style", "table"):
            unwanted = content.find(tag_name)
            if unwanted is not None:
                unwanted.decompose()
        # append to list
        content_list.append(content.get_text().replace("\n","").replace("\t","").replace("\u3000","").replace(" ",""))
        time.sleep(1)
    finally:
        # Always close the article tab: a leftover tab would make
        # window_handles[1] point at the wrong page on the next call.
        driver.close()
        time.sleep(1)

        # switch back to search result page
        driver.switch_to.window(driver.window_handles[0])

In [22]:
# select and clear column content 
def clear(element):
    """Empty an input field by sending Ctrl+A and then Backspace to ``element``.

    ``element`` only needs a ``send_keys`` method; nothing is returned.
    """
    key_sequences = (
        (Keys.CONTROL, 'a'),   # select the whole field
        (Keys.BACKSPACE,),     # delete the selection
    )
    for keys in key_sequences:
        element.send_keys(*keys)

In [23]:
def get_news(name,start_date,end_date):
    """Search Infotimes for ``name`` within a date range and collect every hit.

    Fills in the keyword and both date fields of the already opened search
    page, submits the search, reads the reported hit count and then walks the
    result pages (10 hits per page). For each page the titles, dates and
    company name are collected with ``get_title_date_name`` and each article
    body with ``get_content``.

    Parameters
    ----------
    name : str
        Search keyword; also the value stored in ``name_list``.
    start_date, end_date : str
        Typed unchanged into the ``txtSDate`` / ``txtEDate`` fields.

    Side effects
    ------------
    Drives the module-level ``driver`` and extends ``title_list``,
    ``date_list``, ``name_list`` and ``content_list``. Returns None.

    Raises
    ------
    ValueError
        If the hit-count label does not contain a plain integer.
    """
    # input search name
    search_elm = driver.find_element(By.XPATH, "/html/body/form[@id='aspnetForm']/div[@class='container clear-fix']/div[@class='master-content clear-fix']/table[@class='search']/tbody/tr[2]/td[2]/span[@class='auto-style2']/input[@id='txtKeyword']")
    clear(search_elm)
    search_elm.send_keys(name)
    time.sleep(1)

    # input search date range
    start = driver.find_element(By.ID, "txtSDate")
    clear(start)
    start.send_keys(start_date)
    time.sleep(1)

    end = driver.find_element(By.ID, "txtEDate")
    clear(end)
    end.send_keys(end_date)
    time.sleep(1)

    # click on search button
    driver.find_element(By.XPATH, "/html/body/form/div[5]/div[1]/input").click()
    time.sleep(3)

    # Find news result number
    news = BeautifulSoup(driver.page_source, "lxml")
    time.sleep(1)
    news_num = int(news.find(id = "ctl00_ContentPlaceHolder1_UCPage1_lblRowCount").text)

    # Number of result pages: ceil(news_num / 10) in integer arithmetic
    news_page = (news_num + 9) // 10

    for page in range(1, news_page + 1):
        if page > 1:
            # click to change page, then give it a moment before reading the HTML
            driver.find_element(By.XPATH, "/html/body/form/div[5]/div[2]/table/tbody/tr/td[3]/div/span[2]/a[2]").click()
            time.sleep(1)
            news = BeautifulSoup(driver.page_source, "lxml")
        get_title_date_name(news,name)
        # Hits shown on this page: 10, or whatever is left on the last page.
        rows_on_page = min(10, news_num - (page - 1) * 10)
        # Article links are addressed from tr[2] onward.
        for row in range(2, rows_on_page + 2):
            get_content(row)

    gc.collect()
    time.sleep(1)

In [24]:
def excel(title):
    """Save the collected news lists to ``./news_data/times_<title>.xlsx``.

    Builds a DataFrame with the columns ``name``, ``date``, ``title`` and
    ``content`` from the module-level ``name_list``, ``date_list``,
    ``title_list`` and ``content_list``, writes it without the index and
    returns it.

    Parameters
    ----------
    title : str
        Suffix of the output file name (e.g. ``'0604'``).

    Returns
    -------
    pandas.DataFrame
        The frame that was written.

    Raises
    ------
    ValueError
        If the four lists do not all have the same length.
    """
    import os  # local import so this cell does not depend on an edit elsewhere

    df = pd.DataFrame({'name':name_list,'date':date_list,'title':title_list,'content':content_list})
    # Create the output folder on first use; otherwise a missing folder
    # makes the save fail after the whole crawl has finished.
    os.makedirs('./news_data', exist_ok=True)
    df.to_excel('./news_data/times_'+title+'.xlsx', index=False)
    return df

In [34]:
# Manual smoke test: run one search (碩天, 2009/11/23 - 2009/12/22) end to end.
service = Service(executable_path=r'C:\Users\nanan\Desktop\文字探勘\林可佳-20211122T165408Z-001\林可佳\程式碼_原始資料\0_NewsCrawler\news_crawler\chromedriver.exe')
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=service, options=options)
driver.implicitly_wait(10)
driver.maximize_window()
driver.get('http://kmw.chinatimes.com/News/NewsSearch.aspx?searchkind=a&keyword=')
# login link; find_element(By.XPATH, ...) replaces the removed find_element_by_xpath
driver.find_element(By.XPATH, "/html/body/form/div[3]/div[1]/div[2]/p[2]/a").click()

# search for news: reload the page the login click ended on
url = driver.current_url
driver.implicitly_wait(10)
driver.get(url)
# deselect the fourth source checkbox
driver.find_element(By.XPATH, "/html/body/form/div[5]/div[1]/table/tbody/tr[1]/td[2]/span/label[4]/input").click()

# send in search keys
search_elm = driver.find_element(By.XPATH, "/html/body/form[@id='aspnetForm']/div[@class='container clear-fix']/div[@class='master-content clear-fix']/table[@class='search']/tbody/tr[2]/td[2]/span[@class='auto-style2']/input[@id='txtKeyword']")
clear(search_elm)
search_elm.send_keys(u'碩天')

start = driver.find_element(By.ID, "txtSDate")
clear(start)
start.send_keys(u"2009/11/23")
time.sleep(1)

end = driver.find_element(By.ID, "txtEDate")
clear(end)
end.send_keys(u"2009/12/22")
time.sleep(1)

#click on search button
driver.find_element(By.XPATH, "/html/body/form/div[5]/div[1]/input").click()
time.sleep(5)

# Find news result number
news = BeautifulSoup(driver.page_source, "lxml")
time.sleep(1)
news_num = int(news.find(id = "ctl00_ContentPlaceHolder1_UCPage1_lblRowCount").text)

# Number of result pages: ceil(news_num / 10) in integer arithmetic
news_page = (news_num + 9) // 10

title_list = []
date_list = []
name_list = []
content_list = []

for page in range(1, news_page + 1):
    if page > 1:
        #click to change page, then wait before reading the new HTML
        driver.find_element(By.XPATH, "/html/body/form/div[5]/div[2]/table/tbody/tr/td[3]/div/span[2]/a[2]").click()
        time.sleep(1)
        news = BeautifulSoup(driver.page_source, "lxml")
    # Same helper for every page; the former inline page-1 copy reused ``i``
    # as an inner loop variable and shadowed the page counter.
    get_title_date_name(news,'碩天')
    # Only the first article of each page (tr[2]) is opened in this smoke test,
    # so content_list ends up shorter than title_list here.
    get_content(2)

# driver.quit()

WebDriverException: Message: unknown error: cannot find Chrome binary
Stacktrace:
Backtrace:
	Ordinal0 [0x00700AE3+1706723]
	Ordinal0 [0x006668C1+1075393]
	Ordinal0 [0x005DDFA9+516009]
	Ordinal0 [0x0056C70E+50958]
	Ordinal0 [0x0058BFD7+180183]
	Ordinal0 [0x0058BDDD+179677]
	Ordinal0 [0x00589D4B+171339]
	Ordinal0 [0x00571D4A+73034]
	Ordinal0 [0x00572DC0+77248]
	Ordinal0 [0x00572D59+77145]
	Ordinal0 [0x0067BB37+1162039]
	GetHandleVerifier [0x0079A7C6+508998]
	GetHandleVerifier [0x0079A504+508292]
	GetHandleVerifier [0x007AF617+594583]
	GetHandleVerifier [0x0079B036+511158]
	Ordinal0 [0x00673FFC+1130492]
	Ordinal0 [0x0067D49B+1168539]
	Ordinal0 [0x0067D603+1168899]
	Ordinal0 [0x00695B05+1268485]
	BaseThreadInitThunk [0x74FAFCC9+25]
	RtlGetAppContainerNamedObjectPath [0x770A7C5E+286]
	RtlGetAppContainerNamedObjectPath [0x770A7C2E+238]


* 2006

In [35]:
# Leave 一零四 out of the Jan-Apr 2006 crawl list.
_keep_0604 = ~IPO_0604['name'].isin(['一零四'])
IPO_0604_drop = IPO_0604[_keep_0604].reset_index(drop = True)

In [6]:
# Crawl news for the Jan-Apr 2006 IPOs (after exclusions) and save them via excel('0604').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0604_drop['name'], IPO_0604_drop['start_date'], IPO_0604_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0604 = excel('0604')

In [37]:
# Leave 中天 out of the May-Aug 2006 crawl list.
_keep_0608 = ~IPO_0608['name'].isin(['中天'])
IPO_0608_drop = IPO_0608[_keep_0608].reset_index(drop = True)

In [38]:
# Crawl news for the May-Aug 2006 IPOs (after exclusions) and save them via excel('0608').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0608_drop['name'], IPO_0608_drop['start_date'], IPO_0608_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0608 = excel('0608')

In [39]:
# Leave 創意 out of the Sep-Dec 2006 crawl list.
_keep_0612 = ~IPO_0612['name'].isin(['創意'])
IPO_0612_drop = IPO_0612[_keep_0612].reset_index(drop = True)

In [40]:
# Crawl news for the Sep-Dec 2006 IPOs (after exclusions) and save them via excel('0612').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0612_drop['name'], IPO_0612_drop['start_date'], IPO_0612_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0612 = excel('0612')

* 2007

In [41]:
# Crawl news for the Jan-Apr 2007 IPOs and save them via excel('0704').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0704['name'], IPO_0704['start_date'], IPO_0704['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0704 = excel('0704')

In [42]:
# Crawl news for the May-Aug 2007 IPOs and save them via excel('0708').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0708['name'], IPO_0708['start_date'], IPO_0708['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0708 = excel('0708')

In [43]:
# Leave 無敵 out of the Sep-Dec 2007 crawl list.
_keep_0712 = ~IPO_0712['name'].isin(['無敵'])
IPO_0712_drop = IPO_0712[_keep_0712].reset_index(drop = True)

In [44]:
# Crawl news for the Sep-Dec 2007 IPOs (after exclusions) and save them via excel('0712').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0712_drop['name'], IPO_0712_drop['start_date'], IPO_0712_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0712 = excel('0712')

* 2008

In [45]:
# Crawl news for the Jan-Apr 2008 IPOs and save them via excel('0804').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0804['name'], IPO_0804['start_date'], IPO_0804['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0804 = excel('0804')

In [46]:
# Leave 進階 out of the May-Aug 2008 crawl list.
_keep_0808 = ~IPO_0808['name'].isin(['進階'])
IPO_0808_drop = IPO_0808[_keep_0808].reset_index(drop = True)

In [47]:
# Crawl news for the May-Aug 2008 IPOs (after exclusions) and save them via excel('0808').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0808_drop['name'], IPO_0808_drop['start_date'], IPO_0808_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0808 = excel('0808')

In [48]:
# Crawl news for the Sep-Dec 2008 IPOs and save them via excel('0812').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0812['name'], IPO_0812['start_date'], IPO_0812['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0812 = excel('0812')

* 2009

In [49]:
# Crawl news for the Jan-Apr 2009 IPOs and save them via excel('0904').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0904['name'], IPO_0904['start_date'], IPO_0904['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0904 = excel('0904')

In [50]:
# Crawl news for the May-Aug 2009 IPOs and save them via excel('0908').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0908['name'], IPO_0908['start_date'], IPO_0908['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0908 = excel('0908')

In [51]:
# Crawl news for the Sep-Dec 2009 IPOs and save them via excel('0912').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_0912['name'], IPO_0912['start_date'], IPO_0912['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_0912 = excel('0912')

* 2010

In [37]:
# Crawl news for the Jan-Apr 2010 IPOs and save them via excel('1004').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1004['name'], IPO_1004['start_date'], IPO_1004['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1004 = excel('1004')

In [38]:
# Crawl news for the May-Aug 2010 IPOs and save them via excel('1008').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1008['name'], IPO_1008['start_date'], IPO_1008['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1008 = excel('1008')

In [39]:
# Crawl news for the Sep-Dec 2010 IPOs and save them via excel('1012').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1012['name'], IPO_1012['start_date'], IPO_1012['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1012 = excel('1012')

* 2011

In [40]:
# Crawl news for the Jan-Apr 2011 IPOs and save them via excel('1104').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1104['name'], IPO_1104['start_date'], IPO_1104['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1104 = excel('1104')

In [41]:
# Leave 辣椒 out of the May-Aug 2011 crawl list.
_keep_1108 = ~IPO_1108['name'].isin(['辣椒'])
IPO_1108_drop = IPO_1108[_keep_1108].reset_index(drop = True)

In [42]:
# Crawl news for the May-Aug 2011 IPOs (after exclusions) and save them via excel('1108').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1108_drop['name'], IPO_1108_drop['start_date'], IPO_1108_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1108 = excel('1108')

In [43]:
# Leave 合一, 傳奇, 安心 and 基亞 out of the Sep-Dec 2011 crawl list.
_keep_1112 = ~IPO_1112['name'].isin(['合一','傳奇','安心','基亞'])
IPO_1112_drop = IPO_1112[_keep_1112].reset_index(drop = True)

In [44]:
# Crawl news for the Sep-Dec 2011 IPOs (after exclusions) and save them via excel('1112').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1112_drop['name'], IPO_1112_drop['start_date'], IPO_1112_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1112 = excel('1112')

* 2012

In [45]:
# Crawl news for the Jan-Apr 2012 IPOs and save them via excel('1204').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1204['name'], IPO_1204['start_date'], IPO_1204['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1204 = excel('1204')

In [46]:
# Crawl news for the May-Aug 2012 IPOs and save them via excel('1208').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1208['name'], IPO_1208['start_date'], IPO_1208['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1208 = excel('1208')

In [36]:
# Leave 惠普 out of the Sep-Dec 2012 crawl list.
_keep_1212 = ~IPO_1212['name'].isin(['惠普'])
IPO_1212_drop = IPO_1212[_keep_1212].reset_index(drop = True)

In [47]:
# Crawl news for the Sep-Dec 2012 IPOs (after exclusions) and save them via excel('1212').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1212_drop['name'], IPO_1212_drop['start_date'], IPO_1212_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1212 = excel('1212')

* 2013

In [75]:
# Crawl news for the Jan-Apr 2013 IPOs and save them via excel('1304').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1304['name'], IPO_1304['start_date'], IPO_1304['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1304 = excel('1304')

In [76]:
# Crawl news for the May-Aug 2013 IPOs and save them via excel('1308').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1308['name'], IPO_1308['start_date'], IPO_1308['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1308 = excel('1308')

In [77]:
# Leave 大量 out of the Sep-Dec 2013 crawl list.
_keep_1312 = ~IPO_1312['name'].isin(['大量'])
IPO_1312_drop = IPO_1312[_keep_1312].reset_index(drop = True)

In [78]:
# Crawl news for the Sep-Dec 2013 IPOs (after exclusions) and save them via excel('1312').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1312_drop['name'], IPO_1312_drop['start_date'], IPO_1312_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1312 = excel('1312')

* 2014

In [79]:
# Leave 數字 out of the Jan-Apr 2014 crawl list.
_keep_1404 = ~IPO_1404['name'].isin(['數字'])
IPO_1404_drop = IPO_1404[_keep_1404].reset_index(drop = True)

In [80]:
# Crawl news for the Jan-Apr 2014 IPOs (after exclusions) and save them via excel('1404').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1404_drop['name'], IPO_1404_drop['start_date'], IPO_1404_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1404 = excel('1404')

In [81]:
# Crawl news for the May-Aug 2014 IPOs and save them via excel('1408').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1408['name'], IPO_1408['start_date'], IPO_1408['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1408 = excel('1408')

In [82]:
# Crawl news for the Sep-Dec 2014 IPOs and save them via excel('1412').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1412['name'], IPO_1412['start_date'], IPO_1412['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1412 = excel('1412')

* 2015

In [83]:
# Crawl news for the Jan-Apr 2015 IPOs and save them via excel('1504').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1504['name'], IPO_1504['start_date'], IPO_1504['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1504 = excel('1504')

In [84]:
# Crawl news for the May-Aug 2015 IPOs and save them via excel('1508').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1508['name'], IPO_1508['start_date'], IPO_1508['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1508 = excel('1508')

In [85]:
# Crawl news for the Sep-Dec 2015 IPOs and save them via excel('1512').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1512['name'], IPO_1512['start_date'], IPO_1512['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1512 = excel('1512')

* 2016

In [87]:
# Crawl news for the Jan-Apr 2016 IPOs and save them via excel('1604').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1604['name'], IPO_1604['start_date'], IPO_1604['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1604 = excel('1604')

In [88]:
# Crawl news for the May-Aug 2016 IPOs and save them via excel('1608').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1608['name'], IPO_1608['start_date'], IPO_1608['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1608 = excel('1608')

In [89]:
# Leave 互動 and 創業家 out of the Sep-Dec 2016 crawl list.
_keep_1612 = ~IPO_1612['name'].isin(['互動','創業家'])
IPO_1612_drop = IPO_1612[_keep_1612].reset_index(drop = True)

In [90]:
# Crawl news for the Sep-Dec 2016 IPOs (after exclusions) and save them via excel('1612').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1612_drop['name'], IPO_1612_drop['start_date'], IPO_1612_drop['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1612 = excel('1612')

* 2017

In [27]:
# Crawl news for the Jan-Apr 2017 IPOs and save them via excel('1704').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1704['name'], IPO_1704['start_date'], IPO_1704['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1704 = excel('1704')

In [14]:
# Crawl news for the May-Aug 2017 IPOs and save them via excel('1708').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1708['name'], IPO_1708['start_date'], IPO_1708['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1708 = excel('1708')

In [15]:
# Crawl news for the Sep-Dec 2017 IPOs and save them via excel('1712').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1712['name'], IPO_1712['start_date'], IPO_1712['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1712 = excel('1712')

* 2018

In [16]:
# Crawl news for the Jan-Apr 2018 IPOs and save them via excel('1804').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1804['name'], IPO_1804['start_date'], IPO_1804['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1804 = excel('1804')

In [17]:
# Crawl news for the May-Aug 2018 IPOs and save them via excel('1808').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1808['name'], IPO_1808['start_date'], IPO_1808['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1808 = excel('1808')

In [18]:
# Crawl news for the Sep-Dec 2018 IPOs and save them via excel('1812').
driver = webdriver.Chrome(service=Service('./chromedriver'))  # Selenium 4: driver path goes through Service

# Reset the module-level accumulators that get_news() fills and excel() reads.
title_list = []
date_list = []
name_list = []
content_list = []

try:
    open_infotimes()
    for ipo_name, date_from, date_to in zip(IPO_1812['name'], IPO_1812['start_date'], IPO_1812['end_date']):
        get_news(ipo_name, date_from, date_to)
finally:
    # Close the browser even when a search raises, so no Chrome process is leaked.
    driver.quit()
df_1812 = excel('1812')