# 一、安裝必要套件

pip前須加驚嘆號

`!pip install -q matplotlib-venn`

# numpy、requests、pandas、pandas_datareader、matplotlib



numpy支援高階大量的維度陣列與矩陣運算，此外也針對陣列運算提供大量的數學函式庫。

Requests是一個Python HTTP庫，在Apache 2許可證下發布。

pandas是一個Python語言的模組（module），是一個以numpy為基礎的函式庫，能讓使用者方便地讀取與儲存資料。


# 二、程式碼

# 1.抓取台股證交所每日股價資料與分析

載入所需函式庫

In [0]:
!pip install numpy
!pip install requests
!pip install pandas
!pip install pandas_datareader
!pip install matplotlib

In [0]:
import numpy as np
import requests
import pandas as pd
import datetime


格式轉換

In [0]:
#   http://www.twse.com.tw/exchangeReport/STOCK_DAY?date=20180817&stockNo=2330  取一個月的股價與成交量
def get_stock_history(date, stock_no, timeout=10):
    """Fetch one month of daily price/volume records for a stock from TWSE.

    Args:
        date: Query date string in ``YYYYMMDD`` form (e.g. ``'20180817'``).
        stock_no: Stock code (e.g. ``'2330'``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The rows found under the response's ``'data'`` key, converted by
        ``transform``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON response has no ``'data'`` entry.
    """
    url = 'http://www.twse.com.tw/exchangeReport/STOCK_DAY'
    # Let requests build and encode the query string instead of formatting it by hand.
    r = requests.get(
        url,
        params={'date': date, 'stockNo': stock_no},
        timeout=timeout,  # without a timeout an unresponsive server blocks forever
    )
    r.raise_for_status()
    return transform(r.json()['data'])  # convert the raw string rows

def transform_date(date):
    """Convert a ``year/month/day`` string from the ROC calendar to Gregorian.

    The year part is parsed as an integer and increased by 1911; the month
    and day parts are passed through unchanged.
    """
    roc_year, month, day = date.split('/')
    gregorian_year = int(roc_year) + 1911
    return '/'.join([str(gregorian_year), month, day])
    
def transform_data(data):
    """Convert one raw row of strings into typed values, in place.

    Position 0 becomes a ``datetime``; positions 1, 2 and 8 become ``int``;
    positions 3-7 become ``float``. Thousands separators are removed before
    parsing. The same list object is returned.
    """
    def without_commas(text):
        # Drop the thousands separators so int()/float() can parse the text.
        return text.replace(',', '')

    data[0] = datetime.datetime.strptime(transform_date(data[0]), '%Y/%m/%d')
    for pos in (1, 2):
        data[pos] = int(without_commas(data[pos]))
    for pos in (3, 4, 5, 6):
        data[pos] = float(without_commas(data[pos]))

    # The change column is prefixed with +, - or X (up / down / not compared);
    # the literal 'X0.00' is mapped to 0.0.
    change = without_commas(data[7])
    if change == 'X0.00':
        data[7] = 0.0
    else:
        data[7] = float(change)

    data[8] = int(without_commas(data[8]))
    return data

def transform(data):
    """Run ``transform_data`` on every row of ``data`` and return the results as a list."""
    converted = []
    for row in data:
        converted.append(transform_data(row))
    return converted



按照日期進行分析

In [0]:
def create_df(date, stock_no):
    """Build a date-indexed DataFrame of one stock's daily trading records.

    Args:
        date: Query date passed through to ``get_stock_history``.
        stock_no: Stock code; passed to ``get_stock_history`` and stored in
            the ``stockno`` column of every row so rows from different
            stocks can later be told apart.

    Returns:
        A DataFrame indexed by trading date with the columns shares, amount,
        open, high, low, close, change, turnover, stockno and month. When no
        rows come back the frame is empty but has the same columns.
    """
    # Source order: date, traded shares, traded amount, open, high, low,
    # close, price change, number of transactions.
    columns = ['date', 'shares', 'amount', 'open', 'high', 'low', 'close',
               'change', 'turnover']
    # Naming the columns in the constructor also works for an empty result.
    s = pd.DataFrame(get_stock_history(date, stock_no), columns=columns)
    s['stockno'] = stock_no  # scalar is broadcast to every row
    s.index = list(s['date'])  # plain list keeps the index unnamed
    s2 = s.drop(['date'], axis=1)  # the date now lives in the index
    s2['month'] = [day.month for day in s2.index]
    return s2

交易日期及代碼

In [0]:
listDji = ['2330']
for stock_no in listDji:
    result = create_df('20180701', stock_no)
    print(result)

# The summaries below use the frame of the last stock processed above.
print(result.groupby('month')['close'].count())  # trading days per month
print(result.groupby('month')['shares'].sum())  # total traded shares per month

               shares       amount   open   high    low  close  change  \
2018-07-02   33496442   7257081470  218.5  219.0  214.0  214.0    -2.5   
2018-07-03   28663220   6173115899  215.5  218.0  213.5  214.5     0.5   
2018-07-04   15359295   3324927032  217.0  217.5  215.5  216.0     1.5   
2018-07-05   18225416   3904832940  214.0  215.0  213.0  214.5    -1.5   
2018-07-06   31344962   6779063394  217.5  217.5  215.0  217.0     2.5   
2018-07-09   41165793   9100880393  219.5  223.0  218.5  221.5     4.5   
2018-07-10   20196957   4498264914  223.0  224.0  222.0  222.0     0.5   
2018-07-11   19854690   4355547100  220.0  220.0  218.0  220.0    -2.0   
2018-07-12   23806051   5246628771  218.0  222.0  218.0  220.5     0.5   
2018-07-13   30419311   6799894334  222.5  224.5  222.5  224.5     4.0   
2018-07-16   16107254   3612296896  224.5  225.0  223.5  223.5    -1.0   
2018-07-17   22554436   5003028272  222.5  223.5  221.0  221.5    -2.0   
2018-07-18   45802658  10221407834  22

# 2.亞洲股市指數

安裝前需要先裝 libxml2, libxslt

$ brew install libxml2

$ brew install libxslt

$ brew link libxml2 --force

$ brew link libxslt --force
接著裝以下套件即可完工

$ pip install lxml

$ pip install html5lib

$ pip install pandas

In [0]:
import pandas as pd
url = 'http://www.stockq.org/market/asia.php'
# read_html returns one DataFrame per <table> on the page; the one at
# position 4 is used here.
table = pd.read_html(url)[4]
# Skip the first five rows and keep the first nine columns by position.
# Positional slicing avoids handing column labels to a row-wise drop, which
# only worked while row and column labels were the same 0..n integers.
table = table.iloc[5:, :9]
table

Unnamed: 0,0,1,2,3,4,5,6,7,8
5,紐西蘭,9498.0,24.73,0.26%,9501.57,9463.05,9477.21,7.79%,16:11
6,澳洲股市,6273.3,8.2,0.13%,6292.0,6260.2,9.88%,15:31,
7,日經225,21579.66,128.81,0.60%,21612.67,21500.32,21576.36,7.82%,11:35
8,東證一部,1609.4,6.77,0.42%,1611.71,1604.47,7.72%,12:30,
9,東證二部,6855.66,41.32,0.61%,6859.55,6833.68,6836.81,9.98%,12:30
10,JASDAQ,152.1,0.74,0.49%,152.13,151.63,11.27%,12:30,
11,韓國股市,2177.88,1.77,0.08%,2183.9,2173.21,6.70%,13:32,
12,台灣加權,10481.37,42.13,0.40%,10481.98,10445.74,7.75%,12:11,
13,台灣店頭,137.73,0.08,0.06%,138.41,137.71,11.49%,12:12,
14,上海綜合,3059.95,38.2,1.26%,3061.67,3009.51,22.70%,12:31,


# 3.台股每日爬蟲

抓取台股的資料表

In [0]:
import requests
from io import StringIO
import pandas as pd
import numpy as np

datestr = '20180131'

# Download the daily quotes as CSV text.
r = requests.post(
    'http://www.twse.com.tw/exchangeReport/MI_INDEX?response=csv&date='
    + datestr
    + '&type=ALL'
)

# Tidy the text into a table: keep only the lines that split into 17 parts
# on '",' and do not start with '=', remove every space from them, and parse
# what is left as CSV with the first kept line as the header.
df = pd.read_csv(
    StringIO("\n".join(
        line.replace(' ', '')
        for line in r.text.split('\n')
        if len(line.split('",')) == 17 and not line.startswith('=')
    )),
    header=0,
)

選股時間，我們想要選擇 本益比 < 10 的所有股票：

In [0]:
# Rows whose '本益比' value parses as a number below 10; values that cannot be
# parsed become NaN and are therefore excluded.
df.loc[pd.to_numeric(df['本益比'], errors='coerce') < 10]

# 4.財報爬蟲

In [0]:
import requests
import pandas as pd
import numpy as np

def financial_statement(year, season, type='綜合損益彙總表'):
    """Download one summary report from MOPS and return it as a DataFrame.

    Args:
        year: Report year. Values of 1000 or more have 1911 subtracted before
            being sent (presumably a Gregorian-to-ROC year conversion).
        season: Sent as the ``season`` form field.
        type: Report name; one of '綜合損益彙總表', '資產負債彙總表' or
            '營益分析彙總表'.

    Returns:
        All tables found in the response concatenated into one DataFrame,
        with '--' cells replaced by NaN and with repeated header rows and
        rows lacking a '公司代號' value removed.

    Raises:
        ValueError: If ``type`` is not one of the supported report names.
    """
    # Local import so this function does not depend on another cell's imports.
    from io import StringIO

    endpoints = {
        '綜合損益彙總表': 'http://mops.twse.com.tw/mops/web/ajax_t163sb04',
        '資產負債彙總表': 'http://mops.twse.com.tw/mops/web/ajax_t163sb05',
        '營益分析彙總表': 'http://mops.twse.com.tw/mops/web/ajax_t163sb06',
    }
    # Fail fast: previously an unknown type only printed a message and then
    # crashed on the unbound ``url`` variable.
    if type not in endpoints:
        raise ValueError('type does not match: %r' % (type,))
    url = endpoints[type]

    if year >= 1000:
        year -= 1911

    r = requests.post(url, {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'off': 1,
        'TYPEK': 'sii',
        'year': str(year),
        'season': str(season),
    })

    r.encoding = 'utf8'
    # Wrap the text in a file-like object; passing literal HTML to read_html
    # is deprecated in recent pandas releases.
    tables = []
    for table in pd.read_html(StringIO(r.text)):
        # The first row of each table holds the real column names.
        table.columns = table.iloc[0]
        tables.append(table.iloc[1:])

    # '--' marks a missing value; replace() stands in for the deprecated applymap.
    df = pd.concat(tables).replace('--', np.nan)
    df = df[df['公司代號'] != '公司代號']  # drop header rows repeated inside the data
    df = df[df['公司代號'].notnull()]
    return df

利用Pandas輕鬆選股

In [0]:
# Keep the '營益分析彙總表' report in the variable df.
df = financial_statement(year=107, season=2, type='營益分析彙總表')
# Rows whose '公司代號' parses as a number below 1111.
df.loc[pd.to_numeric(df['公司代號'], errors='coerce') < 1111]

Unnamed: 0,公司代號,公司名稱,合計：共 895 家,毛利率(%)(營業毛利)/(營業收入),營業利益率(%)(營業利益)/(營業收入),營業收入(百萬元),稅前純益率(%)(稅前純益)/(營業收入),稅後純益率(%)(稅後純益)/(營業收入)
1,1101,台泥,,27.93,23.7,57500.25,25.11,18.39
2,1102,亞泥,,23.92,19.8,39009.09,25.22,19.95
3,1103,嘉泥,,14.47,-2.18,1000.93,21.22,44.12
4,1104,環球水泥,,11.46,3.09,2314.86,20.64,17.64
5,1108,幸福水泥,,3.7,-1.94,1684.25,-2.01,-0.48
6,1109,信大水泥,,29.27,22.5,2809.56,23.03,13.96
7,1110,東泥,,8.58,2.08,792.19,6.71,7.0
