## Redis Server

```
sudo apt-get update
sudo apt-get install redis-server
```

Or build Redis from source:

```
wget http://download.redis.io/releases/redis-6.0.8.tar.gz
tar xzf redis-6.0.8.tar.gz
cd redis-6.0.8
make
```

```
pip install redis==3.5.3
REDIS_CONNECTION_URL=redis://127.0.0.1:6379
```

In [2]:
import os

import redis

# Connection URL for the Redis cache. The REDIS_CONNECTION_URL environment
# variable (the name used in the setup notes) wins when it is present and
# non-empty in the kernel's environment; otherwise fall back to the local
# default that used to be hard-coded here.
redis_ip = os.environ.get('REDIS_CONNECTION_URL') or 'redis://127.0.0.1:6379'
redis_client = redis.StrictRedis.from_url(redis_ip)

---

## Main

In [119]:
import time
import json
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from datetime import timedelta
from selenium import webdriver

In [5]:
# Browser session used to load stock pages (get_stock_info calls driver.get).
# The executable path is relative, so it resolves against the kernel's
# current working directory.
# NOTE(review): PhantomJS support appears to have been deprecated and later
# removed in newer Selenium releases — confirm the installed selenium version
# still provides webdriver.PhantomJS.
driver = webdriver.PhantomJS(executable_path='./phantomjs-2.1.1-macosx/bin/phantomjs')  # PhantomJs



In [None]:
# Load the detail page of one sample stock through the browser session and
# parse the resulting page source with BeautifulSoup's 'html.parser' backend.
# Presumably kept for trying the parser functions by hand against `soup`.
stock_id = '00878'
url = 'https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID={}'.format(stock_id)
driver.get(url)
soup = BeautifulSoup(driver.page_source, 'html.parser')

In [7]:
def get_name(soup, stock_id):
    """Return the display name of a stock from its parsed detail page.

    The page is expected to contain an ``<a class="link_blue">`` element whose
    ``href`` is ``StockDetail.asp?STOCK_ID=<stock_id>`` and whose text has the
    form ``<stock_id><NBSP><name>`` (NBSP is the non-breaking space U+00A0).

    Args:
        soup: Parsed page; must support
            ``find(name=..., class_=..., href=...)`` and return an object with
            a ``.text`` attribute (a BeautifulSoup document in this notebook).
        stock_id: Stock identifier as a string, e.g. ``'2330'``.

    Returns:
        Everything after the first non-breaking space in the link text.

    Raises:
        AssertionError: If the link text has no non-breaking space, or if the
            part before it differs from ``stock_id``.
        AttributeError: Presumably when no matching anchor exists and
            ``soup.find`` returns ``None`` — the lookup result is used
            without a check, as before.
    """
    anchor = soup.find(
        name="a",
        class_="link_blue",
        href='StockDetail.asp?STOCK_ID={}'.format(stock_id),
    )
    raw_id_name = anchor.text

    # str.find() returns -1 when the separator is absent, which used to turn
    # the slices into raw[:-1] / raw[0:] and could let malformed text pass the
    # ID check. partition() reports a missing separator as an empty `sep`.
    parsed_id, sep, name = raw_id_name.partition('\xa0')
    assert sep, 'Separator not found in stock link text: {!r}'.format(raw_id_name)
    assert parsed_id == stock_id, 'Parsed stock_id not match: got {} v.s. {}'.format(parsed_id, stock_id)
    return name

In [8]:
def get_beta(soup, stock_id):
    """Extract the risk-coefficient (風險係數) table from a parsed detail page.

    The table is located by its CSS class and inline style. Its ``<td>`` cells
    are read as: one header cell, then N interval-label cells, then N value
    cells in the same order.

    Args:
        soup: Parsed page; must support ``find(name=..., class_=..., style=...)``
            returning an element with ``find_all('td')`` (a BeautifulSoup
            document in this notebook).
        stock_id: Unused; kept so the signature matches the other parsers.

    Returns:
        dict mapping a numeric interval key (string, see the map below) to the
        value cell's text, e.g. ``{'5': '1.26', '20': '0.85'}``.

    Raises:
        AssertionError: If the first cell's ``<nobr>`` text is not '風險係數'.
        KeyError: If an interval label is not in the label map.
    """
    cells = soup.find(
        name="table",
        class_='solid_1_padding_4_2_tbl',
        style='font-size:11pt;line-height:20px;',
    ).find_all('td')

    header = cells[0]
    # The message previously named 風險指數 although the check is for 風險係數.
    assert header.find('nobr').text == '風險係數', 'soup parser error, 風險係數 not found in {}'.format(header)

    # cells = [header, label_1..label_N, value_1..value_N]  ->  N = len // 2
    num_interval = len(cells) // 2
    intervals = cells[1:num_interval + 1]
    values = cells[num_interval + 1:]

    # Page label -> numeric key (presumably a count of trading days — confirm).
    interval_to_numeric_map = {
        '5日': '5', '10日': '10',
        '一個月': '20', '三個月': '60',
        '半年': '120', '一年': '240', '三年': '720',
        '五年': '1200', '十年': '2400', '二十年': '4800',
    }
    return {
        interval_to_numeric_map[interval.find('nobr').text]: value.text
        for interval, value in zip(intervals, values)
    }

In [154]:
def get_stock_info(stock_id, check_redis=True):
    """Return name and risk-coefficient data for a stock, using Redis as a cache.

    A cached entry is reused when its ``update_date`` is not older than the
    most recent 14:00 (local time, as given by ``datetime.now()``): before
    14:00 that is yesterday's 14:00, otherwise today's. If there is no usable
    entry, the page is loaded through the module-level ``driver``, parsed, and
    the result is written back to Redis as JSON.

    NOTE(review): only the hour of day is considered; weekends and holidays
    are not treated specially.

    Args:
        stock_id: Stock identifier string; also used as the Redis key.
        check_redis: When False, skip the cache lookup and always crawl.

    Returns:
        dict with keys ``'id'``, ``'name'``, ``'beta'`` and ``'update_date'``
        (``'%Y/%m/%d %H:%M'`` string).
    """
    now_date = datetime.now()

    # Zero the microseconds as well, so the reference is exactly 14:00:00
    # rather than 14:00:00.<leftover microseconds of now>.
    today_close = now_date.replace(hour=14, minute=0, second=0, microsecond=0)
    if now_date.hour <= 13:
        last_market_closed = today_close - timedelta(days=1)
    else:
        last_market_closed = today_close

    if check_redis:
        # One GET instead of EXISTS + GET: the key cannot vanish in between.
        cached = redis_client.get(stock_id)
        if cached is not None:
            # Entries are written with json.dumps below, so decode them with
            # json.loads. eval() would execute whatever is stored under the
            # key and fails on JSON literals such as null/true/false.
            res = json.loads(cached.decode())
            update_date = datetime.strptime(res['update_date'], "%Y/%m/%d %H:%M")
            print(update_date)
            print(last_market_closed)
            # update_date is truncated to the minute, so an entry stamped
            # exactly 14:00 was written at or after the reference time.
            if update_date >= last_market_closed:
                return res

    print('Get data from web crawler')
    url = 'https://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID={}'.format(stock_id)
    driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    res = {
        'id': stock_id,
        'name': get_name(soup, stock_id),
        'beta': get_beta(soup, stock_id),
        'update_date': datetime.now().strftime("%Y/%m/%d %H:%M"),
    }

    redis_client.set(stock_id, json.dumps(res, ensure_ascii=True))

    return res

In [155]:
# Time a single lookup. perf_counter() is a monotonic, high-resolution clock
# intended for measuring elapsed time; the wall clock from time.time() can
# jump if the system time is adjusted.
start = time.perf_counter()

stock_id = '2330'
res = get_stock_info(stock_id, check_redis=True)

print('Time cost: {:.3f}s'.format(time.perf_counter() - start))
print(res)

2021-01-23 14:01:00
2021-01-23 14:00:00.330857
Time cost: 0.003s
{'id': '2330', 'name': '台積電', 'beta': {'5': '1.63', '10': '1.4', '20': '1.27', '60': '1.47', '120': '1.57', '240': '1.21', '720': '1.34', '1200': '1.35', '2400': '1.21', '4800': '1.14'}, 'update_date': '2021/01/23 14:01'}


In [157]:
# Look up a second stock; as the cell's last expression, the returned dict is
# what the notebook displays.
get_stock_info('2754', check_redis=True)

2021-01-23 14:02:00
2021-01-23 14:00:00.326598


{'id': '2754',
 'name': '亞洲藏壽司',
 'beta': {'5': '1.26', '10': '0.86', '20': '0.85', '60': '0.73'},
 'update_date': '2021/01/23 14:02'}