# 價值股策略

In [6]:
import pandas as pd
import numpy as np
import pymongo
from typing import Union
import datetime
from tqdm import tqdm
import matplotlib.pyplot as plt


### Class預期
1. 關於資料的讀取 - 最後都要組成單檔股票為一個sheet來處理
    * 抓取資料後存到dict? 或是存成numpy? 也可以存在pandas(每個資料一個sheet) - 以存到numpy為主要測試方向(快很多)
    * 已確認 numpy 無法依標籤對齊，因此先用 pandas 組成資料並完成對齊，最後再轉成 numpy 進行運算(避免對齊錯誤)
2. 關於資料的改動
    * 需要用到的資料最後變成每個資料一個sheet，捨棄不需要用的資料省下記憶體與增加速度 - 要記得思考要如何同時跟其他股票在同一時間判斷進出場(資金控管需求)
3. 關於回測
    * 關於回測的預期: 回傳損益(return)、部位(position)以及交易紀錄(transactions)；其中 return 與 position 的 index 為 datetime，columns 為 symbols 加上 cash

In [54]:
class Base:
    """Common base that stores the MongoDB client and applies display settings.

    Args:
        client: MongoDB client that subclasses read collections from.
    """

    def __init__(self, client: pymongo.MongoClient):
        self._config()
        self.client = client

    def _config(self):
        """Apply process-wide plotting, tqdm and pandas display options."""
        pd.options.plotting.backend = "matplotlib"
        # Hook tqdm into pandas so progress bars are labelled "progress-bar".
        tqdm.pandas(desc="progress-bar")
        # Font family used for Chinese labels in plots.
        plt.rcParams['font.family'] = ['Microsoft JhengHei']
        # Minus-sign rendering setting that accompanies the font above.
        plt.rcParams['axes.unicode_minus'] = False
        pd.set_option('display.max_rows', 200)
        # Only one float format can be active. The earlier '%.3f' setting was
        # immediately overwritten by this one, so it has been removed.
        pd.options.display.float_format = '{:,.4f}'.format

class DataCenter(Base):
    """Load MongoDB collections into DataFrames and keep a set of factors.

    Instance state:
        data: dict mapping a collection/field name to its DataFrame.
        factors: dict mapping a factor name to the array set by ``set_factor``.
    """

    def __init__(self, client: pymongo.MongoClient):
        super().__init__(client)
        self.data = {}
        self.factors = {}

    def _load_collection(self, db: str, name: str, query: dict) -> pd.DataFrame:
        """Read one collection into a DataFrame, indexed by '日期' if present."""
        frame = pd.DataFrame(self.client[db][name].find(query, {'_id': 0}))
        # An empty result has no columns at all, so the check also protects
        # against a KeyError when the query matches nothing.
        if '日期' in frame.columns:
            frame = frame.set_index('日期')
        return frame

    def get_from_mongo(self, elements: Union[str, list], db:str='Fields',
     start: Union[datetime.datetime, None]=None, 
     end: Union[datetime.datetime, None]=None):
        """Load one or more collections from ``db`` into ``self.data``.

        Args:
            elements: Collection name, or a list of collection names.
            db: Database name.
            start: Exclusive lower bound on '日期'; ``None`` means no bound.
            end: Exclusive upper bound on '日期'; ``None`` means no bound.

        Each loaded frame is stored under its collection name. When several
        names are passed, the shape of each frame is printed.
        """
        single = isinstance(elements, str)
        names = [elements] if single else elements

        # Only add the bounds that were actually supplied, so a missing bound
        # is never sent to MongoDB as a comparison against None.
        date_range = {}
        if start is not None:
            date_range['$gt'] = start
        if end is not None:
            date_range['$lt'] = end
        query = {"日期": date_range} if date_range else {}

        for name in names:
            self.data[name] = self._load_collection(db, name, query)
            if not single:
                print(f'Data {name} has shape {self.data[name].shape}')

    def set_factor(self, dataname: str, data: np.ndarray, check: bool=True):
        """Store ``data`` in ``self.factors`` under ``dataname``.

        Everything placed in ``factors`` is expected to share one shape.
        Backtesting is meant to work mainly from ``factors``, kept separate
        from ``data`` (profit and loss still reads closing prices from
        ``data``).

        Args:
            dataname: Key to store the factor under.
            data: Factor values.
            check: When true, require ``data`` to match the shape of the
                factors already stored under other names.

        Raises:
            ValueError: If ``check`` is true and the shapes differ.
        """
        if check:
            # Compare against the other factors only: the first factor has
            # nothing to match, and a factor may be replaced by a new version.
            others = [v for k, v in self.factors.items() if k != dataname]
            if others:
                expected = np.shape(others[0])
                actual = np.shape(data)
                if actual != expected:
                    raise ValueError(
                        f'{dataname} must have shape {expected} like the '
                        f'existing factors, but got {actual}'
                    )

        self.factors[dataname] = data

    def set_factor_in_financial(self, base: str='EPS_Q', target: Union[str, list]='還原收盤價'):
        """Sample ``target`` data on the dates of ``base`` and set it as a factor.

        For every index entry of ``self.data[base]`` the row of the target
        frame with the nearest index value is taken. The resulting values are
        passed to ``set_factor`` under the target's own name.

        Args:
            base: Key in ``self.data`` whose index provides the dates.
            target: Key, or list of keys, in ``self.data`` to sample.
        """
        base_index = self.data[base].index
        targets = [target] if isinstance(target, str) else target
        for name in targets:
            frame = self.data[name]
            nearest_rows = frame.index.get_indexer(base_index, method='nearest')
            self.set_factor(name, frame.iloc[nearest_rows].values)

    def creat_to_daily(self, df: pd.DataFrame, base: str='還原收盤價', real_daily: bool=False):
        """Spread ``df`` over the dates of ``self.data[base]`` by forward fill.

        Args:
            df: Values to convert to daily data.
            base: Key in ``self.data`` whose index and columns define the
                daily grid.
            real_daily: When true, return only the rows and columns of the
                base frame. Otherwise also keep the dates of ``df``.

        Returns:
            DataFrame indexed by the union of both indexes (or by the base
            index when ``real_daily`` is true), forward filled. Columns keep
            the dtypes of ``df``; the previous cell-by-cell fill produced
            object columns.
        """
        base_frame = self.data[base]
        # Column labels of df are matched to the base frame by their string form.
        values = df.rename(columns=str)
        # Labels missing from the base frame are kept and appended at the end.
        extra = values.columns.difference(base_frame.columns, sort=False)
        columns = base_frame.columns.append(extra)
        full_index = base_frame.index.union(values.index)

        daily = values.reindex(index=full_index, columns=columns).ffill()
        if real_daily:
            return daily.loc[base_frame.index, base_frame.columns]
        return daily

class Analysis(DataCenter):
    """Placeholder for result analysis; adds nothing to DataCenter yet."""

class BackTesting(Analysis):
    """Placeholder for backtest calculations; adds nothing to Analysis yet."""

class Trader(BackTesting):
    """Placeholder top-level class; adds nothing to BackTesting yet."""

        

In [55]:
if __name__ == '__main__':
    # Workflow:
    #   1. Load all data with get_from_mongo.
    #   2. Compute derived data and store it in td.data
    #      (open question: should everything be stored as booleans?).
    #   3. Put the finished data into td.factors with set_factor
    #      (ndarrays are expected there).
    #   4. Run all backtests in the backtesting class.
    #   5. Build an analysis class to analyse the results (similar to pyfolio).
    client = pymongo.MongoClient()
    td = Trader(client)

    # Step 1: date-bounded fields from 'Fields', then the full 'Index' tables.
    start = datetime.datetime(2020, 1, 1)
    end = datetime.datetime.today()
    data_list = [
        '本益比(近四季)',
        '股價淨值比',
        '成交金額(千)',
        '還原收盤價',
        'EPS_Q',
        '營業收入淨額_Q',
        '殖利率',
        '收盤價_指數',
    ]
    td.get_from_mongo(elements=data_list, db='Fields', start=start, end=end)
    data2_list = ['TWA00', '指數彙編', '指數名稱轉換']
    td.get_from_mongo(elements=data2_list, db='Index')

    # Step 2: the aim is to trade on quarterly data, which needs the date one
    # day after each financial report is received. Alternative to test
    # separately: leave the reports out and trade on daily data only.
    adjusted_close = td.data['還原收盤價']
    td.data['MA60'] = adjusted_close.rolling(60).mean()
    td.data['MA120'] = adjusted_close.rolling(120).mean()
    


Data 本益比(近四季) has shape (708, 2211)
Data 股價淨值比 has shape (708, 2211)
Data 成交金額(千) has shape (708, 2211)
Data 還原收盤價 has shape (708, 2211)
Data EPS_Q has shape (12, 2211)
Data 營業收入淨額_Q has shape (12, 2211)
Data 殖利率 has shape (708, 2211)
Data 收盤價_指數 has shape (708, 59)
Data TWA00 has shape (6043, 34)
Data 指數彙編 has shape (2211, 8)
Data 指數名稱轉換 has shape (1, 22)


In [59]:
# Frames used by the following cells: EPS_Q supplies the dates, the adjusted
# close supplies the values.
base_data, target_data = td.data['EPS_Q'], td.data['還原收盤價']
# daily_data = td.creat_to_daily(base_data)

In [63]:
# For each date in base_data, show the target_data row with the nearest date.
nearest_rows = target_data.index.get_indexer(base_data.index, method='nearest')
target_data.iloc[nearest_rows]

Unnamed: 0_level_0,2706,8016,8719,4904,2882,6441,1407,2316,6182,1785,...,8714,8715,8716,8717,8722,8724,8725,8910,9903,9932
日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-31,11.47,89.26,,55.06,29.95,55.69,,22.89,23.57,14.69,...,,,,,,,,,,
2020-05-15,12.18,107.42,,57.24,33.13,68.51,,30.28,30.38,17.98,...,,,,,,,,,,
2020-08-14,12.94,113.58,,57.15,35.91,100.57,,31.26,33.86,37.51,...,,,,,,,,,,
2020-11-13,13.37,106.45,,55.69,36.13,129.56,,26.97,31.38,42.34,...,,,,,,,,,,
2021-03-31,13.72,189.16,,58.52,42.73,123.65,,28.67,40.37,42.48,...,,,,,,,,,,
2021-05-14,12.88,185.2,,59.25,45.89,74.55,,24.69,42.52,40.7,...,,,,,,,,,,
2021-08-13,13.52,284.12,,59.0,51.13,52.55,,26.43,58.13,42.57,...,,,,,,,,,,
2021-11-15,14.36,228.92,,61.3,55.5,60.8,,28.04,65.51,54.53,...,,,,,,,,,,
2022-03-31,14.01,235.82,,70.62,59.96,47.0,,29.87,66.97,49.15,...,,,,,,,,,,
2022-05-16,13.32,198.07,,78.89,49.45,44.25,,27.27,56.96,44.1,...,,,,,,,,,,


## 會用到的資料列表

In [None]:
# Fields loaded directly.
['本益比(近四季)', '股價淨值比', '成交金額(千)', '還原收盤價', 'EPS_Q', '營業收入淨額_Q', '殖利率', '收盤價_指數']
# Need to be computed
['MA60', 'MA120']
['PE閥值', 'PB閥值', '殖利率閥值']
# EPS and net operating revenue have no significant effect
# Stored in db['Index']
['TWA00', '指數彙編', '指數名稱轉換']

### load data