### 美指數日期調整

In [None]:
# 套件匯入
import pandas as pd
import numpy as np
import os
import re

import talib
from talib import abstract

In [None]:
# Collect the names of all files in the data folder that start with 'DateAdj_U'
filenames = [
    entry
    for entry in os.listdir('../Try data mining/data/')
    if entry.startswith('DateAdj_U')
]
filenames

['DateAdj_US_DJI.csv',
 'DateAdj_US_GSPC.csv',
 'DateAdj_US_IXIC.csv',
 'DateAdj_US_TWII.csv']

In [None]:
# Sanity check: load every matched CSV and report its (rows, columns) shape
for filename in filenames:
    df = pd.read_csv('../Try data mining/data/' + filename)
    shapef = df.shape
    print('The length of {} is {}'.format(filename, shapef))

The length of DateAdj_US_DJI.csv is (2566, 6)
The length of DateAdj_US_GSPC.csv is (2566, 6)
The length of DateAdj_US_IXIC.csv is (2566, 6)
The length of DateAdj_US_TWII.csv is (2566, 6)


In [None]:
# Load the DJI file, using its first CSV column as the index
df = pd.read_csv('../Try data mining/data/DateAdj_US_DJI.csv', index_col=0)

In [None]:
# Bare expression: shows the DataFrame loaded above as the notebook cell output
df

Date,Open,High,Low,Close,Volume
2011-01-03,11577.429688,11711.469727,11577.349609,11670.750000,203420000.0
2011-01-04,11670.900391,11698.219727,11635.740234,11691.179688,178630000.0
2011-01-05,11688.610352,11742.679688,11652.889648,11722.889648,169990000.0
2011-01-06,11716.929688,11736.740234,11667.459961,11697.309570,193080000.0
2011-01-07,11696.860352,11726.940430,11599.679688,11674.759766,188720000.0
...,...,...,...,...,...
2021-06-24,33933.910156,34233.019531,33933.910156,34196.820313,275870000.0
2021-06-25,34328.101563,34501.019531,34314.800781,34433.839844,432550000.0
2021-06-28,34428.101563,34449.648438,34186.128906,34283.269531,280280000.0
2021-06-29,34338.890625,34469.828125,34266.828125,34292.289063,282360000.0


In [None]:
# Cast every column to floating point
df = df.astype(float)

In [None]:
# Bias ratio (BIAS)
def BIAS(close, timeperiod=20):
    """Return the relative deviation of ``close`` from its moving average.

    The result is ``(close - MA) / MA`` where ``MA`` is ``talib.MA`` of
    ``close`` over ``timeperiod`` bars.

    Args:
        close: Array-like of closing prices (ndarray, pandas Series, list, ...).
        timeperiod: Window length passed to ``talib.MA``.

    Returns:
        numpy.ndarray with one value per input element.
    """
    # TA-Lib only accepts float64 ("double") arrays, so coerce once up front.
    # This also covers integer / float32 input, which would otherwise be
    # rejected inside talib.MA.
    close = np.asarray(close, dtype=np.float64)
    MA = talib.MA(close, timeperiod=timeperiod)
    return (close - MA) / MA

In [None]:
# Average of the moving-average difference (the AMA line of the DMA indicator)
# Reference implementation of the underlying "dma" series:
# https://github.com/jealous/stockstats/blob/master/stockstats.py
#     dma = close_10_sma - close_50_sma
def AMA(df):
    """Return the 10-period moving average of (SMA10 - SMA50) of ``df['close']``.

    Args:
        df: Object supporting ``df['close']`` (the closing-price series).

    Returns:
        Whatever ``talib.MA`` returns for the smoothed difference series.
    """
    close = df['close']
    fast_sma = talib.SMA(close, timeperiod=10)
    slow_sma = talib.SMA(close, timeperiod=50)
    dma = fast_sma - slow_sma
    return talib.MA(dma, timeperiod=10)

In [None]:
# Psychological line (PSY)
def PSY(priceData, period):
    """Return the percentage of rising bars within a trailing window.

    For every position ``i >= period`` the result is the number of bars in
    ``(i - period, i]`` whose price is strictly higher than the previous
    bar, divided by ``period`` and multiplied by 100. The first ``period``
    positions are NaN because a full window of real price changes is not
    available there.

    Args:
        priceData: Array-like of prices (pandas Series, ndarray, list, ...).
        period: Window length in bars; must be at least 1.

    Returns:
        numpy.ndarray of floats with the same length as ``priceData``.

    Raises:
        ValueError: If ``period`` is smaller than 1.
    """
    if period < 1:
        raise ValueError(f'period must be >= 1, got {period}')

    prices = np.asarray(priceData, dtype=float)
    n = len(prices)
    psy = np.full(n, np.nan)
    if n <= period:
        # Not enough data for even one full window.
        return psy

    # ups[i] is 1 when bar i closed above bar i-1; bar 0 has no predecessor.
    ups = np.zeros(n, dtype=np.int64)
    ups[1:] = np.diff(prices) > 0

    # Window sums via a running total: sum(ups[i-period+1 : i+1]).
    running = np.cumsum(ups)
    psy[period:] = (running[period:] - running[:-period]) / period
    return psy * 100

In [None]:
# Detrended price oscillator (DPO)
def DPO(close):
    """Return ``close`` minus its 11-period moving average lagged by one bar.

    Args:
        close: pandas Series of closing prices (``.shift`` is called on the
            moving average, so a pandas object is required).

    Returns:
        The element-wise difference ``close - lagged_ma``.
    """
    p = talib.MA(close, timeperiod=11)
    # shift() returns a new object rather than modifying in place, so the
    # result has to be re-bound for the lag to take effect.
    p = p.shift()
    # NOTE(review): the lag is kept at one bar as written; textbook DPO lags
    # the average by N/2 + 1 bars -- confirm which variant is intended.
    return close - p

In [None]:
# Vertical horizontal filter (VHF)
def VHF(close, timeperiod=28):
    """Return the ratio of the price range to the summed absolute changes.

    Numerator: highest close minus lowest close over ``timeperiod`` bars.
    Denominator: sum of ``|close - previous close|`` over ``timeperiod``
    bars, obtained as the moving average multiplied by the window length.

    Args:
        close: pandas Series of closing prices.
        timeperiod: Window length used for the range and for the sum
            (defaults to 28).

    Returns:
        The element-wise ratio ``range / summed_abs_change``.
    """
    lowest = talib.MIN(close, timeperiod=timeperiod)
    highest = talib.MAX(close, timeperiod=timeperiod)
    price_range = highest - lowest

    # Absolute bar-to-bar change; the first element has no predecessor.
    abs_change = close.diff().abs()
    # mean * window length == rolling sum of the absolute changes
    path_length = talib.MA(abs_change, timeperiod=timeperiod) * timeperiod
    return price_range.div(path_length)

In [None]:
# Relative vigor index (RVI)
def RVI(df, timeperiod=10):
    """Return the relative vigor index of an OHLC frame.

    Both the bar body (close - open) and the bar range (high - low) are
    smoothed with the symmetric four-bar weighting ``(a + 2b + 2c + d) / 6``,
    where ``a`` is the current bar and ``b``, ``c``, ``d`` are one, two and
    three bars back. The index is the ratio of the ``timeperiod``-bar sums of
    the two smoothed series.

    Args:
        df: DataFrame with ``open``, ``high``, ``low`` and ``close`` columns.
        timeperiod: Number of bars summed in numerator and denominator
            (defaults to 10).

    Returns:
        The element-wise ratio of the summed smoothed body to the summed
        smoothed range.
    """
    close = df['close']
    open_ = df['open']  # trailing underscore avoids shadowing builtin open()
    high = df['high']
    low = df['low']

    body = close - open_
    span = high - low

    # The four terms are added (not multiplied) and the whole sum is divided
    # by the total weight 6.
    X = (body + 2 * body.shift(1) + 2 * body.shift(2) + body.shift(3)) / 6
    Y = (span + 2 * span.shift(1) + 2 * span.shift(2) + span.shift(3)) / 6

    # mean * window length == rolling sum
    Z = talib.MA(X, timeperiod=timeperiod) * timeperiod
    D = talib.MA(Y, timeperiod=timeperiod) * timeperiod
    return Z / D

In [None]:
# Names of all technical indicators known to TA-Lib (loop-invariant, looked up once)
ta_list = talib.get_functions()

for filename in filenames:

    # Read the file; the first CSV column becomes the index
    df = pd.read_csv(f'../Try data mining/data/{filename}', index_col=0)

    # Lower-case the column names so the talib abstract functions can use them
    df.columns = df.columns.map(lambda x: x.lower())

    # Cast every column to float
    df = df.astype('float')

    for x in ta_list:

        try:

            # x is the indicator code; look it up by name on the abstract API
            # (no eval() of a constructed source string needed)
            output = getattr(abstract, x)(df)

            # One-dimensional output: name the Series after the indicator.
            # Multi-column output is left as-is and needs no naming.
            if isinstance(output, pd.Series):
                output.name = x.lower()

            # Merge the result into df on the two indexes; the merge key comes
            # back as column 'key_0', which is restored as the index
            df = pd.merge(df, pd.DataFrame(output), left_on=df.index, right_on=output.index)
            df = df.set_index('key_0')

        except Exception:

            # Best effort: report the indicator that could not be computed or
            # merged and continue with the next one (Ctrl-C is not swallowed)
            print(x)

    # Date features derived from the index
    datetime_series = pd.DatetimeIndex(df.index)

    df = pd.concat([df, pd.DataFrame(datetime_series.year.rename('year')).set_index(df.index),
                    pd.DataFrame(datetime_series.month.rename('month')).set_index(df.index),
                    pd.DataFrame(datetime_series.day.rename('day')).set_index(df.index),
                    pd.DataFrame(datetime_series.isocalendar().week.rename('weekofyear')).set_index(df.index),
                    pd.DataFrame(datetime_series.dayofyear.rename('dayofyear')).set_index(df.index),
                    pd.DataFrame(datetime_series.isocalendar().day.rename('dayofweek')).set_index(df.index)], axis=1)

    # Add the six hand-written stock indicators defined in this notebook
    df = pd.concat([df, pd.DataFrame(BIAS(df['close'], 20), columns=['bias']).set_index(df.index),
                    pd.DataFrame(AMA(df), columns=['ama']),
                    pd.DataFrame(PSY(df['close'], 24), columns=['psy']).set_index(df.index),
                    pd.DataFrame(DPO(df['close']), columns=['dpo']),
                    pd.DataFrame(VHF(df['close']), columns=['vhf']),
                    pd.DataFrame(RVI(df), columns=['rvi'])], axis=1)

    splitfilename = filename.split('_')

    # Files whose third name part is 'TWII.csv' are saved with a 'TW' tag;
    # every other file keeps the second part of its own name as the tag
    if splitfilename[2] == 'TWII.csv':
        outname = f'AF_Inedx_TW_{splitfilename[2]}'
    else:
        outname = f'AF_Inedx_{splitfilename[1]}_{splitfilename[2]}'

    print(f'The shape of {outname}:')
    print(df.shape)

    # Save the feature table
    df.to_csv(f'afdata/{outname}')
    print(f'Save file :　{outname}')
    print('-'*100)

MAVP


  return op.get_result()


The shape of AF_Inedx_US_DJI.csv:
(2566, 191)
Save file :　AF_Inedx_US_DJI.csv
----------------------------------------------------------------------------------------------------
MAVP
The shape of AF_Inedx_US_GSPC.csv:
(2566, 191)
Save file :　AF_Inedx_US_GSPC.csv
----------------------------------------------------------------------------------------------------
MAVP
The shape of AF_Inedx_US_IXIC.csv:
(2566, 191)
Save file :　AF_Inedx_US_IXIC.csv
----------------------------------------------------------------------------------------------------
MAVP
The shape of AF_Inedx_TW_TWII.csv:
(2566, 191)
Save file :　AF_Inedx_TW_TWII.csv
----------------------------------------------------------------------------------------------------
