### 航運指數日期調整

In [1]:
# 套件匯入
import pandas as pd
import numpy as np
import os
import re

import talib
from talib import abstract

In [2]:
# List every ShippingIndex file name (entries whose name starts with "DateAdj_S")
filenames = list(filter(re.compile('^DateAdj_S').search,
                        os.listdir('../Try data mining/data/')))
filenames

['DateAdj_ShippingIndex_BCI.csv',
 'DateAdj_ShippingIndex_BCTI.csv',
 'DateAdj_ShippingIndex_BDI.csv',
 'DateAdj_ShippingIndex_BDTI.csv',
 'DateAdj_ShippingIndex_BPI.csv',
 'DateAdj_ShippingIndex_BSI.csv']

In [3]:
# Trim every shipping-index file to the study window and save it under afdata/.
# to_csv does not create missing directories, so make sure the target exists
# (otherwise a fresh checkout fails on the first save).
os.makedirs('afdata', exist_ok=True)

for filename in filenames:

    # Read the file; the first CSV column is used as the index
    df = pd.read_csv(f'../Try data mining/data/{filename}', index_col=0)

    # Keep rows whose Date lies between 2011-01-01 and 2021-06-30 (both ends included).
    # NOTE(review): Date is compared as read from the CSV - presumably ISO
    # 'YYYY-MM-DD' strings, which order correctly as text; confirm.
    df = df[df['Date'].between('2011-01-01', '2021-06-30')]

    # 'DateAdj_<Category>_<Name>.csv' -> 'AF_<Category>_NA_<Name>.csv'
    splitfilename = filename.split('_')
    out_name = f'AF_{splitfilename[1]}_NA_{splitfilename[2]}'
    print(f'The shape of {out_name}:')
    print(df.shape)

    # Save the trimmed file
    df.to_csv(f'afdata/{out_name}')
    print(f'Save file :　{out_name}')
    print('-'*100)

The shape of AF_ShippingIndex_NA_BCI.csv:
(2566, 2)
Save file :　AF_ShippingIndex_NA_BCI.csv
----------------------------------------------------------------------------------------------------
The shape of AF_ShippingIndex_NA_BCTI.csv:
(2566, 2)
Save file :　AF_ShippingIndex_NA_BCTI.csv
----------------------------------------------------------------------------------------------------
The shape of AF_ShippingIndex_NA_BDI.csv:
(2566, 2)
Save file :　AF_ShippingIndex_NA_BDI.csv
----------------------------------------------------------------------------------------------------
The shape of AF_ShippingIndex_NA_BDTI.csv:
(2566, 2)
Save file :　AF_ShippingIndex_NA_BDTI.csv
----------------------------------------------------------------------------------------------------
The shape of AF_ShippingIndex_NA_BPI.csv:
(2566, 2)
Save file :　AF_ShippingIndex_NA_BPI.csv
----------------------------------------------------------------------------------------------------
The shape of AF_ShippingIndex_N

### 外匯日期調整

In [4]:
# Read the file; the first CSV column is used as the index
df = pd.read_csv('../Try data mining/data/DateAdj_ExchangeRate_NTD2USD.csv', index_col=0)

# Keep rows whose Date lies between 2011-01-01 and 2021-06-30 (both ends included).
# NOTE(review): Date is compared as read from the CSV - presumably ISO
# 'YYYY-MM-DD' strings, which order correctly as text; confirm.
df = df[df['Date'].between('2011-01-01', '2021-06-30')]

print('The shape of AF_ExchangeRate_NA_NTD2USD.csv:')
print(df.shape)

# Save the trimmed file. to_csv does not create missing directories, so make
# sure the target exists first.
os.makedirs('afdata', exist_ok=True)
df.to_csv('afdata/AF_ExchangeRate_NA_NTD2USD.csv')
print('Save file :　AF_ExchangeRate_NA_NTD2USD.csv')
print('-'*100)

The shape of AF_ExchangeRate_NA_NTD2USD.csv:
(2566, 2)
Save file :　AF_ExchangeRate_NA_NTD2USD.csv
----------------------------------------------------------------------------------------------------


### 期貨報價調整

In [5]:
# List every Futures file name (entries whose name starts with "DateAdj_F")
filenames = list(filter(re.compile('^DateAdj_F').search,
                        os.listdir('../Try data mining/data/')))
filenames

['DateAdj_Futures_Brent Oil.csv',
 'DateAdj_Futures_Crude Oil.csv',
 'DateAdj_Futures_DalianIronOre.csv',
 'DateAdj_Futures_Gold.csv']

In [6]:
# Bias ratio: relative deviation of the price from its moving average
def BIAS(close, timeperiod=20):
    """Return (close - MA) / MA, element by element.

    Parameters
    ----------
    close : array-like
        Price values. Anything that is not already a numpy array is
        converted with ``np.array``.
    timeperiod : int, default 20
        Window passed to ``talib.MA``.

    Returns
    -------
    numpy.ndarray
        Bias ratio for each observation.
    """
    prices = close if isinstance(close, np.ndarray) else np.array(close)
    moving_avg = talib.MA(prices, timeperiod=timeperiod)
    deviation = prices - moving_avg
    return deviation / moving_avg

In [7]:
# AMA: moving average of the DMA (difference between two moving averages) line.
# DMA is taken from stockstats, where it is defined as
#     df['dma'] = df['close_10_sma'] - df['close_50_sma']
# https://github.com/jealous/stockstats/blob/master/stockstats.py
def AMA(df):
    """Return the 10-period ``talib.MA`` of (SMA10(close) - SMA50(close)).

    Parameters
    ----------
    df : mapping with a 'close' entry
        Only ``df['close']`` is read.
    """
    fast_sma = talib.SMA(df['close'], timeperiod=10)
    slow_sma = talib.SMA(df['close'], timeperiod=50)
    dma = fast_sma - slow_sma
    return talib.MA(dma, timeperiod=10)

In [8]:
# Psychological line indicator
def PSY(priceData, period):
    """Percentage of the last ``period`` observations that closed higher.

    Parameters
    ----------
    priceData : pandas.Series
        Price series; only its ``.values`` and length are used.
    period : int
        Number of observations in each window.

    Returns
    -------
    numpy.ndarray
        Same length as ``priceData``. The first ``period`` entries are NaN;
        every later entry is 100 * (number of rises in the window) / period.
    """
    values = priceData.values
    total = len(priceData)

    # 1 where the price rose against the previous observation, otherwise 0.
    # The first observation has no predecessor and is counted as "no rise".
    rose = np.concatenate(([0], (values[1:] - values[:-1] > 0).astype(int)))

    # running[k] holds the number of rises among the first k observations, so
    # a window count is just the difference of two running totals.
    running = np.concatenate(([0], np.cumsum(rose)))

    psy = np.zeros((total,))
    psy[:period] = np.nan

    ends = np.arange(period, total)
    psy[ends] = (running[ends + 1] - running[ends - period + 1]) / period
    return psy * 100

In [9]:
# Detrended price oscillator
def DPO(close, timeperiod=11, shift=1):
    """Return ``close`` minus its lagged moving average.

    Parameters
    ----------
    close : pandas.Series
        Price series. ``talib.MA`` must hand back a Series for the lag to work.
    timeperiod : int, default 11
        Window passed to ``talib.MA``.
    shift : int, default 1
        Number of periods the moving average is lagged before subtracting.

    Returns
    -------
    pandas.Series
        ``close - MA(close, timeperiod).shift(shift)``.

    Notes
    -----
    NOTE(review): DPO is commonly defined with an N-period average lagged by
    N/2 + 1 periods. The defaults keep the window and lag this notebook was
    written with - confirm which parameters are intended.
    """
    p = talib.MA(close, timeperiod=timeperiod)
    # Series.shift returns a new Series. The result used to be thrown away, so
    # the average was never lagged; keep it this time.
    p = p.shift(shift)
    return close - p

In [10]:
# Vertical horizontal filter
def VHF(close, timeperiod=28):
    """Return (highest close - lowest close) / sum of absolute one-step changes.

    Parameters
    ----------
    close : pandas.Series
        Price series (``.shift`` and ``.div`` are called on it and on the
        intermediate results).
    timeperiod : int, default 28
        Window used for the high/low range and for the summed changes.

    Returns
    -------
    pandas.Series
        Range over the window divided by the total absolute movement in it.
    """
    LCP = talib.MIN(close, timeperiod=timeperiod)
    HCP = talib.MAX(close, timeperiod=timeperiod)
    NUM = HCP - LCP

    # Absolute change against the previous observation
    DEN = abs(close - close.shift())
    # mean * window length turns the moving average into a moving sum
    DEN = talib.MA(DEN, timeperiod=timeperiod) * timeperiod
    return NUM.div(DEN)

In [11]:
# Relative vigor index
def RVI(df):
    """Return the relative vigor index computed from OHLC columns.

    Both the numerator (close - open) and the denominator (high - low) are
    smoothed with the symmetric weights 1, 2, 2, 1 over the current and the
    three previous observations, divided by 6. Each smoothed series is then
    summed over 10 periods and the two sums are divided.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'open', 'high', 'low' and 'close' columns.

    Returns
    -------
    pandas.Series
        10-period sum of the smoothed body over the 10-period sum of the
        smoothed range.
    """
    # Subtracting first and shifting afterwards is the same as shifting both
    # columns and subtracting, and keeps the weighted sums readable.
    body = df['close'] - df['open']
    span = df['high'] - df['low']

    # The previous expression multiplied the third and fourth terms together
    # and divided only that product by 6; all four terms are added here and
    # the whole sum is divided by 6.
    X = (body + 2 * body.shift() + 2 * body.shift(periods=2) + body.shift(periods=3)) / 6
    Y = (span + 2 * span.shift() + 2 * span.shift(periods=2) + span.shift(periods=3)) / 6

    # mean * window length turns the moving average into a moving sum
    Z = talib.MA(X, timeperiod=10) * 10
    D = talib.MA(Y, timeperiod=10) * 10
    return Z / D

In [12]:
# Build the feature table for every futures file: prices, every TA-Lib
# indicator, calendar features and the six custom indicators, then save it.

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('afdata', exist_ok=True)

# Names of all TA-Lib indicators; the list does not depend on the file
ta_list = talib.get_functions()

for filename in filenames:

    # Read the file
    df = pd.read_csv(f'../Try data mining/data/{filename}')

    # Keep rows whose Date lies between 2011-01-01 and 2021-06-30 (both ends included).
    # NOTE(review): Date is compared as read from the CSV - presumably ISO
    # 'YYYY-MM-DD' strings, which order correctly as text; confirm.
    df = df[df['Date'].between('2011-01-01', '2021-06-30')]

    if filename != 'DateAdj_Futures_DalianIronOre.csv':

        df = df[['Date', 'Open', 'High', 'Low', 'Price', 'Vol.', 'Change %']]

        # Drop the unneeded column and make Date the index
        df = df.drop(['Change %'], axis='columns').set_index('Date')

        # Rename Vol. -> volume and Price -> close
        df = df.rename(columns={'Vol.': 'volume', 'Price': 'close'})

    else:
        # Make Date the index; this file's columns are used as they are
        df = df.set_index('Date')

    # Lower-case the column names so the talib functions can find them
    df.columns = df.columns.map(lambda x: x.lower())

    # Convert every column to float
    df = df.astype('float')

    for x in ta_list:

        try:

            # x is the indicator's name; look it up on talib.abstract and run
            # it on the frame (no eval of a built string needed)
            output = getattr(abstract, x)(df)

            # A one-dimensional result is named after the indicator;
            # multi-column results keep their own column names
            if isinstance(output, pd.Series):
                output.name = x.lower()

            # Merge the result into df on the index values. pd.merge is kept on
            # purpose: when a result column name already exists in df, merge's
            # default '_x'/'_y' suffixes decide the final column names.
            df = pd.merge(df, pd.DataFrame(output), left_on=df.index, right_on=output.index)
            df = df.set_index('key_0')

        except Exception:

            # Best effort: report the indicator that could not be computed and
            # carry on (KeyboardInterrupt and SystemExit are no longer swallowed)
            print(x)

    # Calendar features derived from the index
    datetime_series = pd.DatetimeIndex(df.index)

    df = pd.concat([df, pd.DataFrame(datetime_series.year.rename('year')).set_index(df.index),
                    pd.DataFrame(datetime_series.month.rename('month')).set_index(df.index),
                    pd.DataFrame(datetime_series.day.rename('day')).set_index(df.index),
                    pd.DataFrame(datetime_series.isocalendar().week.rename('weekofyear')).set_index(df.index),
                    pd.DataFrame(datetime_series.dayofyear.rename('dayofyear')).set_index(df.index),
                    pd.DataFrame(datetime_series.isocalendar().day.rename('dayofweek')).set_index(df.index)], axis=1)

    # Add six commonly used stock indicators
    df = pd.concat([df, pd.DataFrame(BIAS(df['close'], 20), columns=['bias']).set_index(df.index),
                    pd.DataFrame(AMA(df), columns=['ama']),
                    pd.DataFrame(PSY(df['close'], 24), columns=['psy']).set_index(df.index),
                    pd.DataFrame(DPO(df['close']), columns=['dpo']),
                    pd.DataFrame(VHF(df['close']), columns=['vhf']),
                    pd.DataFrame(RVI(df), columns=['rvi'])], axis=1)

    # 'DateAdj_<Category>_<Name>.csv' -> 'AF_<Category>_NA_<Name>.csv'
    splitfilename = filename.split('_')
    out_name = f'AF_{splitfilename[1]}_NA_{splitfilename[2]}'
    print(f'The shape of {out_name}:')
    print(df.shape)

    # Save the feature table
    df.to_csv(f'afdata/{out_name}')
    print(f'Save file :　{out_name}')
    print('-'*100)

MAVP


  return op.get_result()


The shape of AF_Futures_NA_Brent Oil.csv:
(2566, 191)
Save file :　AF_Futures_NA_Brent Oil.csv
----------------------------------------------------------------------------------------------------
MAVP
The shape of AF_Futures_NA_Crude Oil.csv:
(2566, 191)
Save file :　AF_Futures_NA_Crude Oil.csv
----------------------------------------------------------------------------------------------------
MAVP
The shape of AF_Futures_NA_DalianIronOre.csv:
(1880, 191)
Save file :　AF_Futures_NA_DalianIronOre.csv
----------------------------------------------------------------------------------------------------
MAVP
The shape of AF_Futures_NA_Gold.csv:
(2566, 191)
Save file :　AF_Futures_NA_Gold.csv
----------------------------------------------------------------------------------------------------
