# In [2]:
import pandas as pd
import numpy as np

from Feature import Feature

from pathlib import Path

def label(path: str, freq: str='900S', start_row: int=26, stop_row: int=1999947) -> pd.DataFrame:
    '''
    Load a kline CSV, put it on a regular time grid and add an up/down label.

    Steps:
      1. read the CSV, using the "Time_UTC_Start" column as the index;
      2. drop the "Timestamp", "Timestamp End" and "N/A.5" columns and rename
         the remaining ones to lower-case names (open, high, low, close, volume, ...);
      3. keep only the positional rows start_row:stop_row;
      4. parse the index as datetimes and keep the last row for each timestamp;
      5. conform the frame to the `freq` grid and forward-fill the gaps;
      6. add a float `label` column: 1.0 where close > open, otherwise 0.0.

    Parameters
    ----------
    path : path of the CSV file to read.
    freq : pandas offset alias of the output grid. The default is 900 seconds.
        NOTE(review): recent pandas releases deprecate the upper-case 'S'/'H'
        aliases; pass e.g. '15min' or 'h' explicitly if that applies to your
        environment.
    start_row, stop_row : positional window (stop exclusive) of raw rows to
        keep. The defaults reproduce the window that used to be hard-coded;
        presumably it is specific to one particular data file -- confirm
        before reusing this function on another CSV.

    Returns
    -------
    DataFrame indexed by a regular DatetimeIndex with the renamed columns plus `label`.
    '''

    column_names = {
        'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close',
        'N/A': 'volume', 'N/A.1': 'quote_asset_volume', 'N/A.2': 'number_of_trades',
        'N/A.3': 'taker_buy_base_asset_volume', 'N/A.4': 'taker_buy_quote_asset_volume',
    }

    frame = pd.read_csv(path, index_col="Time_UTC_Start")
    frame = frame.drop(columns=["Timestamp", "Timestamp End", "N/A.5"])
    frame = frame.rename(columns=column_names)
    frame = frame.iloc[start_row:stop_row, :]
    frame.index = pd.to_datetime(frame.index)

    # De-duplicate on the timestamp, not on the row values. DataFrame.drop_duplicates
    # ignores the index, so it would discard distinct bars that merely carry identical
    # numbers (e.g. consecutive flat minutes), while still letting repeated timestamps
    # with different values through -- and asfreq cannot reindex duplicate labels.
    frame = frame[~frame.index.duplicated(keep='last')]

    # asfreq inserts NaN rows for grid points missing from the data; carry the
    # previous bar forward into them.
    frame = frame.asfreq(freq).ffill()

    # 1.0 when the bar closed above its open, 0.0 otherwise (kept as float).
    frame['label'] = (frame['open'] < frame['close']).astype(float)

    return frame

# Build the hourly labelled frame and hand a copy to the project's Feature helper.
# 'h' is the lower-case spelling of the hourly offset alias; the upper-case 'H'
# is deprecated in recent pandas releases.
df_1H = label(path="../data/binance-BTCUSDT-1m.csv", freq='h')
a = Feature(df_1H.copy())

a.TR(inplace=True)
# NOTE(review): MACD is the only call here without inplace=True -- confirm its
# result is not silently discarded.
a.MACD()

# NOTE(review): the keyword is `days`, but the frame is hourly, so these are
# presumably window lengths in bars -- confirm against Feature.
for i in [4, 8, 12, 16, 20, 24, 48, 72]:
    a.volume_EMA(days=i, inplace=True)
    a.volume_SMA(days=i, inplace=True)
    a.volume_WMA(days=i, inplace=True)
    # NOTE(review): the next three calls take no window argument, so they look
    # independent of `i`; they are left in place because the smoothed_* calls
    # below presumably read their output -- verify before hoisting.
    a.raw_money_flow(inplace=True)
    a.plus_DM(inplace=True)
    a.minus_DM(inplace=True)
    # The smoothed_plus_DM / smoothed_minus_DM pair used to be issued twice in
    # a row with identical arguments; the verbatim repeat has been removed.
    a.smoothed_plus_DM(days=i, inplace=True)
    a.smoothed_minus_DM(days=i, inplace=True)
    a.mad(days=i, inplace=True)
    a.ATR(days=i, inplace=True)
    a.CCI(days=i, inplace=True)
    a.DX(days=i, inplace=True)
    a.ADX(days=i, inplace=True)
    a.MFI(days=i, inplace=True)
    a.BOP(days=i, inplace=True)
    for name in ["close", "high", "low", "open"]:
        a.price_EMA(days=i, inplace=True, price_col_name=name)
        a.price_SMA(days=i, inplace=True, price_col_name=name)
        a.price_WMA(days=i, inplace=True, price_col_name=name)
        a.RS(days=i, inplace=True, price_col_name=name)
        a.RSI(days=i, inplace=True, price_col_name=name)
        # std was previously called a second time after price_diff with the
        # same arguments; one call is kept.
        a.std(days=i, inplace=True, price_col_name=name)
        # NOTE(review): price_diff and PPO take no window argument, so they
        # look independent of `i` as well.
        a.price_diff(inplace=True, price_col_name=name)
        a.median(days=i, inplace=True, price_col_name=name)
        a.PPO(inplace=True, price_col_name=name)


a.dropna(inplace=True)
# a.to_csv("../data/data_1h_with_features_ver01.csv")