In [1]:
%matplotlib inline
%pylab inline
import matplotlib.pyplot as plt
import mpl_finance as mpf

import os
import pandas as pd
import sys
import talib

from datetime import datetime, timedelta
from mplfinance.original_flavor import candlestick_ohlc as candlestick
from sklearn.preprocessing import MinMaxScaler




    Please use `mplfinance` instead (no hyphen, no underscore).

    To install: `pip install --upgrade mplfinance` 

   For more information, see: https://pypi.org/project/mplfinance/




#### util: file_path

In [2]:
class PathChecker:
    def __init__(self, symbol="BTC", tick='1h'):
        self.__data_path = "./dataset"
        self.__check_dir(data_path = self.__data_path)
        self.__symbol = symbol
        self.__tick = tick
        self.file_path = self.__get_path(self.__symbol, self.__tick, self.__data_path)
        
    @staticmethod
    def __check_dir(data_path):
        if not os.path.exists(data_path):
            os.mkdir(data_path) 
    
    @staticmethod
    def __get_path(symbol, tick, data_path):
        date = datetime.today().strftime("%Y-%m-%d").replace('-', '')
        filename = symbol + '_' + tick + '_' + date + ".csv"
        return os.path.join(data_path, filename)

# get_data (default 'BTC')

In [3]:
file_path = PathChecker().file_path
df = pd.read_csv(file_path)
df["Date"] = df["Date"].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %I-%p').strftime('%Y-%m-%d %H:%M:%S'))
df["Date"] = df["Date"].apply(lambda x: pd.to_datetime(x))
df = df.sort_values(by=["Date"], ascending=True)
df = df.set_index("Date", inplace=False)

# indicators

In [4]:
df['sma_short'] = talib.SMA(df['Close'].values, 7)
df['sma_long'] = talib.SMA(df['Close'].values, 14)
df['ema_short'] = talib.SMA(df['Close'].values, 3)
df['ema_long'] = talib.SMA(df['Close'].values, 5)
df['K'], df['D'] = talib.STOCH(df['High'], df['Low'], df['Close'])
df['RSI_5'] = talib.RSI(df['Close'], 5)
df['RSI_10'] = talib.RSI(df['Close'], 10)
df['Upper'], df['Middle'], df['Lower'] = talib.BBANDS(df['Close'], timeperiod=10, nbdevup=2, nbdevdn=2)
df['MACD'] = talib.MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)[0]
df['OBV'] = talib.OBV(df['Close'], df.iloc[:, -2])
df['AROON Down'], df['AROON Up'] = talib.AROON(df['High'], df['Low'], timeperiod=7)

# Train / Test split

In [5]:
def slice_date(df, start_date=None, end_date=None):
    # Pass if start_time, end_time are not set
    if start_date == None:
        start_date = df.index[0]
    if end_date == None:
        end_date = df.index[-1]
    # Slice data into time interval
    df = df[(df.index >= start_date) & (df.index <= end_date)]
    return df

In [6]:
df_train = slice_date(df, "2017-01-01", "2018-12-31")

In [7]:
test_length = "week" # "year", "half-year", "month", "week"

if test_length == "year":
    df_test = slice_date(df, "2019-01-01", "2019-12-31")
elif test_length == "half-year":
    df_test = slice_date(df, "2019-07-01", "2019-12-31")
elif test_length == "month":
    df_test = slice_date(df, "2019-12-01", "2019-12-31")
elif test_length == "week":
    df_test = slice_date(df, "2019-12-25", "2019-12-31")

In [8]:
chosen_features = ['ema_short', 'ema_long', 'K', 'D', 'RSI_5', 'RSI_10', 'Upper', 'Middle', 'Lower', 'MACD', 'OBV', 'AROON Down', 'AROON Up', 'Close']
# chosen_features = ['ema_short', 'ema_long', 'K', 'D', 'Close']

In [9]:
train_features = df_train[chosen_features]
train_features.fillna(0, inplace=True)
test_features = df_test[chosen_features]
test_features.fillna(0, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


In [10]:
def normalize(train, test):
    minmax_sc = MinMaxScaler()
    train_norm = pd.DataFrame(minmax_sc.fit_transform(train),
                              columns=train.columns,
                              index=train.index)
    test_norm = pd.DataFrame(minmax_sc.transform(test),
                             columns=test.columns,
                             index=test.index)
    return train_norm, test_norm, minmax_sc

In [11]:
train_norm, test_norm, sc = normalize(train_features, test_features)

In [12]:
pd.plotting.register_matplotlib_converters()

In [13]:
fig = plt.figure(figsize=(30, 35))

<Figure size 2160x2520 with 0 Axes>

In [14]:
Select = df_test
df_test.index.strftime('%Y-%m-%d')

Index(['2019-12-25', '2019-12-25', '2019-12-25', '2019-12-25', '2019-12-25',
       '2019-12-25', '2019-12-25', '2019-12-25', '2019-12-25', '2019-12-25',
       ...
       '2019-12-30', '2019-12-30', '2019-12-30', '2019-12-30', '2019-12-30',
       '2019-12-30', '2019-12-30', '2019-12-30', '2019-12-30', '2019-12-31'],
      dtype='object', name='Date', length=145)

In [15]:
ax1 = fig.add_axes([0.05,0.45,0.9,0.45])
ax2 = fig.add_axes([0.05,0.30,0.9,0.15])
ax3 = fig.add_axes([0.05,0.05,0.9,0.25])

mpf.candlestick2_ochl(ax1, Select['Open'], Select['Close'], Select['High'],
                      Select['Low'], width=0.6, colorup='r', colordown='g', alpha=0.75)

(<matplotlib.collections.LineCollection at 0x133e57ad0>,
 <matplotlib.collections.PolyCollection at 0x133e57bd0>)

In [16]:
ax1.plot(Select['sma_short'], label='MA short')
ax1.plot(Select['sma_long'], label='MA long')
ax1.legend()
ax1.autoscale()
ax1.set_ylim((Select['Close'].min()*0.9, Select['Close'].max()*1.1))

(6397.587, 8206.792000000001)

In [17]:
# b.) KD line chart.
ax2.plot(Select['K'], label='K')
ax2.plot(Select['D'], label='D')
ax2.legend()

<matplotlib.legend.Legend at 0x133e92f50>

In [18]:
# c.) Volume bar chart.
mpf.volume_overlay(ax3, Select['Open'], Select['Close'], Select['Volume %s' % "BTC"],
                   colorup='r', colordown='g', width=0.5, alpha=0.8)

ax3.set_xticks(range(0, len(Select.index), 20))
ax3.set_xticklabels(Select.index[::20])

[Text(0, 0, '2019-12-25 00:00:00'),
 Text(0, 0, '2019-12-25 20:00:00'),
 Text(0, 0, '2019-12-26 16:00:00'),
 Text(0, 0, '2019-12-27 12:00:00'),
 Text(0, 0, '2019-12-28 08:00:00'),
 Text(0, 0, '2019-12-29 04:00:00'),
 Text(0, 0, '2019-12-30 00:00:00'),
 Text(0, 0, '2019-12-30 20:00:00')]

In [22]:
plt.clf()
plt.close()

In [23]:
print(df_train.shape)
print(df_test.shape)
print(train_norm.shape)
print(test_norm.shape)

(13142, 22)
(145, 22)
(13142, 14)
(145, 14)
