## Labeling on base of ATR

In [1]:
import os
import numpy as np
import pandas as pd

import plotly
import plotly.graph_objs as go
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook

In [2]:
plotly.offline.init_notebook_mode(connected=True)
pd.options.mode.chained_assignment = None # отключаем предупреждения pandas

In [10]:
# переменные 

# C:\Users\parsh\Desktop\Exchange/data/US_stocks_csv/{}
path = r'C:/Users/parsh/Desktop/Exchange/data/US_stocks_csv/NASDAQ/'
file_name = 'AAPL.csv'
file_ticker = path + file_name

n_atr = 10 # период ATR
stop_loss = 1.5 # стоп-лосс в ATR
stop_profit = 5 # стоп-профит в ATR

In [4]:
def wwma(values, n):
    """
     J. Welles Wilder's EMA 
    """
    return values.ewm(alpha=1/n, adjust=False).mean()

def atr(df, n=10):
    data = df.copy()
    high = data['High']
    low = data['Low']
    close = data['Close']
    data['tr0'] = abs(high - low)
    data['tr1'] = abs(high - close.shift())
    data['tr2'] = abs(low - close.shift())
    tr = data[['tr0', 'tr1', 'tr2']].max(axis=1)
    atr = wwma(tr, n)
    return atr

def atr_labeling(sl_level = 1.5, sp_level = 5, n_atr = 10):
    
    # имена колонок
    col_atr = 'ATR_' + str(n_atr)
    col_label = 'Y_' + str(sl_level) + '_' + str(sp_level)
    
    # начальное значения и расчет ATR
    df[col_label] = 0  # 0 - остается, если условия не срабатывают
    df[col_atr] = atr(df, n_atr)

    df['ATR_sl_long'] = df['Close'] - sl_level*df[col_atr] # временная колонка
    df['ATR_sp_long'] = df['Close'] + sp_level*df[col_atr] # временная колонка
    
    df['ATR_sl_short'] = df['Close'] + sl_level*df[col_atr] # временная колонка
    df['ATR_sp_short'] = df['Close'] - sp_level*df[col_atr] # временная колонка

    
    for i in tqdm_notebook(range (n_atr, df.shape[0]-1)):
        # индексы в df первого срабатывания SL или SP
        sl_indx = 0
        sp_indx = 0
        
        # проверяем лонг
        not_long = True
        sl_indx = ((df['Low'] <= df['ATR_sl_long'][i]) & (df.index > i)).idxmax()
        sp_indx = ((df['High'] >= df['ATR_sp_long'][i]) & (df.index > i)).idxmax()
        
        if sl_indx > sp_indx:
            df[col_label][i] = sp_indx # индекс с '+' для лонга
            not_long = False
        
        # проверяем шорт
        if not_long:
            sl_indx = ((df['High'] >= df['ATR_sl_short'][i]) & (df.index > i)).idxmax()
            sp_indx = ((df['Low'] <= df['ATR_sp_short'][i]) & (df.index > i)).idxmax()
            if sl_indx > sp_indx:
                df[col_label][i] = -sp_indx # индекс с '-' для шорта         

#    df.drop('ATR_sl_long', axis=1, inplace = True) # удаляем временную колонку
#    df.drop('ATR_sp_long', axis=1, inplace = True) # удаляем временную колонку
#    df.drop('ATR_sl_short', axis=1, inplace = True) # удаляем временную колонку
#    df.drop('ATR_sp_short', axis=1, inplace = True) # удаляем временную колонку

    return

In [5]:
def plot_chart(df):
    # рисуем свечной график с объемами

    #INCREASING_COLOR = '#17BECF'
    #DECREASING_COLOR = '#7F7F7F'
    
    ticker_name = df.Ticker[0]
    data = [ dict(
        type = 'candlestick',
        open = df.Open,
        high = df.High,
        low = df.Low,
        close = df.Close,
        x = df.Date_2,
        yaxis = 'y2',
        name = ticker_name,
    #    increasing = dict( line = dict( color = INCREASING_COLOR ) ),
    #    decreasing = dict( line = dict( color = DECREASING_COLOR ) ),
    ) ]

    layout=dict()

    fig = dict(data=data, layout=layout)

    fig['layout'] = dict()
    #fig['layout']['plot_bgcolor'] = 'rgb(250, 250, 250)'
    #categoryorder = 'category ascending', 
    fig['layout']['xaxis'] = dict(type ='category', \
                                  tickfont=dict(size=10),\
                                  rangeselector = dict(visible = True), rangeslider = dict(visible = False))
    fig['layout']['yaxis'] = dict(domain = [0, 0.2], showticklabels = False, autorange=True, type='log')
    fig['layout']['yaxis2'] = dict(domain = [0.2, 0.8], autorange=True, type='log')
    fig['layout']['legend'] = dict( orientation = 'h', y=0.9, x=0.3, yanchor='bottom' )
    fig['layout']['margin'] = dict( t=40, b=40, r=40, l=40 )

    fig['data'].append( dict( x=df.Date_2, y=df.Volume,  
    #                         marker=dict( color=colors ),
                             type='bar', yaxis='y', name='Volume') )
    plotly.offline.iplot(fig)
    return


In [11]:
# читаем базу, строим колонку Data2 для графиков

df = pd.read_csv(file_ticker)
df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d', errors='ignore')
#df['Date'] = pd.to_datetime(df['Date'], format='%d%m%Y', errors='ignore')
df['Date_2'] = df['Date'].dt.strftime('%d-%m-%Y')
#df['Date_2'] = pd.to_datetime(df['Date_2'])

#df['Date_2'] = df['Date_2'].astype(str)
#df['Volume_2'] = df['Volume']-df['Volume'].min()

In [12]:
# считаем Y-ки

atr_labeling(sl_level = stop_loss, sp_level = stop_profit, n_atr = n_atr)

HBox(children=(IntProgress(value=0, max=8554), HTML(value='')))




In [13]:
df

Unnamed: 0,Ticker,Date,Open,High,Low,Close,Volume,Date_2,Y_1.5_5,ATR_10,ATR_sl_long,ATR_sp_long,ATR_sl_short,ATR_sp_short
0,AAPL,1985-01-02,0.52009,0.52011,0.49332,0.49779,43825644,02-01-1985,0,0.026790,0.457605,0.631740,0.537975,0.363840
1,AAPL,1985-01-03,0.50000,0.52011,0.49554,0.50671,41652840,03-01-1985,0,0.026568,0.466858,0.639550,0.546562,0.373870
2,AAPL,1985-01-04,0.50670,0.50893,0.50000,0.50671,34316836,04-01-1985,0,0.024804,0.469504,0.630731,0.543916,0.382689
3,AAPL,1985-01-07,0.50670,0.50893,0.49107,0.50446,42728044,07-01-1985,0,0.024110,0.468295,0.625009,0.540625,0.383911
4,AAPL,1985-01-08,0.50446,0.50893,0.50000,0.50000,35280036,08-01-1985,0,0.022592,0.466112,0.612959,0.533888,0.387041
5,AAPL,1985-01-09,0.50000,0.52011,0.50000,0.51339,41680840,09-01-1985,0,0.022344,0.479875,0.625108,0.546905,0.401672
6,AAPL,1985-01-10,0.51118,0.53796,0.51118,0.53571,69266472,10-01-1985,0,0.022787,0.501529,0.649646,0.569891,0.421774
7,AAPL,1985-01-11,0.53571,0.54018,0.52679,0.53125,51262452,11-01-1985,0,0.021848,0.498479,0.640488,0.564021,0.422012
8,AAPL,1985-01-14,0.53125,0.55136,0.52679,0.54689,67608864,14-01-1985,0,0.022120,0.513710,0.657489,0.580070,0.436291
9,AAPL,1985-01-15,0.54911,0.55582,0.53125,0.53571,66242468,15-01-1985,0,0.022365,0.502163,0.647534,0.569257,0.423886


In [14]:
# проверка процент Y-ков

round(df[df['Y_1.5_5']!=0].shape[0]*100/df.shape[0],1)

37.3

In [15]:
# рисуем график

plot_chart(df)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8565 entries, 0 to 8564
Data columns (total 14 columns):
Ticker          8565 non-null object
Date            8565 non-null datetime64[ns]
Open            8565 non-null float64
High            8565 non-null float64
Low             8565 non-null float64
Close           8565 non-null float64
Volume          8565 non-null int64
Date_2          8565 non-null object
Y_1.5_5         8565 non-null int64
ATR_10          8565 non-null float64
ATR_sl_long     8565 non-null float64
ATR_sp_long     8565 non-null float64
ATR_sl_short    8565 non-null float64
ATR_sp_short    8565 non-null float64
dtypes: datetime64[ns](1), float64(9), int64(2), object(2)
memory usage: 936.9+ KB


In [17]:
# не удалять, формула для нормированного АТР пригодится :) # 

df['ATR_10'] = atr(df, n_atr)
df['NATR_10'] = (df['ATR_10'] * 100 / df['Close']).round(2)