In [1]:
import pandas as pd
from scipy.interpolate import interp1d
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import price_data as price
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport
import mplfinance as mpf
import matplotlib.dates as mpl_dates
import chart

# Select your transport with a defined url endpoint
transport = AIOHTTPTransport(url="https://saturn.hasura.app/v1/graphql", headers={'x-hasura-admin-secret': 'Rc07SJt4ryC6RyNXDKFRAtFmRkGBbT8Ez3SdaEYsHQoHemCldvs52Kc803oK8X62'})

# Create a GraphQL client using the defined transport
client = Client(transport=transport, fetch_schema_from_transport=True)

async def get_stored_data(symbol,timeframe):
    # Provide a GraphQL query
    split_symbol=symbol.split('/')
    base_currency=split_symbol[0]
    quote_currency=split_symbol[1]
    table=base_currency+quote_currency+'_'+timeframe
    if table=='BTCUSD_1d':
        query = gql(
            """
            query MyQuery {
                BTCUSD_1d {
                    unix
                    close
                    high
                    low
                    open
                }
            }
        """
        )
    elif table=='ETHUSD_1d':
        query = gql(
            """
            query MyQuery {
                ETHUSD_1d {
                    unix
                    close
                    high
                    low
                    open
                }
            }
        """
        )
    elif table=='ETHBTC_1d':
        query = gql(
            """
            query MyQuery {
                ETHBTC_1d {
                    unix
                    close
                    high
                    low
                    open
                }
            }
        """
        )
    else:
        return 'no such table'

    # Execute the query on the transport
    result = await client.execute_async(query)
    candles=result[table]
    df=pd.DataFrame({},columns=['unix','close','high','low','open'])
    for candle in candles:
        df=pd.concat([df,pd.DataFrame(data=candle, index=[0])], ignore_index=True)

    return df.sort_values(by=['unix'], ignore_index=True)

def find_start(timestamps):
    start_found=False
    timestamps=list(map(lambda x:x[0]/1000,timestamps))
    index=len(timestamps)-1
    while not(start_found):
        print(timestamps[index])
        day=date.fromtimestamp(timestamps[index]).weekday()
        if day==0:
            start_found=True
        else:   
            index=index-1
    return index

def read_data(filename):
    raw=pd.read_csv('data/'+filename)
    timestamps=raw['unix']
    for i in range(len(timestamps)):
        if np.log10(timestamps.iloc[i])<12:
            raw.loc[i,'unix']=timestamps[i]*1000
    return raw.sort_values(by=['unix'], ignore_index=True)

# Creating Heikin Ashi candles
First we must download the candlestick data

In [2]:
eth=await get_stored_data('ETH/USD','1d')
eth["Date"] = pd.to_datetime(eth["unix"], unit="ms")
weekly_candles_original=price.get_price_data('1w',data=eth)
weekly_candles = weekly_candles_original.copy()
weekly_candles["Date"] = pd.to_datetime(weekly_candles["unix"], unit="ms")
weekly_candles.columns = ['Unix', 'Close', 'High', 'Low', 'Open', 'Date']
weekly_candles.set_index('Date', inplace=True)

In [3]:
eth

Unnamed: 0,unix,close,high,low,open,Date
0,1457481600000,11.20,11.20,10.10,10.30,2016-03-09
1,1457568000000,11.75,11.85,11.07,11.20,2016-03-10
2,1457654400000,11.95,11.95,11.75,11.75,2016-03-11
3,1457740800000,12.92,13.45,11.95,11.95,2016-03-12
4,1457827200000,15.07,15.07,12.92,12.92,2016-03-13
...,...,...,...,...,...,...
2183,1646092800000,2978.30,3038.80,2855.00,2922.60,2022-03-01
2184,1646179200000,2948.60,3046.70,2914.60,2978.30,2022-03-02
2185,1646265600000,2834.20,2972.10,2785.00,2948.70,2022-03-03
2186,1646352000000,2622.60,2836.10,2573.70,2834.20,2022-03-04


In [4]:
weekly_candles

Unnamed: 0_level_0,Unix,Close,High,Low,Open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-03-14,1457913600000,10.180,15.07,8.338,15.07
2016-03-21,1458518400000,10.400,12.47,10.140,10.18
2016-03-28,1459123200000,11.750,12.30,10.400,10.40
2016-04-04,1459728000000,8.757,11.67,7.520,11.60
2016-04-11,1460332800000,9.455,10.90,6.855,8.56
...,...,...,...,...,...
2022-01-31,1643587200000,3057.500,3078.80,2476.800,2602.60
2022-02-07,1644192000000,2872.800,3286.50,2837.200,3057.50
2022-02-14,1644796800000,2622.100,3199.40,2575.700,2872.80
2022-02-21,1645401600000,2617.800,2881.50,2301.100,2622.10


Then we define a function to convert each candle stick into Heikin Ashi Candlestick

In [5]:
def heikin_ashi(previous_open, previous_close, candle):
    # print(candle)
    price_values = candle[["open","high","low","close"]]
    close = np.mean(price_values)
    open_price = 0.5*(previous_open+previous_close)
    high=max([max(price_values), open_price,close])
    low = min([min(price_values), open_price,close])
    
    return candle["unix"], open_price, high, close, low

def convert_data_to_heikin_ashi(data):
    timestamps, opens, closes, highs, lows  = [0], [0],[0],[0],[0]
    #initialise values
    for i in range(1,len(data)):
        timestamp, open_price, high, close, low = heikin_ashi(opens[-1], closes[-1],data.iloc[i])
        timestamps.append(timestamp)
        opens.append(open_price)
        highs.append(high)
        lows.append(low)
        closes.append(close)

    return pd.DataFrame({'unix':timestamps,'open':opens,'high':highs,'low':lows,'close':closes}).sort_values(by=['unix'], ignore_index=True)
    


Then we convert weekly candles and observe

In [6]:
weekly_candles
# convert_data_to_heikin_ashi(weekly_candles)

Unnamed: 0_level_0,Unix,Close,High,Low,Open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-03-14,1457913600000,10.180,15.07,8.338,15.07
2016-03-21,1458518400000,10.400,12.47,10.140,10.18
2016-03-28,1459123200000,11.750,12.30,10.400,10.40
2016-04-04,1459728000000,8.757,11.67,7.520,11.60
2016-04-11,1460332800000,9.455,10.90,6.855,8.56
...,...,...,...,...,...
2022-01-31,1643587200000,3057.500,3078.80,2476.800,2602.60
2022-02-07,1644192000000,2872.800,3286.50,2837.200,3057.50
2022-02-14,1644796800000,2622.100,3199.40,2575.700,2872.80
2022-02-21,1645401600000,2617.800,2881.50,2301.100,2622.10


Shift timestamps up one, delete the top and bottom row (top row is null, bottom row is incomplete). The candle at each timestamp represents the previous week, any data we observe at time=t is from the past.

In [7]:
def convert_data_to_heikin_ashi_processed(data):
    timestamps, opens, closes, highs, lows  = [0], [0],[0],[0],[0]
    #initialise values
    for i in range(1,len(data)):
        timestamp, open_price, high, close, low = heikin_ashi(opens[-1], closes[-1],data.iloc[i])
        timestamps.append(timestamp)
        opens.append(open_price)
        highs.append(high)
        lows.append(low)
        closes.append(close)

    candles = pd.DataFrame({'unix':timestamps,'ha_Open':opens,'ha_High':highs,'ha_Low':lows,'ha_Close':closes}).sort_values(by=['unix'], ignore_index=True)
    timestamp_df=candles['unix'].shift(periods=-1)
    candles["unix"] = timestamp_df
    candles["Date"] = pd.to_datetime(candles["unix"], unit="ms")
    candles["Green"] = candles["ha_Close"]>candles["ha_Open"]
    candles.drop([0,1,len(candles)-1], inplace=True)
    candles.set_index("Date", inplace=True)
    candles.drop("unix", axis=1, inplace=True)
    return candles

heikin_ashi_candles = convert_data_to_heikin_ashi_processed(weekly_candles_original)
# weekly_candles.to_csv('weekly.csv')
# heikin_ashi_candles.to_csv('weekly_ha.csv')
heikin_ashi_candles


Unnamed: 0_level_0,ha_Open,ha_High,ha_Low,ha_Close,Green
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016-04-04,5.398750,12.300000,5.398750,11.21250,True
2016-04-11,8.305625,11.670000,7.520000,9.88675,True
2016-04-18,9.096187,10.900000,6.855000,8.94250,False
2016-04-25,9.019344,9.550000,7.730000,8.66975,False
2016-05-02,8.844547,9.360000,7.120000,8.32050,False
...,...,...,...,...,...
2022-01-31,3175.070219,3175.070219,2159.500000,2506.50000,False
2022-02-07,2840.785110,3078.800000,2476.800000,2803.92500,False
2022-02-14,2822.355055,3286.500000,2822.355055,3013.50000,True
2022-02-21,2917.927527,3199.400000,2575.700000,2817.50000,False


Now we plot the heikin ashi candles using the mpfinance module.

In [8]:
# weekly_candles.drop(labels=["unix"], inplace=True)
# heikin_ashi_candles.drop(labels=["unix"], inplace=True)
# heikin_ashi_candles=heikin_ashi_candles.drop(["unix"], axis=1)
trimmed_weekly=weekly_candles[3:]
combnined_df=pd.concat([trimmed_weekly, heikin_ashi_candles], axis=1)
testing_df_weekly = combnined_df.loc[:,["Open", "Green"]]
testing_df_weekly

Unnamed: 0_level_0,Open,Green
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-04-04,11.600,True
2016-04-11,8.560,True
2016-04-18,9.302,False
2016-04-25,8.050,False
2016-05-02,8.828,False
...,...,...
2022-01-31,2602.600,False
2022-02-07,3057.500,False
2022-02-14,2872.800,True
2022-02-21,2622.100,False


Note: We will assess action on the close of the heikin ashi candle, say timestamp t, at which the current price will be the open of the normal candle at timestamp t.

In [9]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Candlestick(x=heikin_ashi_candles.index,
                open=heikin_ashi_candles['ha_Open'],
                high=heikin_ashi_candles['ha_High'],
                low=heikin_ashi_candles['ha_Low'],
                close=heikin_ashi_candles['ha_Close'])])

fig2 = go.Figure(data=[go.Candlestick(x=trimmed_weekly.index,
                open=trimmed_weekly['Open'],
                high=trimmed_weekly['High'],
                low=trimmed_weekly['Low'],
                close=trimmed_weekly['Close'])])

# fig.update_yaxes(type="log")
# fig2.update_yaxes(type="log")
fig.show()
fig2.show()


For each candle calculate the following metrics:
-  Close/low
-  close/high
-  close/open

In [10]:
# closes = np.array(heikin_ashi_candles["Close"])
# open_prices = np.array(heikin_ashi_candles["Open"])
# highs = np.array(heikin_ashi_candles["High"])
# lows = np.array(heikin_ashi_candles["Low"])

# c_l = np.divide(closes,lows)
# c_h = np.divide(closes, highs)
# c_o = np.divide(closes, open_prices)

In [11]:
daily = convert_data_to_heikin_ashi_processed(eth)
eth["Date"] = pd.to_datetime(eth["unix"], unit="ms")
eth.set_index('Date', inplace=True)
combined_data=pd.concat([eth, daily], axis=1).dropna()
combined_data

Unnamed: 0_level_0,unix,close,high,low,open,ha_Open,ha_High,ha_Low,ha_Close,Green
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-03-12,1457740800000,12.92,13.45,11.95,11.95,5.733750,11.95000,5.733750,11.8500,True
2016-03-13,1457827200000,15.07,15.07,12.92,12.92,8.791875,13.45000,8.791875,12.5675,True
2016-03-14,1457913600000,12.50,15.07,11.40,15.07,10.679688,15.07000,10.679688,13.9950,True
2016-03-15,1458000000000,13.06,13.42,11.98,12.63,12.337344,15.07000,11.400000,13.5100,True
2016-03-16,1458086400000,12.88,13.89,12.62,13.06,12.923672,13.42000,11.980000,12.7725,False
...,...,...,...,...,...,...,...,...,...,...
2022-03-01,1646092800000,2978.30,3038.80,2855.00,2922.60,2708.765805,2954.30000,2569.000000,2765.9250,True
2022-03-02,1646179200000,2948.60,3046.70,2914.60,2978.30,2737.345402,3038.80000,2737.345402,2948.6750,True
2022-03-03,1646265600000,2834.20,2972.10,2785.00,2948.70,2843.010201,3046.70000,2843.010201,2972.0500,True
2022-03-04,1646352000000,2622.60,2836.10,2573.70,2834.20,2907.530101,2972.10000,2785.000000,2885.0000,False


In [12]:
fig = go.Figure(data=[go.Candlestick(x=daily.index,
                open=daily['ha_Open'],
                high=daily['ha_High'],
                low=daily['ha_Low'],
                close=daily['ha_Close'])])

fig2 = go.Figure(data=[go.Candlestick(x=eth.index,
                open=eth['open'],
                high=eth['high'],
                low=eth['low'],
                close=eth['close'])])

fig.show()
fig2.show()

In [13]:
combined_data.index.date

array([datetime.date(2016, 3, 12), datetime.date(2016, 3, 13),
       datetime.date(2016, 3, 14), ..., datetime.date(2022, 3, 3),
       datetime.date(2022, 3, 4), datetime.date(2022, 3, 5)], dtype=object)

### Backtesting
Now that we have matched the heikin ashi candles to the normal candles, we can backtest a simple trading strategy that sells when the HA candle is red and buys when it's green.

In [14]:
def single_test(raw_data: pd.DataFrame, sample_size=None):
    normal_candles = raw_data.__copy__()
    ha = convert_data_to_heikin_ashi_processed(raw_data)
    normal_candles["Date"] = pd.to_datetime(normal_candles["unix"], unit="ms")
    normal_candles.columns = ['Unix', 'Close', 'High', 'Low', 'Open', 'Date']
    normal_candles.set_index('Date', inplace=True)
    combined_data=pd.concat([normal_candles, ha], axis=1).dropna()
    test_df = combined_data.loc[:,["Open", "Green"]]
    if sample_size !=None:
        test_df = test_df.iloc[len(test_df)-sample_size-1:]
    

    longs, shorts, outcome, profit, final_equity, equity_curve=backtest_heikin_ashi(test_df)
    fig = go.Figure(data=[go.Candlestick(x=combined_data.index,
                    open=combined_data['Open'],
                    high=combined_data['High'],
                    low=combined_data['Low'],
                    close=combined_data['Close'])])
    
    fig2 = go.Figure(data=[go.Candlestick(x=combined_data.index,
                    open=combined_data['ha_Open'],
                    high=combined_data['ha_High'],
                    low=combined_data['ha_Low'],
                    close=combined_data['ha_Close'])])
    # plt.vlines(longs, ymin=raw_data['low'].min(), ymax=raw_data['high'].max(), colors='green')
    # plt.vlines(shorts, ymin=raw_data['low'].min(), ymax=raw_data['high'].max(), colors='red')
    print(np.sum(outcome)/len(outcome))
    print(np.mean(profit))
    print(final_equity)
    return longs, shorts, outcome, profit, final_equity, equity_curve, fig, fig2

def single_test_lower_timeframe(raw_data: pd.DataFrame, no_consecutive, sample_size=None):
    normal_candles = raw_data.__copy__()
    ha = convert_data_to_heikin_ashi_processed(raw_data)
    normal_candles["Date"] = pd.to_datetime(normal_candles["unix"], unit="ms")
    normal_candles.columns = ['Unix', 'Close', 'High', 'Low', 'Open', 'Date']
    normal_candles.set_index('Date', inplace=True)
    combined_data=pd.concat([normal_candles, ha], axis=1).dropna()
    test_df = combined_data.loc[:,["Open", "Green"]]
    if sample_size !=None:
        test_df = test_df.iloc[len(test_df)-sample_size-1:]
    
    longs, shorts, outcome, profit, final_equity, equity_curve=backtest_heikin_ashi_lower_timeframe(test_df, no_consecutive)
    fig = go.Figure(data=[go.Candlestick(x=combined_data.index,
                    open=combined_data['Open'],
                    high=combined_data['High'],
                    low=combined_data['Low'],
                    close=combined_data['Close'])])
    
    fig2 = go.Figure(data=[go.Candlestick(x=combined_data.index,
                    open=combined_data['ha_Open'],
                    high=combined_data['ha_High'],
                    low=combined_data['ha_Low'],
                    close=combined_data['ha_Close'])])
    # plt.vlines(longs, ymin=raw_data['low'].min(), ymax=raw_data['high'].max(), colors='green')
    # plt.vlines(shorts, ymin=raw_data['low'].min(), ymax=raw_data['high'].max(), colors='red')
    print(np.sum(outcome)/len(outcome))
    print(np.mean(profit))
    print(final_equity)
    return longs, shorts, outcome, profit, final_equity, equity_curve, fig, fig2

    

def backtest_heikin_ashi(test_df): # raw data contains normal OHCL candles
    equity=1
    fee_rate=0.0006
    state='neutral'
    timestamps = test_df.index.date
    equity_record=[]
    longs=[]
    shorts=[]
    outcome=[]
    profit=[]
    sl=None
    entry=None
    equity_curve=[]
    print('Datapoints: ',len(test_df))
    for i in range(1,len(test_df)):

        time=timestamps[i]
        
        current=test_df.iloc[i]['Open'].item() # uses opens now as opens are confirmed
        green= test_df.iloc[i]["Green"]
        
        if green and state != 'long':
            #remember to calculate profit if flipping from short
            if state=='short':
                outcome.append(current<entry)
                profit.append(1-(current/entry))
                equity=equity*(1+(1-(current/entry))-fee_rate)
                # equity=(entry*trade_amount-current*trade_amount)+equity
            entry=current
            equity_curve.append([time,equity])
            state='long'
            longs.append(time)
        elif not(green) and state != 'short':
            if state=='long':
                outcome.append(current>entry)
                profit.append(current/entry -1)
                equity=equity*(1+(current/entry -1)-fee_rate)
            entry=current
            equity_curve.append([time,equity])    
            # trade_amount=equity*entry
            state='short'
            shorts.append(time)
            #print(state+' from $'+str(current))
        if equity < 0.05:
            equity=0
            print('went broke')
            break

        if i==len(test_df)-1:
            if state=='long':
                equity=equity*(current/entry)
            elif state=='short':
                equity=equity*(2-current/entry)
            equity_curve.append([time,equity])

    
    return longs,shorts,np.array(outcome), np.array(profit), equity, np.array(equity_curve)

def backtest_heikin_ashi_lower_timeframe(test_df, no_consecutive): # raw data contains normal OHCL candles
    equity=1
    fee_rate=0.0006
    state='neutral'
    timestamps = test_df.index.date
    equity_record=[]
    longs=[]
    shorts=[]
    outcome=[]
    profit=[]
    sl=None
    entry=None
    equity_curve=[]
    print('Datapoints: ',len(test_df))
    for i in range(1,len(test_df)):

        time=timestamps[i]
        
        current=test_df.iloc[i]['Open'].item() # uses opens now as opens are confirmed
        green= test_df.iloc[i-no_consecutive:i+1]["Green"].all()
        
        if green and state != 'long':
            #remember to calculate profit if flipping from short
            if state=='short':
                outcome.append(current<entry)
                profit.append(1-(current/entry))
                equity=equity*(1+(1-(current/entry))-fee_rate)
                # equity=(entry*trade_amount-current*trade_amount)+equity
            entry=current
            equity_curve.append([time,equity])
            state='long'
            longs.append(time)
        elif not(green) and state != 'short':
            if state=='long':
                outcome.append(current>entry)
                profit.append(current/entry -1)
                equity=equity*(1+(current/entry -1)-fee_rate)
            entry=current
            equity_curve.append([time,equity])    
            # trade_amount=equity*entry
            state='short'
            shorts.append(time)
            #print(state+' from $'+str(current))
        if equity < 0.05:
            equity=0
            print('went broke')
            break

        if i==len(test_df)-1:
            if state=='long':
                equity=equity*(current/entry)
            elif state=='short':
                equity=equity*(2-current/entry)
            equity_curve.append([time,equity])

    
    return longs,shorts,np.array(outcome), np.array(profit), equity, np.array(equity_curve)

In [15]:
longs, shorts, outcome, profit, final_equity, equity_curve, fig, fig2 = single_test(weekly_candles_original, 30)
# fig.add_vline(x=longs[0], line_width=3, line_dash="dash", line_color="green")
fig.show()
fig2.show()
print(equity_curve)

Datapoints:  31
0.6
0.11303322320124683
1.6230706715064223


[[datetime.date(2021, 8, 9) 1]
 [datetime.date(2021, 9, 20) 1.1039226797624184]
 [datetime.date(2021, 10, 11) 1.0747063132583718]
 [datetime.date(2021, 11, 22) 1.3405035548342852]
 [datetime.date(2022, 2, 14) 1.7765095113588418]
 [datetime.date(2022, 2, 21) 1.620413344409454]
 [datetime.date(2022, 2, 28) 1.6230706715064223]]


In [18]:
longs2, shorts2, outcome2, profit2, final_equity2, equity_curve2, fig, fig2 = single_test_lower_timeframe(eth, 1)

Datapoints:  2185
0.36
0.002319620180747548
0.06170522067980019


In [None]:
hourly = price.get_price_data('1h',symbol='ETH/USD' )
hourly
# longs, shorts, outcome, profit, final_equity, equity_curve, fig = single_test(hourly)

Unnamed: 0,unix,close,high,low,open
0,1640552400000,4085.1,4107.3,4044.2,4074.0
1,1640556000000,4084.6,4095.7,4079.5,4085.1
2,1640559600000,4066.2,4093.7,4061.2,4084.6
3,1640563200000,4063.3,4081.8,4059.8,4066.2
4,1640566800000,4077.0,4080.8,4049.5,4063.3
...,...,...,...,...,...
1496,1645938000000,2709.8,2724.5,2698.3,2706.9
1497,1645941600000,2733.5,2739.0,2709.3,2709.8
1498,1645945200000,2737.4,2745.4,2733.5,2733.5
1499,1645948800000,2743.4,2748.3,2723.5,2737.4
