<a href="https://colab.research.google.com/github/CookCL/stocks-data/blob/main/SPYGap.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [None]:
pip install polygon-api-client

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting polygon-api-client
  Downloading polygon_api_client-0.2.11-py3-none-any.whl (22 kB)
Collecting websockets>=8.0.2
  Downloading websockets-10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (112 kB)
[K     |████████████████████████████████| 112 kB 9.5 MB/s 
[?25hCollecting websocket-client>=0.56.0
  Downloading websocket_client-1.3.3-py3-none-any.whl (54 kB)
[K     |████████████████████████████████| 54 kB 2.9 MB/s 
Installing collected packages: websockets, websocket-client, polygon-api-client
Successfully installed polygon-api-client-0.2.11 websocket-client-1.3.3 websockets-10.3


In [None]:
import time
import datetime
import json
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from polygon import RESTClient

# Data preparation

In [None]:

def polygonData(key:str,ticker:str,qty:int=1,period:str='day',start:str='2015-01-01',
                end:str='2022-06-01',freeVersion:bool=True) -> pd.DataFrame:

    '''
    Downloads price data of the specified ticker for the date range desired.

    Parameters
    ----------
    key: str
        String with the polygon API key.
    ticker: str
        String with the ticker to look for.
    qty: int
        Multiplier for the timeframe creation.
    period: str
        Timeframe period. May be 'minute', 'hour', 'day', etc. 
        For more look in polygon documentation.
    start: str
        Initial date for the data.
    end: str
        Final date for the data.
    freeVersion: bool
        True to sleep 13 seconds after the request so that repeated calls
        stay within the free version usage limits.
    
    Returns
    -------
    df: pd.DataFrame
        One row per returned aggregate with the columns Open, High, Low,
        Close, Volume and VWAP. The index holds the bar timestamps as
        '%Y-%m-%d %H:%M:%S' strings in the local timezone of the machine.
        The frame is empty when the response carries no usable results; in
        that case the response status is printed instead of raising.
    '''

    columns = ['Open', 'High', 'Low', 'Close','Volume','VWAP']
    # Keyed by timestamp so a repeated timestamp overwrites the earlier row,
    # while insertion order keeps the rows in response order.
    rows = {}
    with RESTClient(key) as client:

        resp = client.stocks_equities_aggregates(ticker, qty, period, start, end, unadjusted=False, limit=50000)
        try:
            for result in resp.results:
                # 't' is divided by 1000 before conversion — presumably epoch
                # milliseconds (TODO confirm against the polygon docs).
                stamp = datetime.datetime.fromtimestamp(result['t']/1000.0).strftime('%Y-%m-%d %H:%M:%S')
                rows[stamp] = [result['o'],result['h'],result['l'],result['c'],result['v'],result['vw']]
        except Exception:
            # Best effort: a response without results (or with malformed
            # entries) is reported, and rows parsed so far are kept.
            # KeyboardInterrupt/SystemExit are deliberately NOT swallowed.
            print(resp.status)

        if freeVersion:
            time.sleep(13)

    # Build the frame once instead of enlarging it row by row.
    return pd.DataFrame(list(rows.values()), index=list(rows.keys()), columns=columns)

In [None]:
# Prefer a key supplied through the environment so it never has to be typed
# into the notebook; the placeholder keeps the original behaviour otherwise.
polygon_key = os.environ.get('POLYGON_API_KEY', 'your_polygon_key')
complete_df = pd.DataFrame()  # kept for compatibility; not used by this cell
end = datetime.date.today()
window_start = end - datetime.timedelta(days=2*365)

# Single request for the last two years of daily SPY bars.
df = None
try:
    df = polygonData(polygon_key,'SPY',1,'day',window_start.strftime('%Y-%m-%d'),end.strftime('%Y-%m-%d'),freeVersion=False)
    df.index = pd.to_datetime(df.index)
    # Move `end` back by the span actually covered by the download.
    end = (end - datetime.timedelta(days=(df.index[-1]-df.index[0]).days))
except Exception as e:
    print(e)
error = True  # same final value the original loop left behind

# Only persist when a frame was actually produced; otherwise the real error
# printed above would be masked by a NameError on `df`.
if df is not None:
    df.to_csv('SPY1.csv')
else:
    print('No data downloaded; SPY1.csv was not written.')

## Feature engineering

In [None]:
spy = df

# Open-vs-previous-close gap as a fraction; split into up/down columns (in %)
# with NaN wherever the gap has the other sign (or is undefined).
gap_ratio = spy['Open']/spy['Close'].shift(1) - 1
spy['%GapUp'] = np.where(gap_ratio > 0, gap_ratio*100, float('nan'))
spy['%GapDn'] = np.where(gap_ratio < 0, gap_ratio*100, float('nan'))

# Walk the candles once, tracking the direction currently being "followed":
# 1 after an up gap, -1 after a down gap, 0 when flat.
follow = []
pos = 0
for day_open, day_close, gap_up, gap_dn in zip(spy['Open'], spy['Close'],
                                               spy['%GapUp'], spy['%GapDn']):
    # Exit logic: a candle closing against the followed direction ends it
    closed_lower = day_close < day_open
    closed_higher = day_close > day_open
    if (pos == 1 and closed_lower) or (pos == -1 and closed_higher):
        pos = 0
    # Entry logic: a fresh gap (re)starts following, up gaps checked first
    if gap_up > 0.01:
        pos = 1
    elif gap_dn < -0.01:
        pos = -1

    follow.append(pos)

spy['Follow'] = follow

# Running length of each consecutive streak: cumulative count of streak rows
# minus the count reached at the last non-streak row.
following_up = spy['Follow'] > 0
up_count = following_up.cumsum()
spy['FollowUp'] = up_count-up_count.where(~following_up).ffill().fillna(0).astype(int)

following_dn = spy['Follow'] < 0
dn_count = following_dn.cumsum()
spy['FollowDn'] = dn_count-dn_count.where(~following_dn).ffill().fillna(0).astype(int)

# A streak's final length sits on the row whose successor is back at zero.
up_streak_end = (spy['FollowUp'] != 0) & (spy['FollowUp'].shift(-1) == 0)
dn_streak_end = (spy['FollowDn'] != 0) & (spy['FollowDn'].shift(-1) == 0)
avg_follow_up = spy['FollowUp'][up_streak_end].mean()
avg_follow_dn = spy['FollowDn'][dn_streak_end].mean()

print(f'''Avg. Follow Up: {avg_follow_up}
Avg. Follow Down: {avg_follow_dn}''')
spy

Avg. Follow Up: 2.236220472440945
Avg. Follow Down: 1.6904761904761905


Unnamed: 0,Open,High,Low,Close,Volume,VWAP,%GapUp,%GapDn,Follow,FollowUp,FollowDn
2020-07-21 04:00:00,326.45,326.9300,323.940,325.01,57691215.0,325.6259,,,0,0,0
2020-07-22 04:00:00,324.62,327.2000,324.500,326.86,57917101.0,325.8703,,-0.119996,-1,0,1
2020-07-23 04:00:00,326.47,327.2300,321.480,322.96,75841843.0,324.3429,,-0.119317,-1,0,2
2020-07-24 04:00:00,320.95,321.9900,319.246,320.88,72946759.0,320.9104,,-0.622368,-1,0,3
2020-07-27 04:00:00,321.63,323.4100,320.775,323.22,48212747.0,322.3792,0.233732,,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...
2022-07-14 04:00:00,373.61,379.0498,371.040,377.91,89931568.0,375.5888,,-1.377927,-1,0,5
2022-07-15 04:00:00,382.55,385.2500,380.540,385.13,79637368.0,383.3361,1.227806,,1,1,0
2022-07-18 04:00:00,388.38,389.0900,380.660,381.95,63245022.0,385.0900,0.843871,,1,2,0
2022-07-19 04:00:00,386.08,392.8700,385.390,392.27,78528370.0,389.3655,1.081293,,1,3,0


# Manual Strategy testing

In [None]:
# Parameters for the manual continuation/reversion cells below.
# sell: the pattern cells combine it with the short condition via `& sell`,
#       so False means no -1 (short) signals are produced.
sell = False
# gap: threshold compared against the %GapUp / %GapDn columns.
gap = 0.5
# avg_follow_up: used as the look-ahead in `Change.shift(-int(avg_follow_up))`.
#       NOTE: this overwrites the average computed in the feature-engineering cell.
avg_follow_up = 1

## Continuation pattern

In [None]:
# Continuation: trade in the direction of the gap.
cont = spy.copy()

# Long on an up gap above the threshold; short on a down gap only if `sell`.
cont_long = cont['%GapUp'] > gap
cont_short = (cont['%GapDn'] < -gap) & sell
cont['Signal'] = np.where(cont_long, 1, np.where(cont_short, -1, 0))

# Open-to-close return of each candle.
cont['Change'] = (cont['Close'] - cont['Open']).div(cont['Open'])

# On signal rows: this candle's change plus the change of the candle
# `avg_follow_up` rows ahead; zero elsewhere.
cont_ahead = cont['Change'].shift(-int(avg_follow_up))
cont['PosChange'] = np.where(cont['Signal'] != 0, cont['Change'] + cont_ahead, 0)
cont

## Reversion pattern

In [None]:
# Reversion: trade against the direction of the gap.
rev = spy.copy()

# Long on a down gap beyond the threshold; short on an up gap only if `sell`.
rev_long = rev['%GapDn'] < -gap
rev_short = (rev['%GapUp'] > gap) & sell
rev['Signal'] = np.where(rev_long, 1, np.where(rev_short, -1, 0))

# Open-to-close return of each candle.
rev['Change'] = (rev['Close'] - rev['Open']).div(rev['Open'])

# On signal rows: this candle's change plus the change of the candle
# `avg_follow_up` rows ahead; zero elsewhere.
rev_ahead = rev['Change'].shift(-int(avg_follow_up))
rev['PosChange'] = np.where(rev['Signal'] != 0, rev['Change'] + rev_ahead, 0)
rev

## Analysis

In [None]:
trades = {
    'Reversion': rev[rev['Signal'] != 0],
    'Continuation': cont[cont['Signal'] != 0]
}

# Calendar days covered by the data. pd.to_datetime accepts both string labels
# and Timestamps, so this works whether or not the index was already parsed.
span_days = (pd.to_datetime(spy.index[-1]) - pd.to_datetime(spy.index[0])).days

for t in trades:
    temp = trades[t]
    # Direction-adjusted result: positive means the trade made money,
    # for long (Signal = 1) and short (Signal = -1) positions alike.
    outcome = temp['PosChange']*temp['Signal']
    won = outcome > 0
    lost = outcome < 0

    n_win = int(won.sum())
    n_loss = int(lost.sum())
    # Only decided trades count: rows whose outcome is NaN (no look-ahead
    # candle available) or exactly zero are left out.
    n_total = n_win + n_loss
    avg_win = outcome[won].mean()
    avg_loss = outcome[lost].mean()
    winrate = n_win/n_total if n_total > 0 else float('nan')
    # An empty side contributes nothing instead of turning the result into NaN.
    win_part = avg_win*winrate if n_win > 0 else 0.0
    loss_part = (1-winrate)*abs(avg_loss) if n_loss > 0 else 0.0
    spec = win_part - loss_part
    # Trades per calendar day.
    freq = n_total/span_days if span_days > 0 else float('nan')
    # `x*1000000//1/10000` prints a fraction as a percentage floored to 4 decimals.
    print(t+'--------------------------------------------------------------------------')
    print('Avg. move: ',temp['PosChange'].mean()*1000000//1/10000,'%')
    print('#Win: ',n_win)
    print('Avg. win: ',avg_win*1000000//1/10000,'%')
    print('#Loss: ',n_loss)
    print('Avg. loss: ',avg_loss*1000000//1/10000,'%')
    print('Winrate: ',winrate*10000//1/100,'%')
    print('Trade Expectancy: ',spec*1000000//1/10000, '%')
    print('Monthly Expectancy: ',((1+spec)**(20*freq) - 1)*1000000//1/10000, '%')
    print('Yearly Expectancy: ',((1+spec)**(5*52*freq) - 1)*1000000//1/10000, '%')
    print('')

# Parameters testing

In [None]:
def signals(df:pd.DataFrame,strat:str='continuation',gap:float=0.0,hold:int=1,
            sell:bool=True,overnight:bool=False) -> pd.DataFrame:

    '''
    Generates signals and returns.

    The input frame is not modified: the signals are written to a copy, so
    results produced with different parameters never share (and overwrite)
    the same underlying object.

    Parameters
    ----------
    df: pd.DataFrame
        DataFrame containing all the data and needed features. Must provide
        the columns 'Open', 'Close', '%GapUp' and '%GapDn'.
    strat: str
        String with the strategy for the signals: 'continuation' or
        'reversion' (case-insensitive).
    gap: float
        Gap Percentage to look for.
    hold: int
        Number of candles to hold the position, entry candle included.
    sell: bool
        False to go only long.
    overnight: bool
        True to hold positions overnight.
    
    Returns
    -------
    df: pd.DataFrame
        Copy of the input data with the added columns 'Signal' (1 long,
        -1 short, 0 none) and 'PosChange' (price change over the holding
        window on signal rows, 0 elsewhere). With overnight=False a 'Change'
        column (open-to-close return per candle) is added as well.

    Raises
    ------
    ValueError
        If strat is neither 'continuation' nor 'reversion'.
    '''

    df = df.copy()
    hold = int(hold)
    strategy = str(strat).lower()

    if strategy == 'reversion':
        # Fade the gap: buy down gaps, (optionally) sell up gaps.
        df['Signal'] = np.where(df['%GapDn'] < -gap, 1, 
                        np.where((df['%GapUp'] > gap) & sell,-1,0))
    elif strategy == 'continuation':
        # Follow the gap: buy up gaps, (optionally) sell down gaps.
        df['Signal'] = np.where(df['%GapUp'] > gap, 1, 
                        np.where((df['%GapDn'] < -gap) & sell,-1,0))
    else:
        raise ValueError('No strategy with that name. Must be: continuation or reversion.')

    if overnight:
        # Entry at this candle's open, exit at the close hold-1 candles later.
        # Signals too close to the end of the data get NaN (no exit candle).
        df['PosChange'] = np.where(df['Signal'] != 0 ,(df['Close'].shift(1-hold)-df['Open'])/df['Open'], 0)
    else:
        # Intraday only: compound the open-to-close returns of the next
        # `hold` candles (fewer when the data ends first).
        df['Change'] = (df['Close'] - df['Open'])/df['Open']

        growth = (df['Change'] + 1).tolist()
        df['PosChange'] = [np.prod(growth[i:i+hold]) - 1 if flag != 0 else 0.0
                           for i, flag in enumerate(df['Signal'].tolist())]
    
    return df

def stats(df:pd.DataFrame,trades:pd.DataFrame,show:bool=True) -> dict:

    '''
    Generates trading statistics.

    Parameters
    ----------
    df: pd.DataFrame
        DataFrame containing all the data and needed features. Only its
        first and last index labels are used, to measure the covered period.
    trades: pd.DataFrame
        DataFrame with the columns 'Signal' and 'PosChange'. Rows whose
        direction-adjusted result (PosChange*Signal) is zero or NaN are not
        counted as trades.
    show: bool
        True to print the statistics.
    
    Returns
    -------
    stats: dict
        Keys: 'Winrate', '#Wins', '#Loss', '#Trades', 'AvgWin', 'AvgLoss',
        'RR', 'Kelly', 'Expectancy', 'Frequency', 'MonthExp', 'YearExp'.
        'AvgWin' and 'AvgLoss' are absolute values. 'Frequency' is trades per
        calendar day. Ratios that cannot be computed (no trades, no wins or
        no losses, zero-day span) are NaN instead of raising.
    '''

    # Direction-adjusted result: > 0 is a winning trade, < 0 a losing one.
    outcome = trades['PosChange']*trades['Signal']
    wins = outcome > 0
    losses = outcome < 0

    n_win = int(wins.sum())
    n_loss = int(losses.sum())
    # Decided trades only. Counting `outcome != 0` would also count NaN
    # outcomes (signals without an exit candle) and deflate the win rate.
    n_total = n_win + n_loss
    avg_win = abs(trades['PosChange'][wins]).mean()
    avg_loss = abs(trades['PosChange'][losses]).mean()
    winrate = n_win/n_total if n_total > 0 else float('nan')
    # An empty side contributes nothing to the expectancy (its mean is NaN).
    win_term = avg_win*winrate if n_win > 0 else 0.0
    loss_term = (1-winrate)*abs(avg_loss) if n_loss > 0 else 0.0
    expec = win_term - loss_term
    # pd.to_datetime accepts Timestamp as well as string index labels.
    span_days = (pd.to_datetime(df.index[-1]) - pd.to_datetime(df.index[0])).days
    freq = n_total/span_days if span_days > 0 else float('nan')

    stats_dict = {'Winrate':winrate, '#Wins':n_win, '#Loss':n_loss, '#Trades':n_total, 
                  'AvgWin':avg_win,'AvgLoss':avg_loss,'RR':abs(avg_win/avg_loss),
                  'Kelly':expec/avg_win,'Expectancy':expec,'Frequency':freq,
                  'MonthExp':((1+expec)**(20*freq)-1),'YearExp':((1+expec)**(5*52*freq)-1)}

    if show:
        # `x*1000000//1/10000` prints a fraction as a percentage floored to 4 decimals.
        print('Avg. move: ',trades['PosChange'].mean()*1000000//1/10000,'%')
        print('#Win: ',n_win,' #Loss: ',n_loss,' Winrate: ',winrate*10000//1/100,'%')
        print('Avg. win: ',avg_win*1000000//1/10000,'%',' Avg. loss: ',avg_loss*1000000//1/10000,'%')
        print('Trade Expectancy: ',expec*1000000//1/10000, '%')
        print('Monthly Expectancy: ',((1+expec)**(20*freq) - 1)*1000000//1/10000, '%')
        print('Yearly Expectancy: ',((1+expec)**(5*52*freq) - 1)*1000000//1/10000, '%')
        print('')

    return stats_dict

def surfacePlot(df:pd.DataFrame, name:str=None):

    '''
    Plots surface to compare the tested parameters.

    Parameters
    ----------
    df: pd.DataFrame
        Grid of results to plot. The index is drawn on the x axis
        ('Hold Candles'), the columns on the y axis ('Gap (%)') and the cell
        values on the z axis ('Return (%)').
    name: str
        Label placed at the start of the plot title. When omitted, the
        module-level variable `s` is used if it exists (this is how the title
        was built before the parameter was added), otherwise an empty label.
    '''

    if name is None:
        name = globals().get('s', '')

    # Surface reads z row by row along y and column by column along x, so with
    # x = index and y = columns the value matrix has to be transposed.
    fig = go.Figure(data=[go.Surface(x=df.index,y=df.columns,z=df.values.T)])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True, highlightcolor="limegreen", project_z=True),
                        contours_y=dict(show=True, usecolormap=True, highlightcolor="limegreen", project_y=True),
                        contours_x=dict(show=True, usecolormap=True, highlightcolor="limegreen", project_x=True))
    fig.update_layout(title=f'{name} Anual expectancy', autosize=False,
                        width=1000,
                        height=700,
                        scene = dict(
                            xaxis_title='Hold Candles',
                            yaxis_title='Gap (%)',
                            zaxis_title='Return (%)'),
        )

    fig.show()


In [None]:
# Grid search: yearly expectancy for every (strategy, gap, hold) combination.
# strats[strategy]['<gap> <hold>'] keeps the data, trades and statistics of a run;
# test[strategy] becomes a DataFrame (index = hold, columns = gap) for plotting.
strats = {'Reversion':{},'Continuation':{}}
test = {}
gap = [i/100 for i in range(0,200+10,10)]
hold = range(1,20+1,1)
for s in strats:
    test[s] = {}
    for g in gap:
        test[s][g] = {}
        for h in hold:
            key = str(g)+' '+str(h)
            # Work on a fresh copy: signals() writes its columns into the frame
            # it receives, so passing `spy` itself would make every stored
            # 'Data' entry point at the same, repeatedly overwritten object.
            data_raw = signals(spy.copy(),strat=s,gap=g,hold=h,sell=True,overnight=True)
            strats[s][key] = {
                'Data': data_raw,
                'Trades': data_raw[ data_raw['Signal'] != 0 ],
                'Stats': stats(spy,data_raw,show=False),
            }
            # Yearly expectancy as a percentage floored to 2 decimals.
            test[s][g][h] = strats[s][key]['Stats']['YearExp']*10000//1/100
    test[s] = pd.DataFrame(test[s])
    surfacePlot(test[s])

-6.19


6.58


In [None]:
# Show the statistics stored for one grid point of the Continuation strategy;
# the keys of strats[...] are built as '<gap> <hold>', so this is gap 0.4, hold 2.
strats['Continuation']['0.4 2']['Stats']

{'#Loss': 102,
 '#Trades': 224,
 '#Wins': 122,
 'AvgLoss': 0.013604914027515643,
 'AvgWin': 0.012841156542343399,
 'Expectancy': 0.0007987494078540115,
 'Frequency': 0.30727023319615915,
 'Kelly': 0.06220229503628859,
 'MonthExp': 0.004918736455635342,
 'RR': 0.9438616456063182,
 'Winrate': 0.5446428571428571,
 'YearExp': 0.06586516077966964}