In [1]:
#Import functions
import polars as pl
from functools import partial
from itertools import product, combinations
import multiprocessing
from numba import jit
import numpy as np
import pickle

In [2]:
#Load the minute bars for one ticker from CSV
#infer_schema_length=None asks polars to scan the whole file when inferring column dtypes
currentDf = pl.read_csv(
    "../Data/SP500/minuteHist2021/tradingHours/AAPL.csv",
    infer_schema_length=None,
)

In [3]:
#Pull the open price, close price and timestamp columns out of the dataframe as numpy arrays
openPrices, closePrices, timestamps = (
    currentDf[columnName].to_numpy() for columnName in ("open", "close", "time")
)

In [4]:
#Stack the three 1d arrays as columns of one 2d array:
#column 0 = open price, column 1 = close price, column 2 = timestamp
dataArray = np.column_stack((openPrices, closePrices, timestamps))

#Number of rows (one per bar)
len(dataArray)

259972

In [17]:
#Algo
#Numba for speed
@jit(nopython=True, nogil=True)
def algo(paramsList, dataArray):
    """Backtest one up/down price pattern over the bars in dataArray.

    paramsList (array or tuple) is laid out as:
        [0]  fee      - fee amount, interpreted according to feeType
        [1]  feeType  - 0: fee is a fraction taken off the exit price,
                        1: fee is a fixed amount subtracted from the exit price
        [2]  timeout  - number of rows after entry at which the trade is closed
        [3]  gap      - number of rows between consecutive pattern price points
        [4:] pattern  - one direction per step, most recent step first:
                        1 requires the price not to have fallen over that step,
                        -1 requires it not to have risen

    dataArray is a 2d array with the open price in column 0 and the timestamp
    in column 2.

    Returns a list:
        [mean per-trade return, compounded return, number of trades, timeout, gap, *pattern]
    where each per-trade return is the ratio (exit price after fee) / (entry price).

    Raises ValueError if feeType is neither 0 nor 1.
    """
    fee=paramsList[0] #Fee amount (fraction or fixed, see feeType)
    feeType=paramsList[1] #The type of fee, 0 for percent, 1 for fixed
    timeout=int(paramsList[2]) #Rows to hold the trade before closing it
    gap=int(paramsList[3]) #The gap (in rows) between two pattern price points
    params=paramsList[4:] #Pattern directions, one per step

    #Fail loudly on an unknown fee type instead of using an unassigned return value
    if feeType!=0 and feeType!=1:
        raise ValueError("feeType must be 0 (percent) or 1 (fixed)")

    tradePercent=0.0 #Running sum of per-trade returns, averaged at the end
    totalReturn=1.0 #Running product of per-trade returns

    total=0 #Number of trades taken

    #The session filter looks 60 rows back and forward, the pattern looks
    #gap*len(params) rows back and the exit looks timeout rows forward.
    #Size the scanned range so none of those reads can wrap around or run off the array.
    lookback=gap*len(params)
    start=max(60, lookback) #Start of data
    end=dataArray.shape[0]-1-max(60, timeout) #End of data

    for j in range(start, end):
        #Alternative session filters (swap in for the condition below):

        #Check that its not within the first or last hour of trading
        #if 0<dataArray[j][2]-dataArray[j-60][2]<4500000 and 0<dataArray[j+60][2]-dataArray[j][2]<4500000:

        #Check that its all within one day of trading
        #if 0<dataArray[j][2]-dataArray[j-(gap*len(params))][2]<(60000*gap*len(params)*1.25) and 0<dataArray[j+timeout][2]-dataArray[j][2]<(60000*timeout*1.25):

        #Check that its within the first or last hour of trading: 60 rows back or
        #forward span more than 4500000 time units, so a break in the data lies within 60 rows
        #NOTE(review): presumably timestamps are in ms (4500000 = 75 minutes) - confirm
        if 4500000<dataArray[j][2]-dataArray[j-60][2] or 4500000<dataArray[j+60][2]-dataArray[j][2]:

            #Assume the pattern matches until a step contradicts it
            continueParams=True

            for k in range(len(params)):
                change = params[k]

                #Open prices at the two ends of step k (k=0 is the most recent step)
                afterPrice=dataArray[j-(gap*(k))][0]
                beforePrice=dataArray[j-(gap*(k+1))][0]

                #Multiplying by change flips the comparison for -1:
                #1 rejects a fall over the step, -1 rejects a rise
                if afterPrice*change<beforePrice*change:
                    continueParams = False
                    break

            if continueParams:

                #Add to tally
                total+=1

                #Return on investment = new price (after fee) / old price
                if feeType==0:
                    returnValue = (dataArray[j+timeout][0]*(1-fee))/dataArray[j][0]
                else:
                    returnValue = (dataArray[j+timeout][0]-fee)/dataArray[j][0]
                totalReturn*=returnValue
                tradePercent+=returnValue

    #Turn the sum into a mean, left at 0 when no trade was taken
    if total>0:
        tradePercent=tradePercent/total

    #Return the average trade percent, the total return, the total number of times, timeout, gap, and the parameters
    returnList = [tradePercent, totalReturn, total, timeout, gap] + [param for param in params]
    return returnList

#algo with all the dataframes already passed through
partialAlgo = partial(algo, dataArray=dataArray)

In [18]:
#Call algo once with a small pattern so numba compiles it
#Layout: fee, feeType, timeout, gap, then three pattern steps
warmupParams = np.array([0.001, 0, 50.00, 1, 1, 1, 1])
algo(warmupParams, dataArray)

[0.9992054398894269, 0.0001546979225064843, 10492.0, 50.0, 1.0, 1.0, 1.0, 1.0]

In [19]:
#Params
#Each list holds the candidate values for one position of algo's paramsList

#rough commission fee
fee=[0.005]

#0 for percent, 1 for fixed
feeType=[1]

#change
changeList=[1]

#timeout
timeoutList=[1,2,3,5,10,20,30,50]

#Gap between price points
gapList=[1,2,3,5]

#Up, down
upDown=[1, -1]

#Number of up/down steps in every pattern
patternLength=10

#Every fee/feeType/timeout/gap choice crossed with every up/down pattern of that length
fullCombinations=list(product(fee, feeType, timeoutList, gapList, *([upDown]*patternLength)))

len(fullCombinations)

32768

In [20]:
#Run the algo over the full combinations list with multiprocessing

#Results are kept in the same order as fullCombinations; nothing is filtered out here
allResults=[]

#Start multiprocessing with 15 worker processes
with multiprocessing.Pool(processes=15) as pool:
    #pool.map returns one result per combination, in input order
    allResults.extend(pool.map(partialAlgo, fullCombinations))

In [21]:
#Persist the results of this run to a pickle file
file_path = "../Results/sp500/V4/AAPL-minute2021-outday-33kParams-incFees.pkl"

#Serialise allResults and write the bytes out (any existing file is overwritten)
with open(file_path, mode="wb") as file:
    file.write(pickle.dumps(allResults))

In [22]:
#Function to sort the results
def getSortedResults(currentResults, minimum, maximum, n):
    """Filter results by trade count and return the top n under two rankings.

    currentResults: iterable of result lists as returned by algo, i.e.
        [return, total return, frequency, timeout, gap, *pattern].
    minimum, maximum: inclusive bounds on the frequency (index 2) a result
        must have to be kept.
    n: number of rows to keep in each returned frame.

    Returns (resultsDf, resultsDfTotal): two polars DataFrames, the first ranked
    by the result lists themselves in descending order (so by return first, ties
    broken by the following fields), the second ranked by total return descending.
    If no result passes the filter, both frames are empty and carry only the
    five fixed columns, because the number of pattern columns is unknown.
    """
    baseColumns=["Return", "Total return", "Frequency", "Timeout", "Gap"]

    #Keep only the results whose trade count lies within [minimum, maximum]
    results=[result for result in currentResults if maximum>=result[2]>=minimum]

    #Nothing passed the filter: return empty frames instead of failing on results[0]
    if not results:
        emptySchema={name: pl.Float64 for name in baseColumns}
        return pl.DataFrame(schema=emptySchema), pl.DataFrame(schema=emptySchema)

    #One "Param i" column for every entry after the five fixed ones
    schema=baseColumns + [f"Param {i}" for i in range(1, len(results[0])-len(baseColumns)+1)]

    #sorted() already returns a new list, so results itself is left untouched
    sortedResults = sorted(results, reverse=True)
    sortedTotalResults = sorted(results, reverse=True, key=lambda x: x[1])

    #orient="row" states that each inner list is a row; without it polars has to
    #guess, which is ambiguous when the number of rows equals the number of columns
    resultsDf=pl.DataFrame(sortedResults[0:n], schema=schema, orient="row")
    resultsDfTotal=pl.DataFrame(sortedTotalResults[0:n], schema=schema, orient="row")
    return resultsDf, resultsDfTotal

In [26]:
#Get all day
#NOTE(review): no visible cell writes this file; presumably it comes from an earlier run
#with the all-day filter enabled in algo - confirm
file_path1 = "../Results/sp500/V4/AAPL-minute2021-allday-33kParams-incFees.pkl"

#Load data from pickle file (unpickling can execute code, so only load files you produced)
with open(file_path1, mode='rb') as file:
    allResults1 = pickle.loads(file.read())

#Top 5 results with between 100 and 100000 trades, under both rankings
sortedResults1, sortedTotalResults1 = getSortedResults(allResults1, minimum=100, maximum=100000, n=5)
sortedTotalResults1

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.001253,1.260324,186.0,50.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0
1.001217,1.257932,190.0,50.0,2.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
1.000895,1.236272,239.0,30.0,2.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0
1.000941,1.233656,225.0,50.0,5.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,1.0
1.000699,1.218976,287.0,50.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,1.0,-1.0,1.0,-1.0


In [27]:
#Get mid day
#NOTE(review): no visible cell writes this file; presumably it comes from an earlier run
#with the mid-day filter enabled in algo - confirm
file_path2 = "../Results/sp500/V4/AAPL-minute2021-midday-33kParams-incFees.pkl"

#Load data from pickle file (unpickling can execute code, so only load files you produced)
with open(file_path2, mode='rb') as file:
    allResults2 = pickle.loads(file.read())

#Top 5 results with between 100 and 100000 trades, under both rankings
sortedResults2, sortedTotalResults2 = getSortedResults(allResults2, minimum=100, maximum=100000, n=5)
sortedTotalResults2

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.001353,1.263721,174.0,50.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0
1.001313,1.230331,159.0,50.0,2.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
1.000863,1.220146,233.0,50.0,5.0,1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0
1.000988,1.22012,203.0,50.0,5.0,1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,-1.0
1.000905,1.21332,217.0,50.0,1.0,-1.0,1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0


In [28]:
#Get out day
#Same path as the file written by the save cell of this notebook
file_path3 = "../Results/sp500/V4/AAPL-minute2021-outday-33kParams-incFees.pkl"

#Load data from pickle file (unpickling can execute code, so only load files you produced)
with open(file_path3, mode='rb') as file:
    allResults3 = pickle.loads(file.read())

#Top 5 results with between 100 and 100000 trades, under both rankings
sortedResults3, sortedTotalResults3 = getSortedResults(allResults3, minimum=100, maximum=100000, n=5)
sortedTotalResults3

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.00299,1.398995,114.0,50.0,5.0,1.0,-1.0,1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0
1.003175,1.364268,100.0,50.0,3.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0
1.002567,1.353184,120.0,50.0,2.0,-1.0,-1.0,-1.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0
1.002911,1.340924,103.0,50.0,1.0,-1.0,1.0,-1.0,-1.0,-1.0,1.0,1.0,1.0,-1.0,-1.0
1.002713,1.303919,100.0,50.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,1.0,-1.0,-1.0


In [300]:
#Re-run algo on the top all-day row: take its Timeout, Gap and Param columns (index 3 onwards)
#and prepend the fixed fee 0.005 with feeType 1
#NOTE(review): the recorded output below does not match row 0 of sortedTotalResults1 as
#displayed earlier (different gap and pattern) - likely stale kernel state, re-run to confirm
bestRow = sortedTotalResults1[0,3:].rows()[0]
bestParams = np.array([0.005, 1] + list(bestRow))
print(partialAlgo(bestParams))

[1.000031287315712, 2.0994693587823123, 32110.0, 50.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]


In [323]:
#Rank the in-memory allResults (not the pickled allResults3) by average return
#NOTE(review): this rebinds sortedResults3 and sortedTotalResults3, replacing the
#out-day tables computed from allResults3 - confirm that is intended
sortedResults3, sortedTotalResults3 = getSortedResults(allResults, minimum=100, maximum=100000, n=5)
sortedResults3

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.001026,1.124877,116.0,50.0,2.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000947,1.116499,117.0,30.0,5.0,1.0,-1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0
1.000932,1.0997,103.0,50.0,1.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,1.0,-1.0
1.000898,1.114422,122.0,50.0,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0
1.000881,1.104694,114.0,50.0,1.0,1.0,1.0,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0
