In [81]:
#Import functions
import polars as pl
from functools import partial
from itertools import product, combinations
import multiprocessing
from numba import jit
import numpy as np
import pickle

In [82]:
#Read the AAPL price history (trading-hours file, per the path) into a polars dataframe
#infer_schema_length=None lets polars look at every row before settling on column dtypes
currentDf = pl.read_csv(
    "../Data/SP500/minuteHist2021/tradingHours/AAPL.csv",
    infer_schema_length=None,
)

In [83]:
#Pull the open, close and time columns out of the dataframe as numpy arrays
openPrices, closePrices, timestamps = (
    currentDf[column].to_numpy() for column in ("open", "close", "time")
)

In [84]:
#Stack the three 1-D arrays side by side: one row per bar, columns = open, close, time
dataArray = np.column_stack((openPrices, closePrices, timestamps))
len(dataArray)

259972

In [295]:
#Algo
#Numba for speed
@jit(nopython=True, nogil=True)
def algo(paramsList, dataArray):
    """Back-test a "current open vs. earlier opens" pattern over the rows of dataArray.

    paramsList layout (array or tuple of numbers):
        [0]  fee      - fee amount, interpreted according to feeType
        [1]  feeType  - 0: fee is a fraction taken off the exit price,
                        1: fee is a fixed amount subtracted from the exit price
        [2]  timeout  - number of rows after entry at which the trade is closed
        [3]  gap      - spacing in rows between the earlier prices that are compared
        [4:] params   - one direction flag per look-back step (the notebook passes +1 / -1);
                        +1 requires the current open to be >= the open gap*(k+1) rows back,
                        -1 requires it to be <= that open

    dataArray is a 2-D array; column 0 is read as the open price and column 2 as the
    timestamp (column 1 is not used here).

    Returns a list: [mean return per trade, compounded return, number of trades,
    timeout, gap, *params]. Returns are ratios (exit price after fee / entry price),
    so 1.0 means break-even.

    Raises ValueError if feeType is neither 0 nor 1.
    """
    fee=paramsList[0] #Fee amount, see feeType
    feeType=paramsList[1] #The type of fee, 0 for percent, 1 for fixed
    timeout=int(paramsList[2]) #Rows to hold the trade before closing it
    gap=int(paramsList[3]) #The gap between the prices that are compared
    params=paramsList[4:] #Direction flag for each look-back step
    openPrices=dataArray[:,0]

    #Reject unknown fee types up front, otherwise returnValue would never be assigned below
    if feeType!=0 and feeType!=1:
        raise ValueError("feeType must be 0 (percent) or 1 (fixed)")

    tradePercent=0
    totalReturn=1

    total=0

    #The time-window check needs 60 rows on either side of j. Widen the margins when the
    #pattern looks back further (gap*len(params)) or the trade exits later (timeout), so
    #that no index goes negative (which would wrap around to the end of the data) or runs
    #past the last row (which nopython mode does not check).
    lookBack=max(60, gap*len(params))
    lookAhead=max(60, timeout)

    start=lookBack #Start of data
    end=len(openPrices)-lookAhead-1 #End of data

    for j in range(start, end):
        #Check that its not within the first or last hour of trading:
        #the rows 60 before and 60 after must each be less than 4500000 time units away
        #(presumably milliseconds, i.e. 75 minutes - confirm against the CSV)
        #NOTE: this only guards +/-60 rows; a timeout above 60 can still exit on a later day
        if 0<dataArray[j][2]-dataArray[j-60][2]<4500000 and 0<dataArray[j+60][2]-dataArray[j][2]<4500000:

        #Check that its all within one day of trading
        #if 0<dataArray[j][2]-dataArray[j-(gap*len(params))][2]<(60000*gap*len(params)*1.25) and 0<dataArray[j+timeout][2]-dataArray[j][2]<(60000*timeout*1.25):

        #Check that its all within first or last hour of trading
        #if 4500000<dataArray[j][2]-dataArray[j-60][2] or 4500000<dataArray[j+60][2]-dataArray[j][2]:

            #Entry price is the open of the current row
            startPrice=dataArray[j][0]
            continueParams=True

            for k in range(len(params)):
                change = params[k]

                #Open price gap*(k+1) rows before the current row
                checkPrice=dataArray[j-(gap*(k+1))][0]

                #Multiplying both sides by the flag flips the comparison for -1:
                #the pattern fails as soon as one earlier price is on the wrong side
                if startPrice*change<checkPrice*change:
                    continueParams = False
                    break

            if continueParams:

                #Add to tally
                total+=1

                #Return on investment = exit price after fee / entry price
                if feeType==0:
                    returnValue = (dataArray[j+timeout][0]*(1-fee))/dataArray[j][0]
                else:
                    returnValue = (dataArray[j+timeout][0]-fee)/dataArray[j][0]
                totalReturn*=returnValue
                tradePercent+=returnValue

    #Turn the sum of per-trade returns into a mean (left at 0 when there were no trades)
    if total>0:
        tradePercent=tradePercent/total

    #Return the average trade percent, the compounded return, the number of trades, timeout, gap, and the parameters
    returnList = [tradePercent, totalReturn, total, timeout, gap] + [param for param in params]
    return returnList

#algo with dataArray already bound, so callers (e.g. pool.map below) only pass the parameter list
partialAlgo = partial(algo, dataArray=dataArray)

In [296]:
#Call algo once on a small parameter set so numba compiles it
algo(
    np.array([
        0.001,    #fee
        0,        #feeType: 0 = percent
        50.0,     #timeout
        1,        #gap
        1, 1, 1,  #params: three look-back direction flags
    ]),
    dataArray,
)

[0.9990414901766157, 4.5215246688507535e-25, 57943.0, 50.0, 1.0, 1.0, 1.0, 1.0]

In [308]:
#Parameter grid for the sweep

#Rough commission fee
fee=[0.005]

#How the fee is applied: 0 for percent, 1 for fixed
feeType=[1]

#change
changeList=[1]

#Number of rows to hold a trade before closing it
timeoutList=[1,2,3,5,10,20,30,50]

#Gap between price points
gapList=[1,2,3,5]

#Direction flag for one look-back step: up, down
upDown=[1, -1]

#Cartesian product of every axis; the pattern has ten look-back steps, each up or down
fullCombinations=list(product(fee, feeType, timeoutList, gapList, *([upDown] * 10)))

len(fullCombinations)

32768

In [298]:
#Run the algo over the full combinations list with multiprocessing

#One result row per parameter combination, in the same order as fullCombinations
allResults=[]

#Spread the combinations over 15 worker processes
with multiprocessing.Pool(processes=15) as pool:
    #Every row is kept here; filtering on the trade tally happens later in getSortedResults
    allResults.extend(pool.map(partialAlgo, fullCombinations))

In [299]:
#Persist the raw result rows so the sweep does not have to be re-run
#NOTE(review): the "midday" in the name presumably has to match the time-window check enabled in algo - confirm before saving
file_path = "../Results/sp500/V3/AAPL-minute2021-midday-33kParams-incFees.pkl"

#Write allResults to disk with pickle
with open(file_path, mode="wb") as file:
    pickle.dump(allResults, file)

In [310]:
#Function to sort the results
def getSortedResults(currentResults, minimum, maximum, n):
    """Filter result rows by trade count and return the best n as two dataframes.

    currentResults: iterable of rows laid out as
        [Return, Total return, Frequency, Timeout, Gap, Param 1, Param 2, ...]
    minimum, maximum: inclusive bounds on the Frequency value (row[2]).
    n: number of rows to keep in each dataframe.

    Returns (resultsDf, resultsDfTotal):
        resultsDf      - top n rows by whole-row comparison, descending (Return compared first)
        resultsDfTotal - top n rows by Total return, descending
    When no row passes the filter both are empty dataframes holding only the five fixed columns.
    """
    fixedColumns=["Return", "Total return", "Frequency", "Timeout", "Gap"]

    #Keep only rows whose trade count lies inside [minimum, maximum]
    results=[result for result in currentResults if maximum>=result[2]>=minimum]

    if not results:
        #Nothing to rank; the number of Param columns cannot be derived from an empty list
        return pl.DataFrame(schema=fixedColumns), pl.DataFrame(schema=fixedColumns)

    #sorted() already returns a new list, so results itself is left untouched
    sortedResults = sorted(results, reverse=True)
    sortedTotalResults = sorted(results, reverse=True, key=lambda x: x[1])

    #Everything after the five fixed columns is a pattern parameter
    schema=fixedColumns + [f"Param {i}" for i in range(1, len(sortedResults[0])-4)]

    #Each inner list is one row; say so explicitly instead of relying on orientation inference
    resultsDf=pl.DataFrame(sortedResults[0:n], schema=schema, orient="row")
    resultsDfTotal=pl.DataFrame(sortedTotalResults[0:n], schema=schema, orient="row")
    return resultsDf, resultsDfTotal

In [311]:
#Get all day
file_path1 = "../Results/sp500/V3/AAPL-minute2021-allday-33kParams-incFees.pkl"

#pickle.load can run arbitrary code from the file, so only load result files written by this notebook
with open(file_path1, mode='rb') as file:
    allResults1 = pickle.load(file)

#Keep parameter sets that traded between 100 and 100000 times and show the best 5 by total return
sortedResults1, sortedTotalResults1 = getSortedResults(
    currentResults=allResults1, minimum=100, maximum=100000, n=5
)
sortedTotalResults1

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.000124,46.944432,33796.0,50.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000104,33.586937,37534.0,50.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000103,27.284332,35530.0,50.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000103,17.199888,30488.0,50.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000052,5.677716,37683.0,30.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [312]:
#Get mid day
file_path2 = "../Results/sp500/V3/AAPL-minute2021-midday-33kParams-incFees.pkl"

#pickle.load can run arbitrary code from the file, so only load result files written by this notebook
with open(file_path2, mode='rb') as file:
    allResults2 = pickle.load(file)

#Keep parameter sets that traded between 100 and 100000 times and show the best 5 by total return
sortedResults2, sortedTotalResults2 = getSortedResults(
    currentResults=allResults2, minimum=100, maximum=100000, n=5
)
sortedTotalResults2

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.00011,19.802931,29688.0,50.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000114,19.629677,28637.0,50.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000098,14.491792,30189.0,50.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.00009,12.115788,30930.0,50.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000047,3.344269,31310.0,50.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [313]:
#Get out day
file_path3 = "../Results/sp500/V3/AAPL-minute2021-outday-33kParams-incFees.pkl"

#pickle.load can run arbitrary code from the file, so only load result files written by this notebook
with open(file_path3, mode='rb') as file:
    allResults3 = pickle.load(file)

#Keep parameter sets that traded between 100 and 100000 times and show the best 5 by total return
sortedResults3, sortedTotalResults3 = getSortedResults(
    currentResults=allResults3, minimum=100, maximum=100000, n=5
)
sortedTotalResults3

Return,Total return,Frequency,Timeout,Gap,Param 1,Param 2,Param 3,Param 4,Param 5,Param 6,Param 7,Param 8,Param 9,Param 10
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.000315,70.556912,15480.0,50.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1.000197,10.221622,14897.0,50.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1.000208,9.574106,13810.0,50.0,2.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1.000165,8.987912,15480.0,30.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1.000189,7.266941,13653.0,50.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [300]:
#Re-run the top all-day row (its timeout, gap and pattern flags) with fee=0.005, feeType=1
#The result depends on which time-window check is enabled in algo when this cell runs
print(partialAlgo(np.array([0.005, 1, *sortedTotalResults1[0,3:].rows()[0]])))

[1.000031287315712, 2.0994693587823123, 32110.0, 50.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
