In [4]:
from sklearn.neighbors import NearestNeighbors
import multiprocessing as mp
import empyrical
import portfolioGeneration
import pandas as pd
import numpy as np
import dataAck
import portfolio
from sklearn.preprocessing import MinMaxScaler


import params
from google.cloud import datastore, storage, logging
import time
import pickle
import hashlib
import sys

In [None]:
class TradeableCurvePredictor:
    def __init__(self, inputSeries, targetTicker, lookbackDistance, predictionDistance, radius, minConfidence, minNeighbors, tradeableMinReturn):
        self.parallelism = 16
        self.inputSeries = inputSeries
        self.targetTicker = targetTicker
        self.lookbackDistance = lookbackDistance
        self.predictionDistance = predictionDistance
        self.radius = radius
        self.minConfidence = minConfidence
        self.minNeighbors = minNeighbors
        self.tradeableMinReturn = tradeableMinReturn
    
    def describe(self):
        return (self.inputSeries.describe(), self.targetTicker, self.lookbackDistance, self.predictionDistance, self.radius, self.minConfidence, self.minNeighbors, self.tradeableMinReturn)

    def getHash(self):
        return hashlib.sha224(str(self.describe()).encode('utf-8')).hexdigest()

    def getReverseHash(self):
        return self.getHash()

    def getAllHashes(self):
        return [self.getHash()]

    def formUploadDictionary(self):
        toUpload = {}
        toUpload["ticker"] = self.targetTicker
        toUpload["predictionLength"] = self.predictionDistance
        toUpload["lookbackDistance"] = self.lookbackDistance
        toUpload["radius"] = self.radius
        toUpload["minConfidence"] = self.minConfidence
        toUpload["minNeighbors"] = self.minNeighbors
        toUpload["series"] = str(self.inputSeries.describe())
        toUpload["tradeableMinReturn"] = self.tradeableMinReturn
        return toUpload

    def numberOfPredictors(self):
        return 1

    def returnAllTickersInvolved(self):
        return [self.targetTicker, self.inputSeries.ticker]



    @staticmethod
    def ensureNoShifts(nearestIndicies):
        breadth = 5
        keptIndicies = []
        for item in nearestIndicies:
            k = item-breadth
            shouldAdd = True
            while k < item + breadth:
                if k in keptIndicies:
                    shouldAdd = False
                    break
                k += 1
            if shouldAdd == True:
                keptIndicies.append(item)
        return keptIndicies
    
    def generateWindowReturn(self, predictions, short):
        if predictions[0] > 0: ##OTHERWISE INVALID
            return predictions[2] if short == False else -predictions[2]
        return 0.0 ##NO POSITION TAKEN
    
    @staticmethod
    def enforceBuyPreference(returns):
        ##ASSUME SHORT ON LEFT
        if returns[0] != 0 and returns[1] != 0:
            return returns[1]
        elif returns[0] == 0 and returns[1] != 0:
            return returns[1]
        elif returns[0] != 0 and returns[1] == 0:
            return returns[0]
        else:
            return 0.0
    
    def generateWindows(self, joinedData):
        """Build scaled input windows and their outcome labels.

        The transformed input series and the target price series are joined
        on their index and rows with missing values are dropped. For every
        start row ``i`` an input window of ``lookbackDistance`` rows is
        min-max scaled on its own, and an outcome window of
        ``predictionDistance`` rows starting ``2 * lookbackDistance`` rows
        after ``i`` is summarised.

        Args:
            joinedData: data handed to ``inputSeries.transformJoinedData`` and
                ``dataAck.createPriceSeries``.

        Returns:
            A 6-tuple ``(xVals, yValsShort, yValsLong, yVals, yIndex, xToday)``:
            xVals -- list of flattened, min-max scaled input windows;
            yValsShort -- magnitude of the largest drop relative to the first
                row of the outcome window, or 0.0 when it does not exceed
                ``tradeableMinReturn``;
            yValsLong -- same for the largest rise;
            yVals -- one-element arrays with the first-to-last relative
                change of the outcome window;
            yIndex -- index label of the first row of each outcome window;
            xToday -- flattened, min-max scaled last ``lookbackDistance`` rows
                of the transformed series (taken before the join/dropna).
        """
        transformedSeries = pd.DataFrame(self.inputSeries.transformJoinedData(joinedData))
        transformedSeries.columns = ["INPUT"]
        targetSeries = pd.DataFrame(dataAck.createPriceSeries(joinedData, self.targetTicker))
        targetSeries.columns = ["OUTPUT"]

        joinedTrain = targetSeries.join(transformedSeries).dropna() ##DONE TO ENSURE SAME INDEXES

        inputData = joinedTrain[["INPUT"]].values
        outputData = joinedTrain[["OUTPUT"]].values
        outputDays = joinedTrain[["OUTPUT"]].index

        lookback = self.lookbackDistance
        horizon = self.predictionDistance
        ##SKIP LOOKBACK DISTANCE * 2 TO AVOID ANY OVERLAP WITH ANYTHING IN TRAINING
        targetOffset = lookback + lookback

        xVals = []
        yValsShort = []
        yValsLong = []
        yVals = []
        yIndex = []
        for i in range(len(inputData) - targetOffset - horizon):
            xVals.append(MinMaxScaler().fit_transform(inputData[i:i + lookback]).flatten())

            targetStart = i + targetOffset
            targetArr = outputData[targetStart:targetStart + horizon]

            # Relative move of every row against the window's first row,
            # computed once (shape (horizon, 1)); the first entry is always 0,
            # so min() <= 0 <= max().
            relativeMoves = (targetArr - targetArr[0]) / targetArr[0]
            largestDrop = abs(relativeMoves.min())
            largestRise = abs(relativeMoves.max())

            ##SET yVAL FOR BOTH SHORT AND LONG TRADEABLE OPPORTUNITY
            yValsShort.append(largestDrop if largestDrop > self.tradeableMinReturn else 0.0)
            yValsLong.append(largestRise if largestRise > self.tradeableMinReturn else 0.0)
            yVals.append(relativeMoves[-1])
            yIndex.append(outputDays[targetStart])
        return xVals, yValsShort, yValsLong, yVals, yIndex, MinMaxScaler().fit_transform(transformedSeries[-self.lookbackDistance:]).flatten()
    
    def runDay(self, xVals, yValsShort, yValsLong, xTarget, identifier=None, sharedDict=None):
        """Score one query window against a set of historical windows.

        A nearest-neighbour model is fitted on ``xVals`` and queried for all
        samples within ``self.radius`` of ``xTarget``. Neighbours that are
        near-duplicates by index are removed with ``ensureNoShifts``. When
        strictly more than ``self.minNeighbors`` remain, the confidence for
        each side is the fraction of those neighbours whose label is > 0.
        Confidences below ``self.minConfidence`` are reset to 0.0.

        Args:
            xVals: historical input windows to fit on.
            yValsShort: short-side labels aligned with ``xVals``.
            yValsLong: long-side labels aligned with ``xVals``.
            xTarget: the query window.
            identifier: when not None, the result is stored in
                ``sharedDict[identifier]`` instead of being returned.
            sharedDict: mapping written to when ``identifier`` is given.

        Returns:
            ``{"short": float, "long": float}`` when ``identifier`` is None,
            otherwise None.
        """
        neighborModel = NearestNeighbors(p=2, n_jobs = 1)
        neighborModel.fit(xVals)
        neighborResult = neighborModel.radius_neighbors([xTarget], self.radius)
        # Element 1 holds the neighbour indices; [0] selects the single query.
        keptNeighbors = TradeableCurvePredictor.ensureNoShifts(neighborResult[1][0])

        confidences = {"short": 0.0, "long": 0.0}
        if len(keptNeighbors) > self.minNeighbors:
            for side, labels in (("short", yValsShort), ("long", yValsLong)):
                sampled = np.array([labels[sampleIndex] for sampleIndex in keptNeighbors])
                confidences[side] = len(sampled[sampled > 0])/float(len(sampled))

        for side in ("short", "long"):
            if confidences[side] < self.minConfidence:
                confidences[side] = 0.0

        if identifier is None:
            return confidences
        sharedDict[identifier] = confidences
    
    def runDayChunking(self, xVals, yValsShort, yValsLong, identifiers, sharedDict, k):
        j= 0
        for i in identifiers:
            pred = self.runDay(xVals[:int(i)],  yValsShort[:int(i)], yValsLong[:int(i)], xVals[int(i)+44])
            sharedDict[str(i)] = pred
            j += 1
            if j % 30 == 0:
                print("THREAD ", k, "PROGRESS:", j/len(identifiers))
        
    
    def runModelsChunksSkipMP(self, dataOfInterest, daysToCheck = None, earlyStop=False):
        """Walk-forward evaluate the predictor across forked worker processes.

        Windows are generated once, then every identifier ``i`` from 600 up
        to ``len(xVals) - 44`` is scored by ``runDayChunking`` (fit on the
        first ``i`` windows, query window ``i + 44``). Predictions are turned
        into per-window returns, preferring the long side when both sides
        signal.

        Args:
            dataOfInterest: joined data passed to ``generateWindows``.
            daysToCheck: when not None, only the last ``daysToCheck``
                identifiers are evaluated.
            earlyStop: when False all identifiers are evaluated as a single
                slice; otherwise they are evaluated in the successive slices
                ``[:252]``, ``[252:600]``, ``[600:900]``, ``[900:1200]``,
                ``[1200:]``.

        Returns:
            A DataFrame with one ``"Total Return"`` column indexed by the
            first day of each outcome window, or None when the cumulative
            return after any slice is <= 0 (this check also applies when
            ``earlyStop`` is False) or when there was nothing to evaluate.

        Note:
            Uses the ``'fork'`` multiprocessing start method, which is not
            available on every platform.
        """
        xVals, yValsShort, yValsLong, yVals, yIndex, _ = self.generateWindows(dataOfInterest)

        # Gap between the last training window and the query window; must
        # match the +44 offset runDayChunking applies when picking the query.
        lag = 44
        ##MIN TRAINING
        minTraining = 600
        # Instances normally get parallelism from __init__; fall back to the
        # previously hard-coded 16 for objects created without it.
        workerCount = getattr(self, "parallelism", 16)

        mpEngine = mp.get_context('fork')
        with mpEngine.Manager() as manager:
            returnDict = manager.dict()

            identifiersToCheck = [str(i) for i in range(minTraining, len(xVals) - lag)]
            if daysToCheck is not None:
                identifiersToCheck = identifiersToCheck[-daysToCheck:]

            if earlyStop == False:
                identifierWindows = [identifiersToCheck]
            else:
                # Evaluate progressively so a poor configuration is rejected
                # after the cheapest possible amount of work.
                identifierWindows = [
                    identifiersToCheck[:252],
                    identifiersToCheck[252:600],
                    identifiersToCheck[600:900],
                    identifiersToCheck[900:1200],
                    identifiersToCheck[1200:],
                ]

            returnStream = None
            for clippedIdentifiers in identifierWindows:
                if len(clippedIdentifiers) == 0:
                    # Short histories leave later slices empty; building the
                    # tables below from no rows would raise a KeyError.
                    continue

                splitIdentifiers = np.array_split(np.array(clippedIdentifiers), workerCount)

                runningP = []
                for k, identifiers in enumerate(splitIdentifiers):
                    if len(identifiers) == 0:
                        continue
                    p = mpEngine.Process(target=TradeableCurvePredictor.runDayChunking, args=(self, xVals, yValsShort, yValsLong, identifiers, returnDict, k))
                    p.start()
                    runningP.append(p)

                # join() blocks until each worker exits; no polling needed.
                for p in runningP:
                    p.join()

                targetPositions = [int(i) + lag for i in clippedIdentifiers]
                preds = [returnDict[i] for i in clippedIdentifiers]
                actualShort = [yValsShort[position] for position in targetPositions]
                actualLong = [yValsLong[position] for position in targetPositions]
                actual = [yVals[position] for position in targetPositions]
                days = [yIndex[position] for position in targetPositions]

                ##CREATE ACCURATE BLENDING ACROSS DAYS
                predsTable = pd.DataFrame(preds, index=days)

                # Row layout expected by generateWindowReturn:
                # (confidence, tradeable move, realised window return).
                shortTable = predsTable[["short"]]\
                        .join(pd.DataFrame(actualShort, index=days, columns=["actual short"]))\
                        .join(pd.DataFrame(actual, index=days, columns=["actual"]))

                shortReturns = pd.DataFrame(shortTable.apply(lambda x:self.generateWindowReturn(x, short=True), axis=1, raw=True), columns=["short return"])

                longTable = predsTable[["long"]]\
                        .join(pd.DataFrame(actualLong, index=days, columns=["actual long"]))\
                        .join(pd.DataFrame(actual, index=days, columns=["actual"]))

                longReturns = pd.DataFrame(longTable.apply(lambda x:self.generateWindowReturn(x, short=False), axis=1, raw=True), columns=["long return"])

                ##OPT FOR BUY RETURN IF BOTH RETURNS VALID
                totalReturn = pd.DataFrame(shortReturns.join(longReturns).apply(lambda x: TradeableCurvePredictor.enforceBuyPreference(x), axis=1, raw=True), columns=["Total Return"])
                if returnStream is None:
                    returnStream = totalReturn
                else:
                    returnStream = pd.concat([returnStream, totalReturn])

                cumulativeReturn = empyrical.cum_returns(returnStream.values)[-1][0]
                print(cumulativeReturn)
                if cumulativeReturn <= 0:
                    return

            return returnStream
                
#                 i = 1
#                 tablesToJoin = []
#                 while i < self.predictionDistance:
#                     thisTable = predsTable.shift(i)
#                     thisTable.columns = ["Predictions_" + str(i)]
#                     tablesToJoin.append(thisTable)
#                     i += 1
                
#                 predsTable = predsTable.join(tablesToJoin)
                
#                 transformedPreds = pd.DataFrame(predsTable.apply(lambda x:dataAck.computePosition(x), axis=1), columns=["Predictions"]).dropna()
#                 dailyFactorReturn = dataAck.getDailyFactorReturn(self.targetTicker, dataOfInterest)
#                 transformedPreds = transformedPreds.join(dailyFactorReturn).dropna()
#                 returnStream = pd.DataFrame(transformedPreds.apply(lambda x:x[0] * x[1], axis=1), columns=["Algo Return"]) if returnStream is None else pd.concat([returnStream, pd.DataFrame(transformedPreds.apply(lambda x:x[0] * x[1], axis=1), columns=["Algo Return"])])
#                 factorReturn = pd.DataFrame(transformedPreds[["Factor Return"]]) if factorReturn is None else pd.concat([factorReturn, pd.DataFrame(transformedPreds[["Factor Return"]])])
#                 predictions = pd.DataFrame(transformedPreds[["Predictions"]]) if predictions is None else pd.concat([predictions, pd.DataFrame(transformedPreds[["Predictions"]])])

#                 alpha, beta = empyrical.alpha_beta(returnStream, factorReturn)
#                 activity = np.count_nonzero(returnStream)/float(len(returnStream))
#                 rawBeta = abs(empyrical.alpha_beta(returnStream.apply(lambda x:dataAck.applyBinary(x), axis=0), factorReturn.apply(lambda x:dataAck.applyBinary(x), axis=0))[1])
#                 shortSharpe = empyrical.sharpe_ratio(returnStream)
#                 activity = np.count_nonzero(returnStream)/float(len(returnStream))
#                 algoAnnualReturn = empyrical.annual_return(returnStream.values)[0]
#                 algoVol = empyrical.annual_volatility(returnStream.values)
#                 factorAnnualReturn = empyrical.annual_return(factorReturn.values)[0]
#                 factorVol = empyrical.annual_volatility(factorReturn.values)
#                 treynor = ((empyrical.annual_return(returnStream.values)[0] - empyrical.annual_return(factorReturn.values)[0]) \
#                            / abs(empyrical.beta(returnStream, factorReturn)))
#                 sharpeDiff = empyrical.sharpe_ratio(returnStream) - empyrical.sharpe_ratio(factorReturn)
#                 relativeSharpe = sharpeDiff / empyrical.sharpe_ratio(factorReturn) * (empyrical.sharpe_ratio(factorReturn)/abs(empyrical.sharpe_ratio(factorReturn)))
#                 stability = empyrical.stability_of_timeseries(returnStream)

#                 ##CALCULATE SHARPE WITH SLIPPAGE
#                 estimatedSlippageLoss = portfolioGeneration.estimateTransactionCost(predictions)
#                 estimatedSlippageLoss.columns = returnStream.columns
#                 slippageAdjustedReturn = (returnStream - estimatedSlippageLoss).dropna()
#                 slippageSharpe = empyrical.sharpe_ratio(slippageAdjustedReturn)
#                 sharpeDiffSlippage = empyrical.sharpe_ratio(slippageAdjustedReturn) - empyrical.sharpe_ratio(factorReturn)
#                 relativeSharpeSlippage = sharpeDiffSlippage / empyrical.sharpe_ratio(factorReturn) * (empyrical.sharpe_ratio(factorReturn)/abs(empyrical.sharpe_ratio(factorReturn)))
                
#                 if np.isnan(shortSharpe) == True:
#                     return None, {"sharpe":shortSharpe}, None, None, None

#                 elif (empyrical.sharpe_ratio(returnStream) < 0.0  or activity < 0.3 or abs(rawBeta) > 0.6 or stability < 0.3) and shortSeen == 0:
#                     return None, {
#                             "sharpe":shortSharpe, ##OVERLOADED IN FAIL
#                             "activity":activity,
#                             "factorSharpe":empyrical.sharpe_ratio(factorReturn),
#                             "sharpeSlippage":slippageSharpe,
#                             "beta":abs(beta),
#                             "alpha":alpha,
#                             "activity":activity,
#                             "treynor":treynor,
#                             "period":"first 252 days",
#                             "algoReturn":algoAnnualReturn,
#                             "algoVol":algoVol,
#                             "factorReturn":factorAnnualReturn,
#                             "factorVol":factorVol,
#                             "sharpeDiff":sharpeDiff,
#                             "relativeSharpe":relativeSharpe,
#                             "sharpeDiffSlippage":sharpeDiffSlippage,
#                             "relativeSharpeSlippage":relativeSharpeSlippage,
#                             "rawBeta":rawBeta,
#                             "stability":stability
#                     }, None, None, None
                
#                 elif (((empyrical.sharpe_ratio(returnStream) < 0.25 and sharpeDiff < 0.0) and shortSeen == 1) or ((empyrical.sharpe_ratio(returnStream) < 0.4 and sharpeDiff < 0.0) and (shortSeen == 2 or shortSeen == 3)) or abs(rawBeta) > 0.6 or activity < 0.3 or stability < 0.4) and (shortSeen == 1 or shortSeen == 2 or shortSeen == 3):
#                     periodName = "first 600 days"
#                     if shortSeen == 2:
#                         periodName = "first 900 days"
#                     elif shortSeen == 3:
#                         periodName = "first 1200 days"
#                     return None, {
#                             "sharpe":shortSharpe, ##OVERLOADED IN FAIL
#                             "activity":activity,
#                             "factorSharpe":empyrical.sharpe_ratio(factorReturn),
#                             "sharpeSlippage":slippageSharpe,
#                             "alpha":alpha,
#                             "beta":abs(beta),
#                             "activity":activity,
#                             "treynor":treynor,
#                             "period":periodName,
#                             "algoReturn":algoAnnualReturn,
#                             "algoVol":algoVol,
#                             "factorReturn":factorAnnualReturn,
#                             "factorVol":factorVol,
#                             "sharpeDiff":sharpeDiff,
#                             "relativeSharpe":relativeSharpe,
#                             "sharpeDiffSlippage":sharpeDiffSlippage,
#                             "relativeSharpeSlippage":relativeSharpeSlippage,
#                             "rawBeta":rawBeta,
#                             "stability":stability
#                     }, None, None, None
                    
#                 elif shortSeen < 4:
#                     print("CONTINUING", "SHARPE:", shortSharpe, "SHARPE DIFF:", sharpeDiff, "RAW BETA:", rawBeta, "TREYNOR:", treynor)
               
#                 shortSeen += 1

#             return returnStream, factorReturn, predictions, slippageAdjustedReturn, rawPredictions
    
    def runModelHistorical(self, dataOfInterest, earlyStop=False):
        """Run the historical evaluation over ``dataOfInterest``.

        Delegates to ``runModelsChunksSkipMP`` with the default
        ``daysToCheck`` and returns whatever that method returns.
        """
        historicalResult = self.runModelsChunksSkipMP(dataOfInterest, earlyStop=earlyStop)
        return historicalResult


#     def runModelToday(self, dataOfInterest):
#         xVals, yVals, yIndex, xToday = self.generateWindows(dataOfInterest)
#         return self.runDay(xVals, yVals, xToday, identifier=None, sharedDict=None)
        

In [None]:
import dataAck
import portfolio
import TreePredictor
import curveTreeDB
import params


# Imports hoisted out of the loops below; they used to be re-executed on
# every pass.
import itertools
import random
import time
import warnings
import numpy as np

# Hyper-parameter grid that is randomly sub-sampled for every candidate series.
lookbackChoices = [5, 10, 22, 44]
predictionChoices = [2, 3, 5, 7, 10, 15]
radiusChoices = [0.3, 0.5, 0.7, 1.0, 1.5, 2.0]
minConfidenceChoices = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
minNeighborsChoices = [5, 10, 20]
tradeableMinReturnChoices = [0.005, 0.01, 0.015, 0.02, 0.025, 0.03, 0.05]

allTickers = dataAck.getAllTickersPlain()
# Search driver: runs until interrupted. Each outer pass loads training data
# for the ticker, then tries 41 candidate series against a random ~3% sample
# of the hyper-parameter grid.
while True:
    ##ADVANCED TICKER TO TRADE SELECTION
#     modelCount, modelSplitByTicker, predictionCount, numPredictors = curveTreeDB.getModelCounts(params.curveModels)

#     validTickersToTrade = []

#     for ticker in allTickers:
#         if ticker not in modelSplitByTicker:
#             validTickersToTrade.append(ticker)
#             print("NOT PRESENT", ticker)

#     if len(validTickersToTrade) == 0:
#         ##MEANS ALL TICKERS HAVE AT LEAST ONE MODEL
#         for ticker in sorted(modelSplitByTicker, key=modelSplitByTicker.get)[:40]:
#             validTickersToTrade.append(ticker)
#             print(ticker, modelSplitByTicker[ticker])

    # Ticker is currently pinned; the random selection above is disabled.
    tickerToTrade = "EEM"#validTickersToTrade[random.randint(0, len(validTickersToTrade)) - 1]
    print(tickerToTrade)

    tData = dataAck.getTrainingData(tickerToTrade)
    joinedData = None
    validTickers = None

    if tData is None:
        # Cache miss: download the data sources, join them and store the result.
        dataAck.logModel("Cache", {
            "type":"miss",
            "ticker":tickerToTrade,
            "day":str(portfolio.getToday())
        })

        tickersToPull = dataAck.getDataSourcesForTicker(tickerToTrade)
        print(tickersToPull)

        pulledData, validTickers = dataAck.downloadTickerData(tickersToPull)

        joinedData = dataAck.joinDatasets([pulledData[ticker] for ticker in pulledData])

        dataAck.storeTrainingData(tickerToTrade, (joinedData, validTickers))
    else:
        # Cache hit: the stored value is a (joinedData, validTickers) pair.
        joinedData, validTickers = tData[0], tData[1]
        dataAck.logModel("Cache", {
            "type":"hit",
            "ticker":tickerToTrade,
            "day":str(portfolio.getToday())
        })

    sManager = dataAck.seriesManager(validTickers)
    print(sManager.describe())

    warnings.filterwarnings("ignore")
    ##GET ALGOS INITIALLY GOOD
    runsSeen = 0
    while True:
        # Draw candidate series until one passes the validity check on the
        # data up to 2016-01-01.
        s = sManager.createSeries()
        while s.checkValidity(s.transformJoinedData(joinedData[:"2016-01-01"])) == False:
            s = sManager.createSeries()

        print("*********")
        print("NEW SERIES", s.describe())
        print("*********")

        # itertools.product varies the right-most factor fastest, i.e. the
        # same order as six nested loops written outermost-first.
        parameterGrid = itertools.product(
            lookbackChoices,
            predictionChoices,
            radiusChoices,
            minConfidenceChoices,
            minNeighborsChoices,
            tradeableMinReturnChoices,
        )
        for lookback, prediction, radius, minConfidence, minNeighbors, tradeableMinReturn in parameterGrid:
            if random.uniform(0,1) < 0.97: ##RANDOMLY SKIP A LOT...FAILING FAST ON SERIES ALLOWS US TO EXAMINE MUCH LARGER SAMPLE SPACE
                continue
            cPre = TradeableCurvePredictor(s, tickerToTrade, lookback, prediction, radius, minConfidence, minNeighbors, tradeableMinReturn)
            print(cPre.describe())
            totalReturn = cPre.runModelHistorical(joinedData, earlyStop=True)

        runsSeen += 1

        if runsSeen > 40:
            break

    

EEM
ATTEMPTING PULL EEM
['AGG', 'DIA', 'DVY', 'DXJ', 'EFA', 'EWC', 'EWG', 'EWH', 'EWJ', 'EWT', 'EWU', 'EWW', 'FEZ', 'FXE', 'GDX', 'GLD', 'IAU', 'IBB', 'IEF', 'IJH', 'IJR', 'ITB', 'IVE', 'IVV', 'IVW', 'IWB', 'IWD', 'IWF', 'IWM', 'IWN', 'IWO', 'IWR', 'IYF', 'IYR', 'IYT', 'KBE', 'KRE', 'LQD', 'MDY', 'OEF', 'OIH', 'QQQ', 'RSP', 'SDY', 'SH', 'SHY', 'SLV', 'SMH', 'SOXX', 'SPY', 'TIP', 'TLT', 'USO', 'VB', 'VBR', 'VFH', 'VGK', 'VGT', 'VIG', 'VNQ', 'VO', 'VTI', 'VTV', 'VUG', 'XBI', 'XHB', 'XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY', 'XRT', 'EEM']
*********
NEW SERIES ('IWO', 41, None, 7, 4)
*********
(('IWO', 41, None, 7, 4), 'EEM', 5, 2, 0.3, 0.1, 10, 0.01)
0.299274361649
0.669432185524
-0.0949022124965
(('IWO', 41, None, 7, 4), 'EEM', 5, 2, 0.3, 0.7, 5, 0.015)
0.0
(('IWO', 41, None, 7, 4), 'EEM', 5, 2, 0.5, 0.1, 5, 0.025)
0.0
(('IWO', 41, None, 7, 4), 'EEM', 5, 2, 0.5, 0.4, 20, 0.025)
0.0
(('IWO', 41, None, 7, 4), 'EEM', 5, 2, 0.7, 0.4, 5, 0.02)
0.0
(('IWO', 41, None, 7, 4),