In [None]:
#Imports
from deap import creator, tools, base
import numpy as np
import random
from stage import fullPrint, getStage
from datetime import datetime, date
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
import pickle
from multiprocessing import Pool
# import mysqlToolbox
# import mariadb
import sys
import os

In [None]:
## Warning Statements
# Silence pandas PerformanceWarning messages from here on.
warnings.filterwarnings(action="ignore", category=pd.errors.PerformanceWarning)

## Imports Functions from Toolbox
from estimatorToolbox import (
    calculateGroupReturn,
    evalReturn,
    generate_random_num_attr,
    mutate,
)

In [None]:
## Initialize Creator & Toolbox
# Initialize Deap Creator Objects
# Registers creator.FitnessMax, a base.Fitness subclass carrying the weights tuple below.
# NOTE(review): weights=(1.0,1,0) is a three-element tuple (weights 1.0, 1 and 0). If a
# single maximised objective was intended this would be (1.0,); confirm against the
# number of values evalReturn returns before changing it.
creator.create("FitnessMax", base.Fitness, weights=(1.0,1,0))
# Registers creator.Individual, a list subclass whose fitness attribute is built from FitnessMax.
creator.create("Individual", list, fitness=creator.FitnessMax)

In [None]:
# Initialize Toolbox
toolbox = base.Toolbox()
# Genes come from generate_random_num_attr, registered under the alias "attr_bool".
toolbox.register("attr_bool", generate_random_num_attr) # Attribute generator 
# An Individual holds exactly one attr_bool() result (repeat count 1); a later cell reads it as ind[0].
toolbox.register("individual", tools.initRepeat, creator.Individual, 
    toolbox.attr_bool, 1) # Structure initializers
# population(n=...) builds a plain list of n individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Operators: evaluation and mutation come from estimatorToolbox; crossover is DEAP's
# two-point crossover; selection is a tournament of size 3.
toolbox.register("evaluate", evalReturn)
# NOTE(review): individuals have length 1 here, and two-point crossover needs room for
# two cut points — confirm that "mate" is usable (or unused) with this representation.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", mutate)
toolbox.register("select", tools.selTournament, tournsize=3)

In [None]:
## Initialize Variables
# Timestamp taken when this cell runs.
now = datetime.now()

# Initialize Backtest Transaction Database:
#   1. load one industry frame and strip its price/volume/currency columns,
#   2. keep only rows whose index date has weekday == 6 (Sunday in pandas numbering),
#   3. keep the first row for every repeated index label,
#   4. save the result as the reusable template.
transactionTemplate = (
    pd.read_pickle('stockData/industriesData/XLB/DJMining.pkl')
    .drop(columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Currency'])
    .assign(Dates=lambda frame: pd.to_datetime(frame.index))
    .loc[lambda frame: frame['Dates'].dt.weekday == 6]
    .drop(columns='Dates')
    .loc[lambda frame: ~frame.index.duplicated()]
)
transactionTemplate.to_pickle("transactionTemplate.pkl")

In [None]:
## Get list of returns of tickers
# Load the pickled industry list; later cells read element [1] of each entry as a
# column label of listOfDf.
industryList = pd.read_pickle("stockData/industryList.pkl")
# calculateGroupReturn comes from estimatorToolbox. Its result is used below as a
# DataFrame (.columns, column selection, .iterrows()) whose cells are compared to 1.0.
listOfDf = calculateGroupReturn(industryList)

In [None]:
## Initialize Test/Train Stock Lists and Check test vs train return

# Shuffled split with 30% of the industry list held out as the test set.
# No random_state is passed, so every run draws a different split.
train, test = train_test_split(industryList, test_size=0.3, shuffle=True)
# Placeholder; overwritten with the result of calculateTestTrainRatio further down.
testTrainR = []
def calculateTestTrainRatio(train, test, returns=None):
    """Return the compounded mean return of ``train`` relative to ``test``.

    For each of the two lists, every row of the returns table is reduced to the
    mean of that list's industry columns and the row means are multiplied
    together. Cells equal to exactly 1.0 are left out of the mean, and a row
    with nothing left is skipped.

    Args:
        train: Sequence of entries whose element ``[1]`` is an industry column label.
        test: Sequence of entries laid out like ``train``.
        returns: DataFrame with one column per industry. Defaults to the
            module-level ``listOfDf``.

    Returns:
        A two-element list ``[1, trainProduct / testProduct]``.
    """
    if returns is None:
        returns = listOfDf
    knownIndustries = set(returns.columns)

    products = []
    for group in (test, train):
        # Ignore industries that have no column in the returns table. The old
        # removal loop neither shrank the list nor advanced its index, so a
        # single missing industry made the cell spin forever.
        industries = [entry[1] for entry in group if entry[1] in knownIndustries]
        product = 1
        for _, row in returns[industries].iterrows():
            activeReturns = [value for value in row.to_list() if value != 1.0]
            if activeReturns:
                product = product * np.mean(activeReturns)
        products.append(product)

    testProduct, trainProduct = products
    return [1, trainProduct / testProduct]
# Re-draw the split until the train/test ratio is no more than 20% of the
# pair's mean away from 1.
while True:
    testTrainR = calculateTestTrainRatio(train, test)
    if not (abs(1 - testTrainR[1]) / np.average(testTrainR) > 0.2):
        break
    train, test = train_test_split(industryList, test_size=0.3, shuffle=True)

In [None]:
## Calculate (and normalize) returns of each folds
# Cut the training list into three folds; keep them both as a list and by name.
trainSets = np.array_split(train, 3)
trainSet1, trainSet2, trainSet3 = trainSets
# Placeholder; filled by trainSetsRatio in a later cell.
trainSetsR = []
def trainSetsRatio(ratio=None, sets=None, returns=None):
    """Return the compounded mean return of six fold groupings, relative to fold 0.

    Groupings 0-2 are the three folds themselves; groupings 3-5 are the
    two-fold unions that leave out fold 0, 1 and 2 respectively. For each
    grouping, every row of the returns table is reduced to the mean of the
    grouping's industry columns and the row means are multiplied together.
    Cells equal to exactly 1.0 are left out, and a row with nothing left is
    skipped.

    Args:
        ratio: Ignored. Kept so existing ``trainSetsRatio(trainSetsR)`` calls
            still work; the original overwrote it immediately as well.
        sets: Sequence of three folds whose entries carry an industry column
            label at index ``[1]``. Defaults to the module-level ``trainSets``.
        returns: DataFrame with one column per industry. Defaults to the
            module-level ``listOfDf``.

    Returns:
        A list of six values, each divided by the fold-0 value, so the first
        element is always ``1.0``.
    """
    if sets is None:
        sets = trainSets
    if returns is None:
        returns = listOfDf
    knownIndustries = set(returns.columns)

    groupings = [sets[i] for i in range(3)]
    groupings += [
        np.concatenate((sets[(i + 1) % 3], sets[(i + 2) % 3])) for i in range(3)
    ]

    ratio = []
    for group in groupings:
        # Ignore industries that have no column in the returns table. The old
        # code wrote an array into the list and never advanced its index, so a
        # missing industry hung or crashed the cell.
        industries = [entry[1] for entry in group if entry[1] in knownIndustries]
        product = 1
        for _, row in returns[industries].iterrows():
            activeReturns = [value for value in row.to_list() if value != 1.0]
            if activeReturns:
                product = product * np.mean(activeReturns)
        ratio.append(product)

    baseline = ratio[0]
    return [1.0] + [value / baseline for value in ratio[1:]]

In [None]:
# Reshuffle and re-cut the folds until the spread of the six fold ratios
# (standard deviation divided by mean) is no larger than 0.2.
while True:
    trainSetsR = trainSetsRatio(trainSetsR)
    if not (np.std(trainSetsR) / np.average(trainSetsR) > 0.2):
        break
    np.random.shuffle(train)
    trainSets = np.array_split(train, 3)
    trainSet1, trainSet2, trainSet3 = trainSets

In [None]:
# Persist the folds and their normalized ratios, one pickle file each.
for picklePath, payload in (
    ('testSetPickle/trainSet.pkl', trainSets),
    ('testSetPickle/trainSetRatio.pkl', trainSetsR),
):
    with open(picklePath, 'wb') as f:
        pickle.dump(payload, f)

In [None]:
# Initialize Output File & Write Testsets to the TXT File
# Append to today's result file. The context manager closes the handle even if
# one of the writes raises; the manual open()/close() pair did not.
with open("estimatorData/resultML"+date.today().strftime('%Y-%m-%d')+".txt","a") as resultFile:
    resultFile.write("trainSets "+str(trainSets)+"\n")
    resultFile.write("test "+str(test)+"\n")

In [None]:
# Build an initial population of 10 individuals through the toolbox.
pop = toolbox.population(n=10)
for ind in pop:
    # Space-separated string of the values held in the individual's single element.
    # Currently unused: the INSERT that consumed it is commented out below.
    paramStr = ' '.join(map(str, ind[0]))
    # cur.execute("INSERT INTO Params (param,result) VALUES (?, NULL)", (paramStr,))

In [None]:
# Evaluate the entire population
# here
# NOTE(review): the pool is created but not used in the visible cells — the pool.map
# call below and the later pool.close() are both commented out, so it is never
# closed here. Confirm whether it is still needed.
pool = Pool()
# tempResult = pool.map(toolbox.evaluate, pop)

In [None]:
# Serial evaluation. Assuming Python 3, map() is lazy: nothing is evaluated until the
# zip() loop in the next cell iterates `fitnesses`, and the iterator can be consumed
# only once.
fitnesses = map(toolbox.evaluate, pop)
# print(fitnesses)
# here
# pool.close()

In [None]:
# Attach each evaluation result to its individual.
for ind, fit in zip(pop, fitnesses):
    ind.fitness.values = fit
    # When the first fitness value is 100 or less the values are deleted again,
    # which in DEAP leaves the individual's fitness unset (invalid).
    # NOTE(review): the reason for the 100 threshold is not visible here — confirm.
    if ind.fitness.values[0]<=100:
        del ind.fitness.values

In [None]:
# Load one industry's pickled frame and hand it to the project's fullPrint helper.
# NOTE(review): looks like an ad-hoc inspection cell; presumably fullPrint prints the
# whole frame — confirm it is still wanted in the final notebook.
df = pd.read_pickle('stockData/industriesData/XLB/DJAluminum.pkl')
fullPrint(df)