# Evaluate the FS algo

Take a time window of prices and decide whether to SELL, HOLD, or BUY until the next transaction time.
Iterate through the whole history of one stock.

In [1]:
using EO, Plots, Lasso

## Stock picking

In [2]:
"""
    generate_predictions(x_max, price, training_width, prediction_width, timestep,
                         expert_cnt; max_failures=100)

Slide a training window over `price` and, for every window position, train
`expert_cnt` expression models and evaluate each of them on the window extended by
`prediction_width` further indices.

# Arguments
- `x_max`: upper index bound; a window is processed only while
  `last(window) + prediction_width < x_max`.
- `price`: price series, indexed by the integer window range.
- `training_width`: number of indices in one training window (the first window is
  `2:training_width+1`).
- `prediction_width`: number of indices after the window that are also evaluated.
- `timestep`: shift applied to the window after each iteration; must be positive.
- `expert_cnt`: number of successfully trained models required per window.

# Keywords
- `max_failures`: number of consecutive failed training attempts tolerated for one
  expert before the last error is rethrown instead of retrying forever.

# Returns
A `Vector{Vector{Vector{Float64}}}` where `ps[w][e]` holds the values of expert `e`
of window `w` on the indices `first(window):(last(window) + prediction_width)`.

# Throws
- `ArgumentError` if `timestep` is not positive (the window would never advance).
- `InterruptException` is always propagated so the run can be cancelled.
- The last training error once `max_failures` consecutive attempts have failed.
"""
function generate_predictions(x_max, price, training_width, prediction_width, timestep,
                              expert_cnt; max_failures=100)
    timestep > 0 || throw(ArgumentError("timestep must be positive, got $timestep"))

    ps = Vector{Vector{Vector{Float64}}}()

    # search-space definition; identical for every window, so built only once
    basis_functions = [+, -, *, /, sin, cos]
    basis_variables = Vector{Any}([:var1])
    pop_size = 100

    iter = 1
    training_i = 2:(training_width + 1)
    while training_i[end] + prediction_width < x_max
        x = float.(collect(training_i))                                         # training set
        xt = float.(collect(training_i[1]:(training_i[end] + prediction_width)))  # testing set

        # define training
        objective_function  = enclose_arguments(EO.f_function_diff_squared, x, float.(price[training_i]))
        initialization      = enclose_noargs(EO.expression_initialization, pop_size, objective_function, basis_functions, basis_variables)
        selection           = enclose_arguments(EO.s_tournament, 10, 3)
        crossover           = enclose_arguments(EO.cr_subtree, pop_size, basis_functions, basis_variables)
        mutation            = enclose_arguments(EO.subtree_mutation!, basis_functions, basis_variables)
        replacement         = enclose_replacement(EO.r_best_n_diverse, pop_size, 0.3)       # chosen fraction of the population will consist of the best n, the rest is random
        termination         = enclose_argument(iteration_termination, pop_size*10)

        # train expert models
        predictions = Vector{Vector{Float64}}()
        failures = 0    # consecutive failed attempts for the expert currently being trained
        while length(predictions) < expert_cnt
            print("\rtraining expert $(length(predictions)+1) at iter $iter")
            try         # optimization can diverge
                @time solution = solvink_hart(objective_function, initialization, selection, crossover, mutation, replacement, termination)

                model = EO.Expr_parser(solution)
                push!(predictions, model.(xt))
                failures = 0
            catch err
                # never swallow a user interrupt, otherwise the run cannot be stopped
                err isa InterruptException && rethrow()
                failures += 1
                if failures >= max_failures
                    # a long streak of failures points to a systematic error, not divergence
                    @error "training failed $failures times in a row at iter $iter, giving up"
                    rethrow()
                end
                @warn "training failed, retrying" iter attempt = failures exception = (err, catch_backtrace())
            end
        end
        iter += 1

        # collect the predictions of this window
        push!(ps, predictions)

        # ranges are immutable, so build the shifted window instead of mutating it
        training_i = (training_i[1] + timestep):(training_i[end] + timestep)
    end
    return ps
end

generate_predictions (generic function with 1 method)

In [3]:
using CSV, DataFrames, FileIO

# Dataset selection: the "SAT" branch below reads one combined CSV and filters it by
# symbol; any other value reads one CSV per company name.
set = "Hanz"
names = ["3M", "Apple", "Coca-Cola", "Exxon Mobil", "Ford Motor", "Intel", "Microsoft", "NVIDIA", "Pfizer"]
#set = "SAT"
#names = ["EA", "K"]
name = names[1]     # this cell processes only the first stock of the list
if set == "SAT"
    data = CSV.read("../trading_data/archive(4)/sp500_stocks.csv", DataFrame)
    price = filter("Symbol" => x->x == name, data).Open
else
    data = CSV.read("../trading_data/whole_dataset/"*name*" Stock Price History.csv", DataFrame)
    # NOTE(review): reversed presumably because the file is stored newest-first — confirm
    price = reverse(data.Price)
end

x = collect(1:length(price))

training_width = 100
prediction_width = 10
timestep = 10
expert_cnt = 5

# Create the output directory before the long training run, so that the final `save`
# cannot fail on a missing directory after all the work has been done.
out_dir = joinpath(@__DIR__, "evaluations", "GP", name)
mkpath(out_dir)

res = generate_predictions(length(price), price, training_width, prediction_width, timestep, expert_cnt);
# save for reuse; stored under the key "res" as the two-element vector [predictions, price]
save(joinpath(out_dir, "predictions.jld2"), "res", [res, price])

training expert 1 at iter 1  5.607940 seconds (37.75 M allocations: 1.463 GiB, 5.60% gc time, 70.49% compilation time)
training expert 2 at iter 1  0.679322 seconds (13.01 M allocations: 485.464 MiB, 12.50% gc time, 11.35% compilation time)
training expert 3 at iter 1  2.388478 seconds (48.55 M allocations: 1.752 GiB, 16.60% gc time)
training expert 4 at iter 1  1.267242 seconds (22.48 M allocations: 832.819 MiB, 18.37% gc time, 3.35% compilation time)
training expert 5 at iter 1  0.820289 seconds (17.06 M allocations: 632.026 MiB, 20.13% gc time, 0.02% compilation time)
training expert 1 at iter 2  3.409390 seconds (61.20 M allocations: 2.201 GiB, 16.71% gc time)
training expert 2 at iter 2  2.664052 seconds (52.25 M allocations: 1.900 GiB, 18.16% gc time)
training expert 3 at iter 2  1.066594 seconds (22.91 M allocations: 848.810 MiB, 15.54% gc time)
training expert 4 at iter 2  0.984725 seconds (21.04 M allocations: 777.862 MiB, 17.62% gc time, 4.14% compilation time)
training exper

In [5]:
using EO, Plots, Lasso, CSV, DataFrames, FileIO

# Dataset selection: the "SAT" branch below reads one combined CSV and filters it by
# symbol; any other value reads one CSV per company name.
set = "Hanz"
# NOTE(review): "3M" is commented out, presumably because it was already processed — confirm
names = [#= "3M",  =#"Apple", "Coca-Cola", "Exxon Mobil", "Ford Motor", "Intel", "Microsoft", "NVIDIA", "Pfizer"]
#set = "SAT"
#names = ["EA", "K"]

# Generate and store the expert predictions for every stock in `names`.
for name in names

    if set == "SAT"
        data = CSV.read("../trading_data/archive(4)/sp500_stocks.csv", DataFrame)
        price = filter("Symbol" => x->x == name, data).Open
    else
        data = CSV.read("../trading_data/whole_dataset/"*name*" Stock Price History.csv", DataFrame)
        # NOTE(review): reversed presumably because the file is stored newest-first — confirm
        price = reverse(data.Price)
    end

    x = collect(1:length(price))

    training_width = 100
    prediction_width = 10
    timestep = 10
    expert_cnt = 5

    # Create the per-stock output directory before the long training run, so that the
    # final `save` cannot fail on a missing directory after all the work has been done.
    out_dir = joinpath(@__DIR__, "evaluations", "tree_GP", name)
    mkpath(out_dir)

    res = generate_predictions(length(price), price, training_width, prediction_width, timestep, expert_cnt);
    # save for reuse; stored under the key "res" as the two-element vector [predictions, price]
    save(joinpath(out_dir, "predictions.jld2"), "res", [res, price])
end

training expert 1 at iter 1  1.538818 seconds (25.30 M allocations: 938.218 MiB, 21.19% gc time, 0.64% compilation time: 100% of which was recompilation)
training expert 2 at iter 1  1.267068 seconds (22.58 M allocations: 839.139 MiB, 20.90% gc time)
training expert 3 at iter 1  0.941156 seconds (17.65 M allocations: 652.250 MiB, 19.62% gc time)
training expert 4 at iter 1  0.593711 seconds (12.44 M allocations: 461.898 MiB, 22.71% gc time)
training expert 5 at iter 1  1.243295 seconds (24.03 M allocations: 893.025 MiB, 23.63% gc time)
training expert 1 at iter 2  2.390744 seconds (43.79 M allocations: 1.568 GiB, 22.40% gc time)
training expert 2 at iter 2  3.226524 seconds (57.61 M allocations: 2.098 GiB, 21.65% gc time)
training expert 3 at iter 2  1.331323 seconds (23.49 M allocations: 862.647 MiB, 23.51% gc time)
training expert 4 at iter 2  0.707013 seconds (14.46 M allocations: 535.458 MiB, 22.07% gc time)
training expert 5 at iter 2  2.748527 seconds (53.81 M allocations: 1.951 

Excessive output truncated after 524321 bytes.