In [324]:
using CSV, JuMP, Gurobi, DataFrames, Statistics;

In [114]:
GUROBI_ENV = Gurobi.Env();

Academic license - for non-commercial use only


# Reading Data

# IP Formulation

In [392]:
function optimize_lineups(players; nb_lineups=1, max_overlap=6, fp_column="prediction", use_std=false, adj=false, std_column="leaf_std", std_weight = 0.5, MAX_PG=2, MAX_SG=2, MAX_SF=2, MAX_PF=2, MAX_C=1, BUDGET=60000.0, save=true, output_path="../output/lineups.csv")
    # Columns names
    NAME = Symbol("Nickname")
    TEAM = Symbol("Team")
    POSITION = Symbol("Position")
    SALARY = Symbol("Salary")
    INJURY = Symbol("Injury Indicator")
    FP = Symbol(fp_column)
    STD = Symbol(std_column)
    if use_std
        output_columns = [NAME, POSITION, TEAM , FP, STD]
        output_column_names = ["Name_", "Position_", "Team_", "FP_", "STD_"]
    else
        output_columns = [NAME, POSITION, TEAM , FP]
        output_column_names = ["Name_", "Position_", "Team_", "FP_"]
    end 
    
    # Reading inputs
    ## Fantasy points
    fp = players[:,FP];
    ## Positions
    PG = Int.(players[:,POSITION].=="PG")
    SG = Int.(players[:,POSITION].=="SG")
    SF = Int.(players[:,POSITION].=="SF")
    PF = Int.(players[:,POSITION].=="PF")
    C  = Int.(players[:,POSITION].=="C")
    ## Salary
    salary = players[:,SALARY];
    ## Injuries
#     injury = 1 .- ismissing.(players[:,INJURY]) 
#     o_injury = Int.(Missings.coalesce.(players[:,INJURY], 0).=="O")
#     q_injury = Int.(Missings.coalesce.(players[:,INJURY], 0).=="Q")
#     p_injury = Int.(Missings.coalesce.(players[:,INJURY], 0).=="P");

    ## Number of players
    nb_players = size(players)[1]

    # Model
    model = Model(solver=GurobiSolver(OutputFlag=0, GUROBI_ENV))

    # Variable
    @variable(model, z[i=1:nb_players], Bin)

    # Objective function
    if use_std
        ## With std 
        stds = players[:,STD];
        if adj
            @objective(model, Max, sum((fp .+ std_weight.*abs.(stds)).*z))
        else            
            @objective(model, Max, (1-std_weight)*sum(fp.*z) + std_weight*sum(stds.^2 .*z))
        end
    else
        ## Without std        
        @objective(model, Max, sum(fp.*z))
    end
    
    # Constrains without the overleap constraint
    @constraint(model, sum(salary.*z) <= BUDGET)
    @constraint(model, sum(PG.*z) == MAX_PG)
    @constraint(model, sum(SG.*z) == MAX_SG)
    @constraint(model, sum(SF.*z) == MAX_SF)
    @constraint(model, sum(PF.*z) == MAX_PF)
    @constraint(model, sum(C.*z) == MAX_C)
#     @constraint(model, z .<= (1 .- injury))

    # Initialization (iteration=1)
    solve(model)
    x = round.(Int,getvalue(z))
    lineups = players[x.==1, output_columns]    
    names!(lineups, Symbol.(string.(output_column_names, 1)))  

    # Rest of iterations
    @constraint(model, sum(x.*z) <= max_overlap)
    for i=2:nb_lineups
        solve(model)
        x = hcat(x,round.(Int,getvalue(z)))
        lineups_names = players[x[:,i].==1, output_columns]
        names!(lineups_names, Symbol.(string.(output_column_names, i)))    
        lineups = hcat(lineups, lineups_names)
        @constraint(model, sum(x[:,i].*z) <= max_overlap)
    end
    if save
        CSV.write(output_path, lineups);
    end
    score_per_lineup = get_score_lineups(lineups)
    return lineups, score_per_lineup
end;
    

# Annexe functions

In [254]:
function get_number_of_lineups(lineups; fp_column = "FP_")
    # Return the number of lineups using the columns fp_column_i
    colnames = names(lineups)
    nb_lineups = 0
    for col in colnames
        if startswith(string(col), fp_column)
            nb_lineups = nb_lineups+1
        end
    end
    nb_lineups
end;

In [255]:
function get_score_lineups(lineups; fp_column = "FP_")
    # Returns the scores of the lineups based on the columns 
    nb_lineups = get_number_of_lineups(lineups; fp_column = fp_column)
    return aggregate(lineups[:,Symbol.([string(fp_column,i) for i=1:nb_lineups])], sum)
end;

In [256]:
function add_true_scores(lineups, test_set; fp_column = "FP_", true_fp_column = "FP")
    # Adds the columns true_fp_column_i from the test_set to the lineups
    nb_lineups = get_number_of_lineups(lineups; fp_column = fp_column)
    for lineup=1:nb_lineups
        player_names = lineups[!,Symbol(string("Name_",lineup))]
        lineups[!,Symbol(string("True_FP_",lineup))] = filter(row -> row[:Nickname] in player_names, test_set)[!,Symbol(true_fp_column)]
    end
    return order_lineups(lineups)
end;

In [257]:
function order_lineups(lineups)
    # Orders the column of lineups based on the lineup index
    nb_lineups = get_number_of_lineups(lineups)
    colnames = names(lineups)
    ordered_colnames = []
    for lineup=1:nb_lineups
        for col in colnames
            if endswith(string(col), string("_",lineup))
                push!(ordered_colnames, col)
            end
        end
    end
    return lineups[:,ordered_colnames]
end;

In [258]:
function get_lineup(lineups, lineup_idx)
    # Returns all the columns of the lineup with index lineup_idx
    all_colnames = names(lineups)
    lineup_colnames = []
    for col in all_colnames
        if endswith(string(col), string(lineup_idx))
            push!(lineup_colnames, col)
        end
    end
    return lineups[:,lineup_colnames]
end;

In [259]:
function get_best_lineup(lineups; fp_column = "FP_")
    # Returns the best lineup and its score based on the column fp_column_i
    nb_lineups = get_number_of_lineups(lineups)
    scores = get_score_lineups(lineups, fp_column=fp_column)
    best_lineup_idx = 1
    for lineup=2:nb_lineups
        if scores[1,lineup]>scores[1,best_lineup_idx]
            best_lineup_idx=lineup
        end
    end
    return get_lineup(lineups, best_lineup_idx), scores[:,[best_lineup_idx]]
end;

In [260]:
function test_lineups(lineups, test_set; pred_column = "prediction", true_column="FP")
    best_lineup, best_score = optimize_lineups(test_set, fp_column=true_column, nb_lineups=1)
    lineup_with_true_scores = add_true_scores(lineups, test_set; fp_column = "FP_", true_fp_column = true_column)
    best_lineup_with_true_scores, best_lineup_true_score = get_best_lineup(lineup_with_true_scores; fp_column = "True_FP_")
    captured_score = (best_lineup_true_score[1,1]/best_score[1,1])*100
    return captured_score, lineup_with_true_scores, best_lineup, best_lineup_with_true_scores  
end;

# Testing Functions

In [263]:
function generate_paths(suffix)
    paths = DataFrame(keys = String[], 
        optimization_inputs_paths = String[], 
        lineups_output_paths = String[], 
        best_lineup_output_paths = String[], 
        scores_ouput_paths = String[])
    for (root, dirs, files) in walkdir("../data/optimization_inputs/")
        for file in files
            if occursin("_fake", file)
                # Keys
                key = split(file,"_")[1]
                # Paths
                optimization_inputs_path = string("../data/optimization_inputs/",key,"_fake.csv")
                lineups_output_path = string("../data/test_results/",key,"_fake_lineups_",suffix,".csv")
                best_lineup_output_path = string("../data/test_results/",key,"_fake_best_lineup_",suffix,".csv")
                scores_ouputs_path = string("../data/test_results/results_",suffix,".csv")
                push!(paths, [key, optimization_inputs_path, lineups_output_path, best_lineup_output_path, scores_ouputs_path])
            end
        end
    end
    sort!(paths);
    return paths
end;

In [340]:
function run_tests(paths, nb_lineups, max_overlap, use_std; adj=false, std_weight=0.1)
    results = DataFrame(keys = String[], score = Float64[])
    for i=1:size(paths)[1]
        println(paths[i,:keys])
        key =  paths[i,:keys]
        optimization_inputs_path = paths[i,:optimization_inputs_paths]
        lineups_output_path = paths[i,:lineups_output_paths]
        best_lineup_output_path = paths[i,:best_lineup_output_paths]

        players = CSV.read(optimization_inputs_path);

        lineups, scores = optimize_lineups(players; nb_lineups=nb_lineups, max_overlap=max_overlap, use_std=use_std, adj=adj, std_weight = std_weight, save=false);

        captured_score, lineup_with_true_scores, best_lineup, best_lineup_with_true_scores = test_lineups(lineups, players)

        push!(results, [key, captured_score])

        CSV.write(lineups_output_path, lineup_with_true_scores)
        CSV.write(best_lineup_output_path, best_lineup)

    end
    CSV.write(paths[1,:scores_ouput_paths], results)
    return results
end;

# function count_positive(results)
#     size(filter(x->x[:score]>=90,results))[1]
    

### Baseline

In [365]:
nb_lineups = 50
max_overlap = 5
use_std = false

paths = generate_paths("no_std")
results = run_tests(paths, nb_lineups, max_overlap, use_std)
sort!(results, 2, rev=true)

2012-01-14
2012-03-01
2013-03-05
2013-10-30
2013-11-17
2014-01-11
2014-02-11
2014-03-12
2014-03-17
2014-11-17
2015-01-11
2015-02-24
2015-03-13
2015-04-11
2015-05-11
2015-11-27


Unnamed: 0_level_0,keys,score
Unnamed: 0_level_1,String,Float64
1,2015-05-11,95.6286
2,2013-03-05,94.3958
3,2015-11-27,89.9864
4,2013-11-17,89.8762
5,2014-03-12,84.6154
6,2012-01-14,84.6008
7,2013-10-30,84.0945
8,2015-02-24,83.5415
9,2014-01-11,82.952
10,2015-03-13,81.7791


In [371]:
mean(results[,:score])

85.3633259547918

In [410]:
size(filter(x->round(Int, x[:score])>=90,results))[1]/size(results)[1]

0.25

In [412]:
size(filter(x->round(Int, x[:score])>=95,results))[1]/size(results)[1]

0.0625

### Not adjusted

In [377]:
nb_lineups = 50
max_overlap = 5
use_std = true
std_weight = 0.2

paths_1 = generate_paths("with_std")
results_1 = run_tests(paths_1, nb_lineups, max_overlap, use_std; std_weight=std_weight)
sort!(results_1, 2, rev=true)

2012-01-14
2012-03-01
2013-03-05
2013-10-30
2013-11-17
2014-01-11
2014-02-11
2014-03-12
2015-01-11
2015-02-24
2015-03-13
2015-04-11
2015-05-11
2015-11-27


Unnamed: 0_level_0,keys,score
Unnamed: 0_level_1,String,Float64
1,2015-05-11,96.0942
2,2013-03-05,89.7211
3,2013-10-30,88.7514
4,2012-01-14,88.1666
5,2013-11-17,86.9326
6,2015-02-24,85.8437
7,2014-03-12,83.5023
8,2015-11-27,83.4352
9,2014-01-11,82.7264
10,2012-03-01,79.7262


In [415]:
mean(results_1[:,:score])

83.75092290855038

In [414]:
size(filter(x->round(Int, x[:score])>=90,results_1))[1]/size(results_1)[1]

0.14285714285714285

In [417]:
size(filter(x->round(Int, x[:score])>=95,results_1))[1]/size(results_1)[1]

0.07142857142857142

### Adjusted

In [418]:
nb_lineups = 50
max_overlap = 5
use_std = true
adj = true
std_weight = 1

paths_2 = generate_paths("with_std_adj")
results_2 = run_tests(paths_2, nb_lineups, max_overlap, use_std; adj=adj, std_weight=std_weight)
sort!(results_2, 2, rev=true)

2012-01-14
2012-03-01
2013-03-05
2013-10-30
2013-11-17
2014-01-11
2014-02-11
2014-03-12
2015-01-11
2015-02-24
2015-03-13
2015-04-11
2015-05-11
2015-11-27


Unnamed: 0_level_0,keys,score
Unnamed: 0_level_1,String,Float64
1,2015-05-11,95.6286
2,2013-03-05,91.9522
3,2013-11-17,91.6919
4,2015-11-27,89.9864
5,2015-02-24,88.5623
6,2014-03-12,88.1276
7,2013-10-30,86.8166
8,2014-01-11,85.2401
9,2012-01-14,84.9164
10,2012-03-01,82.9304


In [405]:
mean(results_2[1:14,:score])

85.71531970217062

In [406]:
size(filter(x->round(Int, x[:score])>=95,results_1))[1]/size(results_1)[1]

0.0625

In [407]:
size(filter(x->round(Int, x[:score])>=90,results_1))[1]/size(results_1)[1]

0.1875