In [1]:
using CSV, JuMP, Gurobi, DataFrames;

In [2]:
GUROBI_ENV = Gurobi.Env();

Academic license - for non-commercial use only


# Reading Data

In [3]:
PLAYERS_DATA_PATH = "../data/optimization_input.csv";
OUTPUT_PATH = "../output/lineups.csv";
TEST_DATA_PATH = "../data/test_input.csv"

"../data/test_input.csv"

In [4]:
players = CSV.read(PLAYERS_DATA_PATH);

In [5]:
test_set = CSV.read(TEST_DATA_PATH);

In [6]:
players[:,:Game].pool

3-element Array{String,1}:
 "WAS@LAC"
 "UTA@TOR"
 "GS@ORL" 

# IP Formulation

In [7]:
function optimize_lineups(players; nb_lineups=1, max_overlap=6, fp_column="prediction", use_std=false, std_column="leaf_std", std_weight = 0.5, MAX_PG=2, MAX_SG=2, MAX_SF=2, MAX_PF=2, MAX_C=1, BUDGET=60000.0, save=true, output_path="../output/lineups.csv")
    # Columns names
    NAME = Symbole("name")
    TEAM = Symbole("team_key")
    POSITION = Symbol("Position")
    SALARY = Symbol("Salary")
    INJURY = Symbol("Injury Indicator")
    FP = Symbol(fp_column)
    STD = Symbol(std_column)
    if use_std
        output_columns = [NAME, POSITION, TEAM , FP, STD]
        output_column_names = ["Name_", "Position_", "TEAM_", "FP_", "STD_"]
    else
        output_columns = [NAME, POSITION, TEAM , FP]
        output_column_names = ["Name_", "Position_", "TEAM_", "FP_"]
    end 
    
    # Reading inputs
    ## Fantasy points
    fp = players[:,FP];
    ## Positions
    PG = Int.(players[:,POSITION].=="PG")
    SG = Int.(players[:,POSITION].=="SG")
    SF = Int.(players[:,POSITION].=="SF")
    PF = Int.(players[:,POSITION].=="PF")
    C  = Int.(players[:,POSITION].=="C")
    ## Salary
    salary = players[:,SALARY];
    ## Injuries
    injury = 1 .- ismissing.(players[:,INJURY]) 
    o_injury = Int.(Missings.coalesce.(players[:,INJURY], 0).=="O")
    q_injury = Int.(Missings.coalesce.(players[:,INJURY], 0).=="Q")
    p_injury = Int.(Missings.coalesce.(players[:,INJURY], 0).=="P");

    ## Number of players
    nb_players = size(players)[1]

    # Model
    model = Model(solver=GurobiSolver(OutputFlag=0, GUROBI_ENV))

    # Variable
    @variable(model, z[i=1:nb_players], Bin)

    # Objective function
    if use_std
        ## With std 
        std = players[:,STD];
        @objective(model, Max, (1-std_weight)*sum(fp.*z) + std_weight*sum(std.*z))
    else
        ## Without std        
        @objective(model, Max, sum(fp.*z))
    end

    # Constrains without the overleap constraint
    @constraint(model, sum(salary.*z) <= BUDGET)
    @constraint(model, sum(PG.*z) == MAX_PG)
    @constraint(model, sum(SG.*z) == MAX_SG)
    @constraint(model, sum(SF.*z) == MAX_SF)
    @constraint(model, sum(PF.*z) == MAX_PF)
    @constraint(model, sum(C.*z) == MAX_C)
    @constraint(model, z .<= (1 .- injury))

    # Initialization (iteration=1)
    solve(model)
    x = Int.(getvalue(z))
    lineups = players[x.==1, output_columns]    
    names!(lineups, Symbol.(string.(output_column_names, 1)))  

    # Rest of iterations
    @constraint(model, sum(x.*z) <= max_overlap)
    for i=2:nb_lineups
        solve(model)
        x = hcat(x,Int.(getvalue(z)))
        lineups_names = players[x[:,i].==1, output_columns]
        names!(lineups_names, Symbol.(string.(output_column_names, i)))    
        lineups = hcat(lineups, lineups_names)
        @constraint(model, sum(x[:,i].*z) <= max_overlap)
    end
    if save
        CSV.write(output_path, lineups);
    end
    score_per_lineup = get_score_lineups(lineups)
    return lineups, score_per_lineup
end;
    

# Annexe functions

In [8]:
function get_number_of_lineups(lineups; fp_column = "FP_")
    # Return the number of lineups using the columns fp_column_i
    colnames = names(lineups)
    nb_lineups = 0
    for col in colnames
        if startswith(string(col), fp_column)
            nb_lineups = nb_lineups+1
        end
    end
    nb_lineups
end;

In [9]:
function get_score_lineups(lineups; fp_column = "FP_")
    # Returns the scores of the lineups based on the columns 
    nb_lineups = get_number_of_lineups(lineups; fp_column = fp_column)
    return aggregate(lineups[:,Symbol.([string(fp_column,i) for i=1:nb_lineups])], sum)
end;

In [10]:
function add_true_scores(lineups, test_set; fp_column = "FP_", true_fp_column = "fp")
    # Adds the columns true_fp_column_i from the test_set to the lineups
    nb_lineups = get_number_of_lineups(lineups; fp_column = fp_column)
    for lineup=1:nb_lineups
        player_names = lineups[!,Symbol(string("Name_",lineup))]
        lineups[!,Symbol(string("True_FP_",lineup))] = filter(row -> row[:name] in player_names, test_set)[!,Symbol(true_fp_column)]
    end
    return order_lineups(lineups)
end;

In [11]:
function order_lineups(lineups)
    # Orders the column of lineups based on the lineup index
    nb_lineups = get_number_of_lineups(lineups)
    colnames = names(lineups)
    ordered_colnames = []
    for lineup=1:nb_lineups
        for col in colnames
            if endswith(string(col), string("_",lineup))
                push!(ordered_colnames, col)
            end
        end
    end
    return lineups[:,ordered_colnames]
end;

In [12]:
function get_lineup(lineups, lineup_idx)
    # Returns all the columns of the lineup with index lineup_idx
    all_colnames = names(lineups)
    lineup_colnames = []
    for col in all_colnames
        if endswith(string(col), string(lineup_idx))
            push!(lineup_colnames, col)
        end
    end
    return lineups[:,lineup_colnames]
end;

In [13]:
function get_best_lineup(lineups; fp_column = "FP_")
    # Returns the best lineup and its score based on the column fp_column_i
    nb_lineups = get_number_of_lineups(lineups)
    scores = get_score_lineups(lineups, fp_column=fp_column)
    best_lineup_idx = 1
    for lineup=2:nb_lineups
        if scores[1,lineup]>scores[1,best_lineup_idx]
            best_lineup_idx=lineup
        end
    end
    return get_lineup(lineups, best_lineup_idx), scores[:,[best_lineup_idx]]
end;

In [14]:
function test_lineups(lineups, test_set; pred_column = "prediction", true_column="fp")
    best_lineup, best_score = optimize_lineups(test_set, fp_column=true_column, nb_lineups=1)
    lineup_with_true_scores = add_true_scores(lineups, test_set; fp_column = "FP_", true_fp_column = true_column)
    best_lineup_with_true_scores, best_lineup_true_score = get_best_lineup(lineup_with_true_scores; fp_column = "True_FP_")
    captured_score = (best_lineup_true_score[1,1]/best_score[1,1])*100
    return best_lineup_with_true_scores, best_lineup, captured_score
end;

# Testing Functions

In [15]:
lineups, lineups_scores =  optimize_lineups(players; nb_lineups=10, max_overlap=6, fp_column="prediction", save=true, output_path="../output/lineups.csv");

UndefVarError: UndefVarError: Symbole not defined

In [16]:
lineups

UndefVarError: UndefVarError: lineups not defined

In [17]:
lineups_scores

UndefVarError: UndefVarError: lineups_scores not defined

In [18]:
get_number_of_lineups(lineups; fp_column = "FP_")

UndefVarError: UndefVarError: lineups not defined

In [19]:
get_score_lineups(lineups; fp_column = "FP_")

UndefVarError: UndefVarError: lineups not defined

In [20]:
lineup_with_true_scores = add_true_scores(lineups, test_set; fp_column = "FP_", true_fp_column = "fp")

UndefVarError: UndefVarError: lineups not defined

In [21]:
get_lineup(lineup_with_true_scores, 2)

UndefVarError: UndefVarError: lineup_with_true_scores not defined

In [22]:
# Returns the best lineup based on the predictions
get_best_lineup(lineup_with_true_scores; fp_column = "FP_")

UndefVarError: UndefVarError: lineup_with_true_scores not defined

In [23]:
# Returns the best lineup based on the true FP scores
get_best_lineup(lineup_with_true_scores; fp_column = "True_FP_")

UndefVarError: UndefVarError: lineup_with_true_scores not defined

In [24]:
best_lineup_with_true_scores, best_lineup, captured_score = test_lineups(lineups, test_set; pred_column = "prediction", true_column="fp");

UndefVarError: UndefVarError: lineups not defined

In [25]:
best_lineup_with_true_scores

UndefVarError: UndefVarError: best_lineup_with_true_scores not defined

In [26]:
sum(best_lineup_with_true_scores[!,:True_FP_4])

UndefVarError: UndefVarError: best_lineup_with_true_scores not defined

In [27]:
best_lineup

UndefVarError: UndefVarError: best_lineup not defined

In [28]:
sum(best_lineup[!,:FP_1])

UndefVarError: UndefVarError: best_lineup not defined

In [29]:
captured_score

UndefVarError: UndefVarError: captured_score not defined