In [1]:
using Optim
using Random
using StatsBase
using Statistics
using Distributions
using LinearAlgebra

In [2]:
function creat_players(clubs, players_per_club, mean = 0, var = 1, lb = 0, ub = 100)
    all_players = rand(Truncated(Normal(mean, var), lb, ub), clubs * players_per_club)
    all_players = all_players / (sum(all_players) / clubs)
    players = Dict{Int64, Float64}(zip(1:clubs * players_per_club, all_players))
    return players
end

function simulating_game(club1, club2, sims = 1000000)
    λ₁ = sum(club1)
    λ₂ = sum(club2)
    X₁ = Poisson(λ₁ / λ₂)
    X₂ = Poisson(λ₂ / λ₁)
    Y₁ = rand(X₁, sims)
    Y₂ = rand(X₂, sims)
    wins₁ = sum(Y₁ .> Y₂)
    draws = sum(Y₁ .== Y₂)
    wins₂ = sum(Y₁ .< Y₂)
    return wins₁, draws, wins₂
end

function create_games(seasons, clubs, ppc)
    results = [[[0 for i in 1:4] for j in 1:clubs * (clubs - 1)] for s in 1:seasons]
    players = creat_players(clubs, ppc)
    squads = []
    for i in 1:seasons
        line = 1
        append!(squads, [reshape(shuffle(collect(1:length(players))), (clubs, ppc))])
        actual_clubs = convert(Matrix{Float64}, (deepcopy(last(squads))))
        for i in 1:clubs
            for j in 1:ppc
                actual_clubs[i, j] = players[actual_clubs[i, j]]
            end
        end

        for j in 1:clubs
            for k in 1:clubs
                if j != k
                    Xⱼ = Poisson(sum(actual_clubs[j, :]) / sum(actual_clubs[k, :]))
                    Xₖ = Poisson(sum(actual_clubs[k, :]) / sum(actual_clubs[j, :]))
                    results[i][line][1] = j
                    results[i][line][2] = rand(Xⱼ)
                    results[i][line][3] = rand(Xₖ)
                    results[i][line][4] = k
                    line += 1
                end
            end
        end
    end
    return results, squads, players
end

# results be an array like [1, 1, 0, 3]
# [home club, score home, score away, away club]
function likelihood(players, results, squads)
    if typeof(players) != Dict{Int64, Float64}
        players = Dict{Int64, Float64}(zip(1:length(players), players))
    end
    
    loglikelihood = 0
    for i in 1:length(results)
        actual_clubs = convert(Matrix{Float64}, (deepcopy(squads[i])))
        for j in 1:size(actual_clubs)[1]
            for k in 1:size(actual_clubs)[2]
                actual_clubs[j, k] = players[actual_clubs[j, k]]
            end
        end
        
        for j in 1:length(results[i])
            clubₕ, scoreₕ, scoreₐ, clubₐ = results[i][j]
            loglikelihood -= logpdf(Poisson(sum(actual_clubs[clubₕ, :]) / sum(actual_clubs[clubₐ, :])),
                                    scoreₕ)
            loglikelihood -= logpdf(Poisson(sum(actual_clubs[clubₐ, :]) / sum(actual_clubs[clubₕ, :])),
                                    scoreₐ)
        end
    end
    
    return loglikelihood
end

function gradient(players, results, squads, ∇likelihood)
    if typeof(players) != Dict{Int64, Float64}
        players = Dict{Int64, Float64}(zip(1:length(players), players))
    end
    
    # ∇likelihood = zeros(length(players))
    for i in 1:length(results)
        for j in 1:length(results[i])
            clubₕ, scoreₕ, scoreₐ, clubₐ = results[i][j]
            λₕ, λₐ = 0, 0
            for k in 1:size(squads[i], 2)
                λₕ += players[squads[i][clubₕ, k]]
                λₐ += players[squads[i][clubₐ, k]]
            end
            
            for p in 1:length(players)
                if p in squads[i][clubₕ, :] && p in squads[i][clubₐ, :]
                    ∇likelihood[p] += (λₕ - λₐ) / λₐ ^ 2 + scoreₕ * (λₐ - λₕ) / (λₕ * λₐ)
                    ∇likelihood[p] += (λₐ - λₕ) / λₕ ^ 2 + scoreₐ * (λₕ - λₐ) / (λₐ * λₕ)
                elseif p in squads[i][clubₕ, :]
                    ∇likelihood[p] += scoreₕ / λₕ - 1 / λₐ
                    ∇likelihood[p] += λₐ / λₕ ^ 2 - scoreₐ / λₕ
                elseif p in squads[i][clubₐ, :]
                    ∇likelihood[p] += scoreₐ / λₐ - 1 / λₕ
                    ∇likelihood[p] += λₕ / λₐ ^ 2 - scoreₕ / λₐ
                end
            end
        end
    end
    
    return ∇likelihood
end

gradient (generic function with 1 method)

In [3]:
seasons = 10
n_clubs = 20
ppc = 11

# compilando
results, squads, players = @time create_games(seasons, n_clubs, ppc)
clubs = convert(Matrix{Float64}, (deepcopy(last(squads))))
for i in 1:n_clubs
    for j in 1:ppc
        clubs[i, j] = players[clubs[i, j]]
    end
end

@time likelihood(players, results, squads)
@time gradient(players, results, squads, zeros(length(players)))

# reexecutando
results, squads, players = @time create_games(seasons, n_clubs, ppc)
clubs = convert(Matrix{Float64}, (deepcopy(last(squads))))
for i in 1:n_clubs
    for j in 1:ppc
        clubs[i, j] = players[clubs[i, j]]
    end
end

@time likelihood(players, results, squads)
@time gradient(players, results, squads, zeros(length(players)))
println()

  1.107656 seconds (2.14 M allocations: 126.719 MiB, 7.96% gc time, 99.13% compilation time)
  0.438442 seconds (2.55 M allocations: 146.259 MiB, 10.13% gc time, 98.18% compilation time)
  1.246122 seconds (5.30 M allocations: 470.564 MiB, 4.10% gc time, 13.24% compilation time)
  0.011517 seconds (69.11 k allocations: 3.847 MiB)
  0.007893 seconds (83.26 k allocations: 3.674 MiB)
  1.037426 seconds (5.15 M allocations: 461.898 MiB, 2.87% gc time)



# Teste para Otimização

In [4]:
f(x) = likelihood(x, results, squads)
g(∇likelihood, x) = gradient(x, results, squads, ∇likelihood)
lower = zeros(220)
upper = 20 * ones(220)
x_inicial = rand(220)
od = OnceDifferentiable(f, g, x_inicial)
@time res1  = optimize(od,
                       lower,
                       upper,
                       x_inicial,
                       Fminbox(GradientDescent()),
                       Optim.Options(iterations = 1000))
@time res2  = optimize(od,
                       lower,
                       upper,
                       x_inicial,
                       Fminbox(NelderMead()),
                       Optim.Options(iterations = 1000))
@time res3 = optimize(x -> likelihood(x, results, squads),
                      lower,
                      upper,
                      x_inicial,
                      Fminbox(NelderMead()),
                      Optim.Options(iterations = 1000))

println()

  5.111020 seconds (22.30 M allocations: 1.505 GiB, 6.00% gc time, 58.39% compilation time)
  4.977191 seconds (38.97 M allocations: 1.707 GiB, 6.43% gc time, 23.54% compilation time)
1668.326053 seconds (17.44 G allocations: 753.369 GiB, 2.30% gc time, 0.03% compilation time)



In [5]:
Optim.converged(res1), Optim.minimizer(res1), Optim.minimum(res1), likelihood(players, results, squads)

(true, [0.533736805231835, 0.1326511178608072, 0.22991537454608557, 0.3179972108851201, 0.26954835801662647, 0.6934799495987365, 0.03510926969539452, 0.9287393966840538, 0.5730340965009539, 0.9833713758200204  …  0.9660715036858183, 0.2936913315104581, 0.952181595748149, 0.9864366845866896, 0.9470388023049914, 0.06013761537959983, 0.1889890448160858, 0.714514098636047, 0.3538026792774871, 0.047061941533944385], 10799.333858608168, 9977.449999745528)

In [6]:
max_lik_players = Dict{Int64, Float64}(zip(1:length(Optim.minimizer(res1)), Optim.minimizer(res1)))

original_players = zeros(220)
optimized_players = zeros(220)

for i in 1:220
    original_players[i] = players[i]
    optimized_players[i] = max_lik_players[i]
end

original_players /= original_players[1]
optimized_players /= optimized_players[1]
grad = gradient(Optim.minimizer(res1), results, squads, zeros(length(players)))
println("Correlação de Pearson: ", cor(original_players, optimized_players))
println("Correlação de Spearman: ", corspearman(original_players, optimized_players))
println("Correlação de Kendall: ", corkendall(original_players, optimized_players))
println("||Gradiente||: ", norm(grad))

Correlação de Pearson: -0.04220444963145277
Correlação de Spearman: -0.048845111375328945
Correlação de Kendall: -0.030635118306351183
||Gradiente||: 351.02130905252307


In [7]:
Optim.converged(res2), Optim.minimizer(res2), Optim.minimum(res2), likelihood(players, results, squads)

(true, [0.533736805231835, 0.1326511178608072, 0.22991537454608557, 0.3179972108851201, 0.26954835801662647, 0.6934799495987365, 0.03510926969539452, 0.9287393966840538, 0.5730340965009539, 0.9833713758200204  …  0.9660715036858183, 0.2936913315104581, 0.952181595748149, 0.9864366845866896, 0.9470388023049914, 0.06013761537959983, 0.1889890448160858, 0.714514098636047, 0.3538026792774871, 0.047061941533944385], 10785.368089422136, 9977.449999745528)

In [8]:
max_lik_players = Dict{Int64, Float64}(zip(1:length(Optim.minimizer(res2)), Optim.minimizer(res2)))

original_players = zeros(220)
optimized_players = zeros(220)

for i in 1:220
    original_players[i] = players[i]
    optimized_players[i] = max_lik_players[i]
end

original_players /= original_players[1]
optimized_players /= optimized_players[1]
grad = gradient(Optim.minimizer(res2), results, squads, zeros(length(players)))
println("Correlação de Pearson: ", cor(original_players, optimized_players))
println("Correlação de Spearman: ", corspearman(original_players, optimized_players))
println("Correlação de Kendall: ", corkendall(original_players, optimized_players))
println("||Gradiente||: ", norm(grad))

Correlação de Pearson: -0.03756027455460266
Correlação de Spearman: -0.04749384378715563
Correlação de Kendall: -0.029970942299709424
||Gradiente||: 346.2884225231217


In [9]:
Optim.converged(res3), Optim.minimizer(res3), Optim.minimum(res3), likelihood(players, results, squads)

(true, [0.15698266645581468, 0.869663155564679, 1.179466022892029, 0.04360852475889209, 0.5282879226639082, 0.2922323964147149, 1.9954678083700446, 0.030604257464743732, 1.3773472808003344, 0.172318195524848  …  0.9486686069432069, 0.15418870884905192, 0.7822314264312866, 0.1775503314004498, 0.8343506005267303, 0.5631759684343873, 0.05528861955845645, 0.9657670311843729, 0.3675135201312056, 1.2975326876927962], 9899.148944259236, 9977.449999745528)

In [10]:
max_lik_players = Dict{Int64, Float64}(zip(1:length(Optim.minimizer(res3)), Optim.minimizer(res3)))

original_players = zeros(220)
optimized_players = zeros(220)

for i in 1:220
    original_players[i] = players[i]
    optimized_players[i] = max_lik_players[i]
end

original_players /= original_players[1]
optimized_players /= optimized_players[1]
grad = gradient(Optim.minimizer(res3), results, squads, zeros(length(players)))
println("Correlação de Pearson: ", cor(original_players, optimized_players))
println("Correlação de Spearman: ", corspearman(original_players, optimized_players))
println("Correlação de Kendall: ", corkendall(original_players, optimized_players))
println("||Gradiente||: ", norm(grad))

Correlação de Pearson: 0.781773264087585
Correlação de Spearman: 0.7475406140998405
Correlação de Kendall: 0.5560813615608137
||Gradiente||: 9.186940558490596


In [11]:
@time res4  = optimize(od,
                       lower,
                       upper,
                       Optim.minimizer(res3),
                       Fminbox(GradientDescent()),
                       Optim.Options(iterations = 1000))

res4

  2.094019 seconds (10.54 M allocations: 934.922 MiB, 2.80% gc time)


 * Status: success

 * Candidate solution
    Final objective value:     9.899149e+03

 * Found with
    Algorithm:     Fminbox with Gradient Descent

 * Convergence measures
    |x - x'|               = 0.00e+00 ≤ 0.0e+00
    |x - x'|/|x'|          = 0.00e+00 ≤ 0.0e+00
    |f(x) - f(x')|         = NaN ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = NaN ≰ 0.0e+00
    |g(x)|                 = NaN ≰ 1.0e-08

 * Work counters
    Seconds run:   2  (vs limit Inf)
    Iterations:    1
    f(x) calls:    2
    ∇f(x) calls:   2


In [12]:
Optim.minimizer(res3) == Optim.minimizer(res4)

true