In [1]:
using Optim
using Random
using StatsBase
using Statistics
using Distributions

In [2]:
function creat_players(clubs, players_per_club, mean = 0, var = 1, lb = 0, ub = 100)
    all_players = rand(Truncated(Normal(mean, var), lb, ub), clubs * players_per_club)
    all_players = all_players / (sum(all_players) / clubs)
    players = Dict{Int64, Float64}(zip(1:clubs * players_per_club, all_players))
    return players
end

function simulating_game(club1, club2, sims = 1000000)
    λ₁ = sum(club1)
    λ₂ = sum(club2)
    X₁ = Poisson(λ₁ / λ₂)
    X₂ = Poisson(λ₂ / λ₁)
    Y₁ = rand(X₁, sims)
    Y₂ = rand(X₂, sims)
    wins₁ = sum(Y₁ .> Y₂)
    draws = sum(Y₁ .== Y₂)
    wins₂ = sum(Y₁ .< Y₂)
    return wins₁, draws, wins₂
end

function create_games(seasons, clubs, ppc)
    results = [[[0 for i in 1:4] for j in 1:clubs * (clubs - 1)] for s in 1:seasons]
    players = creat_players(clubs, ppc)
    squads = []
    for i in 1:seasons
        line = 1
        append!(squads, [reshape(shuffle(collect(1:length(players))), (clubs, ppc))])
        actual_clubs = convert(Matrix{Float64}, (deepcopy(last(squads))))
        for i in 1:clubs
            for j in 1:ppc
                actual_clubs[i, j] = players[actual_clubs[i, j]]
            end
        end

        for j in 1:clubs
            for k in 1:clubs
                if j != k
                    Xⱼ = Poisson(sum(actual_clubs[j, :]) / sum(actual_clubs[k, :]))
                    Xₖ = Poisson(sum(actual_clubs[k, :]) / sum(actual_clubs[j, :]))
                    results[i][line][1] = j
                    results[i][line][2] = rand(Xⱼ)
                    results[i][line][3] = rand(Xₖ)
                    results[i][line][4] = k
                    line += 1
                end
            end
        end
    end
    return results, squads, players
end

# results be an array like [1, 1, 0, 3]
# [home club, score home, score away, away club]
function likelihood(players, results, squads)
    if typeof(players) != Dict{Int64, Float64}
        players = Dict{Int64, Float64}(zip(1:length(players), players))
    end
    
    loglikelihood = 0
    for i in 1:length(results)
        actual_clubs = convert(Matrix{Float64}, (deepcopy(squads[i])))
        for j in 1:size(actual_clubs)[1]
            for k in 1:size(actual_clubs)[2]
                actual_clubs[j, k] = players[actual_clubs[j, k]]
            end
        end
        
        for j in 1:length(results[i])
            clubₕ, scoreₕ, scoreₐ, clubₐ = results[i][j]
            loglikelihood -= logpdf(Poisson(sum(actual_clubs[clubₕ, :]) / sum(actual_clubs[clubₐ, :])),
                                    scoreₕ)
            loglikelihood -= logpdf(Poisson(sum(actual_clubs[clubₐ, :]) / sum(actual_clubs[clubₕ, :])),
                                    scoreₐ)
        end
    end
    
    return loglikelihood
end

function gradient(players, results, squads, ∇likelihood)
    if typeof(players) != Dict{Int64, Float64}
        players = Dict{Int64, Float64}(zip(1:length(players), players))
    end
    
    # ∇likelihood = zeros(length(players))
    for i in 1:length(results)
        for j in 1:length(results[i])
            clubₕ, scoreₕ, scoreₐ, clubₐ = results[i][j]
            λ₁, λ₂ = 0, 0
            for k in 1:size(squads[i], 2)
                λ₁ += players[squads[i][clubₕ, k]]
                λ₂ += players[squads[i][clubₐ, k]]
            end
            
            for p in 1:length(players)
                if p in squads[i][clubₕ, :] && p in squads[i][clubₐ, :]
                    ∇likelihood[p] += (λ₁ - λ₂) / λ₂ ^ 2 + scoreₕ * (λ₂ - λ₁) / (λ₁ * λ₂)
                    ∇likelihood[p] += (λ₂ - λ₁) / λ₁ ^ 2 + scoreₐ * (λ₁ - λ₂) / (λ₂ * λ₁)
                elseif p in squads[i][clubₕ, :]
                    ∇likelihood[p] += scoreₕ / λ₁ - 1 / λ₂
                    ∇likelihood[p] += λ₂ / λ₁ ^ 2 - scoreₐ / λ₁
                elseif p in squads[i][clubₐ, :]
                    ∇likelihood[p] += scoreₐ / λ₂ - 1 / λ₁
                    ∇likelihood[p] += λ₁ / λ₂ ^ 2 - scoreₕ / λ₂
                end
            end
        end
    end
    
    return ∇likelihood
end

gradient (generic function with 1 method)

In [3]:
seasons = 10
n_clubs = 20
ppc = 11

# compilando
results, squads, players = @time create_games(seasons, n_clubs, ppc)
clubs = convert(Matrix{Float64}, (deepcopy(last(squads))))
for i in 1:n_clubs
    for j in 1:ppc
        clubs[i, j] = players[clubs[i, j]]
    end
end

@time likelihood(players, results, squads)
@time gradient(players, results, squads, zeros(length(players)))

# reexecutando
results, squads, players = @time create_games(seasons, n_clubs, ppc)
clubs = convert(Matrix{Float64}, (deepcopy(last(squads))))
for i in 1:n_clubs
    for j in 1:ppc
        clubs[i, j] = players[clubs[i, j]]
    end
end

@time likelihood(players, results, squads)
@time gradient(players, results, squads, zeros(length(players)))
println()

  1.262195 seconds (2.14 M allocations: 126.719 MiB, 8.07% gc time, 99.09% compilation time)
  0.540601 seconds (2.55 M allocations: 146.259 MiB, 8.15% gc time, 98.21% compilation time)
  1.639709 seconds (5.30 M allocations: 470.564 MiB, 5.34% gc time, 12.19% compilation time)
  0.010647 seconds (69.11 k allocations: 3.847 MiB)
  0.009521 seconds (83.26 k allocations: 3.674 MiB)
  1.409841 seconds (5.15 M allocations: 461.898 MiB, 5.29% gc time)



# Teste para Otimização

In [4]:
f(x) = likelihood(x, results, squads)
g(∇likelihood, x) = gradient(x, results, squads, ∇likelihood)
lower = zeros(220)
upper = 20 * ones(220)
x_inicial = rand(220)
od = OnceDifferentiable(f, g, x_inicial)
res1  = optimize(od,
                 lower,
                 upper,
                 x_inicial,
                 Fminbox(GradientDescent()),
                 Optim.Options(iterations = 1000))
res2  = optimize(od,
                 lower,
                 upper,
                 x_inicial,
                 Fminbox(NelderMead()),
                 Optim.Options(iterations = 1000))
res3 = optimize(x -> likelihood(x, results, squads),
                lower,
                upper,
                x_inicial,
                Fminbox(NelderMead()),
                Optim.Options(iterations = 1000))

println()




In [5]:
Optim.converged(res1), Optim.minimizer(res1), Optim.minimum(res1), likelihood(players, results, squads)

(true, [0.7530073356357216, 0.914269188988067, 0.5147477400729443, 0.5116269907212323, 0.012124245804017386, 0.790231149094109, 0.24726278934716683, 0.4017958882497299, 0.3051904747801959, 0.5088425081176353  …  0.21345503271280175, 0.593689405278288, 0.5928960236446867, 0.89539163500473, 0.09924924364899113, 0.9906580529598881, 0.1693196946383133, 0.8630723043195043, 0.057231714678050105, 0.9827728397825888], 10562.459472721874, 9984.927023398232)

In [6]:
max_lik_players = Dict{Int64, Float64}(zip(1:length(Optim.minimizer(res1)), Optim.minimizer(res1)))

original_players = zeros(220)
optimized_players = zeros(220)

for i in 1:220
    original_players[i] = players[i]
    optimized_players[i] = max_lik_players[i]
end

original_players /= original_players[1]
optimized_players /= optimized_players[1]

println("Correlação de Pearson: ", cor(original_players, optimized_players))
println("Correlação de Spearman: ", corspearman(original_players, optimized_players))
println("Correlação de Kendall: ", corkendall(original_players, optimized_players))

Correlação de Pearson: 0.10202686790191992
Correlação de Spearman: 0.08168801383950457
Correlação de Kendall: 0.053383146533831465


In [7]:
Optim.converged(res2), Optim.minimizer(res2), Optim.minimum(res2), likelihood(players, results, squads)

(true, [0.7530073356357216, 0.914269188988067, 0.5147477400729443, 0.5116269907212323, 0.012124245804017386, 0.790231149094109, 0.24726278934716683, 0.4017958882497299, 0.3051904747801959, 0.5088425081176353  …  0.21345503271280175, 0.593689405278288, 0.5928960236446867, 0.89539163500473, 0.09924924364899113, 0.9906580529598881, 0.1693196946383133, 0.8630723043195043, 0.057231714678050105, 0.9827728397825888], 10548.951537764093, 9984.927023398232)

In [8]:
max_lik_players = Dict{Int64, Float64}(zip(1:length(Optim.minimizer(res2)), Optim.minimizer(res2)))

original_players = zeros(220)
optimized_players = zeros(220)

for i in 1:220
    original_players[i] = players[i]
    optimized_players[i] = max_lik_players[i]
end

original_players /= original_players[1]
optimized_players /= optimized_players[1]

println("Correlação de Pearson: ", cor(original_players, optimized_players))
println("Correlação de Spearman: ", corspearman(original_players, optimized_players))
println("Correlação de Kendall: ", corkendall(original_players, optimized_players))

Correlação de Pearson: 0.1193658833419563
Correlação de Spearman: 0.08343034886145281
Correlação de Kendall: 0.0547945205479452


In [9]:
Optim.converged(res3), Optim.minimizer(res3), Optim.minimum(res3), likelihood(players, results, squads)

(true, [0.9173195144742836, 1.4556518218503105, 1.683243257772773, 1.08760251893424, 0.02594177167033089, 0.851240424791657, 1.2211166031570604, 1.2071281942135506, 0.05920845700872021, 0.3054595463600786  …  1.4177321456598515, 0.9660902034977767, 1.1752910503715321, 0.041704967195839375, 0.05245747863937957, 1.2339044621126123, 1.6038811224260956, 0.22543483592224778, 1.039963051575982, 0.39500515810550557], 9896.836254926047, 9984.927023398232)

In [10]:
max_lik_players = Dict{Int64, Float64}(zip(1:length(Optim.minimizer(res3)), Optim.minimizer(res3)))

original_players = zeros(220)
optimized_players = zeros(220)

for i in 1:220
    original_players[i] = players[i]
    optimized_players[i] = max_lik_players[i]
end

original_players /= original_players[1]
optimized_players /= optimized_players[1]

println("Correlação de Pearson: ", cor(original_players, optimized_players))
println("Correlação de Spearman: ", corspearman(original_players, optimized_players))
println("Correlação de Kendall: ", corkendall(original_players, optimized_players))

Correlação de Pearson: 0.7190880652450682
Correlação de Spearman: 0.7066374399170531
Correlação de Kendall: 0.5110004151100042
