In [None]:
using Pkg
Pkg.add(PackageSpec(url="https://github.com/AxelvL/AHPQ.jl", rev="master"))
using AHPQ

[?25l

[32m[1m   Updating[22m[39m git-repo `https://github.com/AxelvL/AHPQ.jl`




[32m[1m   Updating[22m[39m registry at `C:\Users\Axel9\.julia\registries\General`


[?25l

[32m[1m   Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`


[2K[?25h

[32m[1m  Resolving[22m[39m package versions...
[32m[1mUpdating[22m[39m `C:\Users\Axel9\.julia\environments\v1.5\Project.toml`
 [90m [b1450aca] [39m[93m~ AHPQ v0.1.0 `https://github.com/AxelvL/AHPQ.jl#master` ⇒ v0.1.0 `https://github.com/AxelvL/AHPQ.jl#master`[39m
[32m[1mUpdating[22m[39m `C:\Users\Axel9\.julia\environments\v1.5\Manifest.toml`
 [90m [b1450aca] [39m[93m~ AHPQ v0.1.0 `https://github.com/AxelvL/AHPQ.jl#master` ⇒ v0.1.0 `https://github.com/AxelvL/AHPQ.jl#master`[39m
┌ Info: Precompiling AHPQ [b1450aca-318d-4cca-a6fa-ceaba11f2713]
└ @ Base loading.jl:1278


## Artificial Data Generation

In [None]:
using Plots
using Statistics: norm

In [None]:
# Sizes of the synthetic benchmark.
n_dp        = 1000   # number of data points (columns of `data`)
n_dim       = 16     # dimensionality (rows of `data` and `queries`)
n_queries   = 100    # number of query vectors (columns of `queries`)
n_neighbors = 100    # neighbours kept per query in `groundtruth`

# Uniform random points, one per column, each divided by its own column norm.
data = rand(n_dim, n_dp)
data ./= mapslices(norm, data, dims=1)

# Queries are drawn the same way but are left unnormalised.
queries = rand(n_dim, n_queries)

# innerproducts[i, j] is the inner product of data point i with query j.
innerproducts = transpose(data) * queries

# For every query (column), the indices of the `n_neighbors` largest inner
# products, ordered from largest to smallest.
groundtruth = mapslices(innerproducts, dims=1) do column
    partialsortperm(column, 1:n_neighbors, rev=true)
end;

## 16x1.000 Dataset and 8x8 codebook

In [None]:
# Quantiser settings for this section; the 8x8 layout uses the same value for
# the number of codebooks and the number of centers.
n_codebooks = n_centers = 8
recalln  = 20      # N of the Recall 1@N score passed to `recall1atN`
stopcond = 1e-2;   # forwarded to `builder` as its `stopcond` keyword

### Finding optimal `T`

In [None]:
# Sweep the `T` keyword of `builder` over a regular grid and record the
# Recall 1@`recalln` obtained for each value.
Ts = 0:0.1:1
# Size the result vector from the grid itself so the two cannot drift apart
# when the grid is edited.
scores = zeros(length(Ts))
for i in eachindex(Ts)
    # Train on a fresh copy each time.
    # NOTE(review): presumably `builder` may modify its input — confirm; if it
    # does not, the copy can be dropped.
    traindata = deepcopy(data)
    ahpq = builder(traindata; T=Ts[i], n_codebooks=n_codebooks,
                                    n_centers=n_centers,
                                    verbose=false,
                                    stopcond=stopcond,
                                    a=0,
                                    training_points=-1)
    # Retrieve `n_neighbors` candidates per query and score them against the
    # exact ranking in `groundtruth`.
    yhat = AHPQ.MIPS(ahpq, queries, n_neighbors)
    scores[i] = recall1atN(yhat, groundtruth, recalln)
end

In [None]:
# Recall as a function of T. The first grid point (T = 0) is drawn separately
# as a horizontal reference line labelled "L2 Loss".
# `:dot` is a line style in Plots, not a marker shape, so `marker=:dot` draws no
# point markers; `:circle` marks each evaluated T.
plot(Ts[2:end], scores[2:end], label="Anisotropic", marker=:circle)
hline!([scores[1]], label="L2 Loss")
plot!(title="Recall 1@$(recalln) on 1000x16 artificial data, $(n_codebooks)x$(n_centers) codebook",
      xlabel="T",
      ylabel="Recall 1@$(recalln)")

### Comparison to L2 loss

In [None]:
"""
    get1atNscores(ranking, n_neighbors, groundtruth)

Return a `Vector{Float64}` of length `n_neighbors` whose `i`-th entry is
`recall1atN(ranking, groundtruth, i)`.
"""
function get1atNscores(ranking, n_neighbors, groundtruth)
    recalls = zeros(n_neighbors)
    # Fill position `cutoff` with the recall measured at that cutoff.
    map!(cutoff -> recall1atN(ranking, groundtruth, cutoff), recalls, 1:n_neighbors)
    return recalls
end

#### Training Tuned Anisotropic PQ

In [None]:
# Train once more with the T that scored best in the sweep, then record
# Recall 1@N for every N from 1 to `n_neighbors`.
traindata = deepcopy(data)
ahpq = builder(
    traindata;
    T = Ts[argmax(scores)],
    n_codebooks = n_codebooks,
    n_centers = n_centers,
    verbose = false,
    stopcond = stopcond,
    a = 0,
    training_points = -1,
);
yhat = AHPQ.MIPS(ahpq, queries, n_neighbors)
anisotropic_scores = get1atNscores(yhat, n_neighbors, groundtruth);

#### Training L2 PQ

In [None]:
# Same pipeline with T = 0 (the setting this notebook labels "L2"), scored at
# every N from 1 to `n_neighbors`.
traindata = deepcopy(data)
ahpq = builder(
    traindata;
    T = 0,
    n_codebooks = n_codebooks,
    n_centers = n_centers,
    verbose = false,
    stopcond = stopcond,
    a = 0,
    training_points = -1,
);
yhat = AHPQ.MIPS(ahpq, queries, n_neighbors)
L2_scores = get1atNscores(yhat, n_neighbors, groundtruth);

In [None]:
# Recall 1@N curves for both quantisers. The x-axis is taken from each score
# vector so the plot stays valid if `n_neighbors` is changed from 100.
plot(eachindex(anisotropic_scores), anisotropic_scores, label="Anisotropic")
plot!(eachindex(L2_scores), L2_scores, label="L2")
plot!(title="Recall on 1000x16 artificial data, $(n_codebooks)x$(n_centers) codebook",
      xlabel="N",
      ylabel="Recall 1@N",
      legend=:bottomright)

## 100x1.000 Dataset: Varying `n_codebooks`

In [None]:
## Artificial Data Generation ##
n_dp        = 1000   # number of data points (columns of `data`)
n_dim       = 100    # dimensionality (rows of `data` and `queries`)
n_queries   = 100    # number of query vectors (columns of `queries`)
n_neighbors = 100    # neighbours kept per query in `groundtruth`

# Random points, one per column, each scaled by the inverse of its column norm.
data = rand(n_dim, n_dp)
data ./= mapslices(norm, data, dims=1)

queries = rand(n_dim, n_queries)

# Row i, column j: inner product of data point i with query j.
innerproducts = transpose(data) * queries

# Per query, the indices of the `n_neighbors` largest inner products in
# descending order.
groundtruth = mapslices(innerproducts, dims=1) do column
    partialsortperm(column, 1:n_neighbors, rev=true)
end;

In [None]:
# Settings for this section.
n_centers   = 16
n_codebooks = Int[2, 5, 10, 25, 50, 100]   # candidate codebook counts
nrecall     = 20
stopcond    = 1e-3;

In [None]:
# Sweep the `T` keyword of `builder` with the codebook count fixed at 25, and
# report the T with the highest Recall 1@`nrecall`.
Ts = 0.001:0.1:1
# This grid has 10 points (0.001, 0.101, …, 0.901), so size the score vector
# from `Ts`; a fixed length of 11 left a never-written trailing zero.
scores = zeros(length(Ts))
for i in eachindex(Ts)
    # The grid is named `Ts`; indexing an undefined `T` raised UndefVarError.
    println("Training with T=$(Ts[i])")
    # Train on a fresh copy each time.
    # NOTE(review): presumably `builder` may modify its input — confirm.
    traindata = deepcopy(data)
    ahpq = builder(traindata; T=Ts[i], n_codebooks=25,
                                    n_centers=n_centers,
                                    verbose=true,
                                    stopcond=stopcond,
                                    a=0,
                                    training_points=-1)
    yhat = AHPQ.MIPS(ahpq, queries, n_neighbors)
    # Use `nrecall`, the cutoff defined for this section, rather than the
    # `recalln` left over from the 16-dimensional experiment.
    scores[i] = recall1atN(yhat, groundtruth, nrecall)
end
print("Optimal T found at $(Ts[argmax(scores)])")