Sample from a discrete power-law distribution (Zipf distribution)

In [None]:
import sampling
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

# Apply seaborn's default theme to the figure drawn below.
sns.set_theme()

# Draw 100,000 values from the Zipf sampler and tally how often each one occurs.
# NOTE(review): presumably the arguments are the exponent (2) and the largest
# value (1,000) -- confirm against sampling.Zipf.
sampler = sampling.Zipf(2, 1_000)
draws = (sampler.generate() for _ in range(100_000))
value_counts = dict(Counter(draws))

# Scatter the tallies, then switch both axes to log scale.
sns.scatterplot(value_counts)
plt.loglog()
plt.show()

Weighted sampling without replacement

In [None]:
import sampling
import random

# Counter is imported here so the cell does not rely on another cell's imports.
from collections import Counter

# Population of n items, each given a random integer weight in [1, 100].
n = 25
population = list(range(n))
weights = [random.randint(1, 100) for _ in range(n)]
sampler = sampling.EfraimidisSpirakis(population, weights)

# Pool the items returned by 100,000 sampling rounds, each with a random
# sample size k in [1, n].
result = []
for _ in range(100_000):
    k = random.randint(1, n)
    sample = sampler.sample(k)
    result.extend(sample)

weights_sum = sum(weights)
print(weights_sum)

# Tally every item in a single pass instead of rescanning `result` with
# list.count() once per population item.
occurrences = Counter(result)
total_drawn = len(result)

# NOTE(review): "expected" is the item's share of the total weight. If
# sampler.sample(k) draws without replacement (as the section title says),
# the pooled share over many k values need not match it exactly -- confirm
# this is the intended reference value.
for i in range(n):
    print(population[i], "\tweight:", weights[i], "\texpected:", round(weights[i] / weights_sum, 4), "\t\tsample:", round(occurrences[population[i]] / total_drawn, 4))

Weighted sampling of timestamps with wave distribution

In [None]:
import sampling
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

# Apply seaborn's default theme.
sns.set_theme()

# One weight per timestamp; the timestamps are simply the indices 0..n-1.
# NOTE(review): the meaning of the (7, 12) arguments is not visible here --
# confirm against sampling.generate_burst_weights.
weights = sampling.generate_burst_weights(7, 12)
n = len(weights)
timestamps = list(range(n))
wswr = sampling.EfraimidisSpirakis(timestamps, weights)
sampler = sampling.Zipf(2, n)

# 100,000 rounds: draw a sample size from the Zipf sampler, take a weighted
# sample of that size, and pool every sampled timestamp into one flat list.
result = [
    timestamp
    for _ in range(100_000)
    for timestamp in wswr.sample(sampler.generate())
]

# Use a wide 20x6 figure for the plot below.
sns.set(rc={"figure.figsize":(20, 6)})

# Plot how often each timestamp was sampled, with a log-scaled y axis.
timestamp_counts = dict(Counter(result))
sns.scatterplot(timestamp_counts)
plt.yscale("log")
plt.show()