In [1]:
import numpy as np
import scipy as sp
from scipy import linalg as sp_linalg
import matplotlib.pyplot as plt
%matplotlib inline

V = 100
rng = np.random.RandomState(0)

power_law_seed = rng.power(a=1.0, size=V)
P_ij = np.triu(rng.lognormal(mean=1, sigma=1, size=[V, V]), k=1)
P_ij *= power_law_seed
P_ij += P_ij.T #P_ij == P_ji
P_ij /= P_ij.sum()

p_i = P_ij.sum(axis=1)
P_i = np.diag(p_i)
P_i_inv = np.diag(1.0 / p_i)
similarities = P_i_inv.dot(P_ij).dot(P_i_inv)

pmis = np.log(similarities)
pmis[pmis == -np.inf] = 0

assert np.allclose(similarities, similarities.T)
assert np.allclose(similarities.dot(P_i).sum(axis=1), 1)
assert np.allclose(P_ij, P_ij.T)
assert np.allclose(pmis, pmis.T)
assert np.allclose(P_i.sum(), 1)
assert np.allclose(P_ij.sum(), 1)



In [10]:
rng = np.random.RandomState(0)
n = V**2 * 0.3

emp_ij = rng.multinomial(n, P_ij.flatten()).reshape([V, V])
emp_ij = emp_ij / emp_ij.sum()

emp_i = emp_ij.sum(axis=1)
non_zeros = emp_i != 0
emp_ij = emp_ij[non_zeros][:, non_zeros]
emp_i = emp_i[non_zeros]

emp_i_inv = np.diag(1.0 / emp_i)
emp_sim = emp_i_inv.dot(emp_ij).dot(emp_i_inv)

emp_eps_pmi = np.log(emp_sim)
emp_eps_pmi[emp_eps_pmi == -np.inf] = 0

emp_ppmi = emp_eps_pmi.copy()
emp_ppmi[emp_ppmi < 0] = 0

positive_pmis = (pmis[non_zeros][:, non_zeros]).copy()
positive_pmis[positive_pmis < 0] = 0

  from ipykernel import kernelapp as app


In [64]:
emp_ij = rng.multinomial(n, P_ij.flatten()).reshape([V, V])
emp_i = emp_ij.sum(axis=1)
non_zeros = emp_i != 0
emp_ij = emp_ij[non_zeros][:, non_zeros]
emp_i = emp_i[non_zeros]
emp_i = emp_i / emp_i.sum()

### Estimate prior parameters alpha and beta
mu = emp_i.reshape([-1,1]).dot(emp_i.reshape(1, -1))
var = 0.1
alpha = ((1-mu)/var - 1.0/mu)*mu**2
beta = alpha * (1/mu - 1)
assert np.allclose(mu, alpha / (alpha + beta))

### Estimate posterior parameters
alpha_post = alpha + emp_ij
beta_post = beta + n - emp_ij
mu_post = alpha_post / (alpha_post+beta_post)
prediction = n * mu_post

In [70]:
prediction.round(2)

array([[-0., -0., -0., ..., -0., -0., -0.],
       [-0., -0., -0., ...,  1., -0., -0.],
       [-0., -0., -0., ...,  1., -0., -0.],
       ...,
       [-0., -0., -0., ..., -0., -0.,  1.],
       [-0., -0., -0., ..., -0., -0., -0.],
       [-0., -0., -0., ...,  1.,  2., -0.]])

In [68]:
emp_ij

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 2, 0]])

In [45]:
alpha[-2,-2], beta[-2,-2]

(-6.976562670000004, 1.7032726700000016)