In [3]:
%matplotlib notebook
import matplotlib as mpl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ipyparallel as ipp
import seaborn as sns
from tqdm import tqdm_notebook
import math

from numpy.random import multivariate_normal as draw_mn
from numpy.random import chisquare as draw_cs
from scipy.stats import chi2
from scipy.stats import multivariate_normal
from scipy.special import gamma, gammaln

In [2]:
class multivariate_t_distribution:
    def __init__(self,mu,Sigma,df):
        self.d = len(mu)
        self.df = df
        self.num = gamma((self.d+self.df)/2.)
        self.invSig = np.linalg.inv(Sigma)
        self.fact = gamma(df/2.) * np.power(df*np.pi,self.d/2.) * np.sqrt(np.linalg.det(Sigma))
        
        
        
    def pdf(self, x):
        return self.num/(self.fact*np.power(1. + (1./self.df)*np.dot(np.dot((x - mu), self.invSig), (x - mu)),(self.d+self.df)/2.0))



# Importance Sampling Test
## Parameters

In [3]:
D = 80
mu = 0.5
mu_vec = np.full((D), mu)
samples = 10000
nu = 3 # Degrees of freedom
Sigma = 0.5*np.eye(D); #Sigma[0,D-1] = 0.8; Sigma[D-1,0] = 0.8; Sigma[int(D/2),2] = 1.2
#Sigma = Sigma*Sigma.T; Sigma = Sigma + D*np.eye(D)
L = np.linalg.cholesky(Sigma)
def f(x):
    return np.mean(x)

## Importance sampling test for $\mathcal{N}(\mu, \Sigma)$ from $\mathcal{N}(0, 1)$

In [1]:
mvn0 =  multivariate_normal(mean=np.full((D), 0.0), cov=0.03*np.eye(D), allow_singular=False)
mvn =  multivariate_normal(mean=mu_vec, cov=Sigma, allow_singular=False)
Z0 = mvn0.rvs(samples)#draw_mn(np.full((D), 0.), np.eye(D), samples)

X = mvn0.rvs(samples)#mu_vec + np.array([L.dot(Z0i) for Z0i in Z0])
#Linv = np.linalg.inv(L)
prob_factor = 1.#np.log(np.linalg.det(L))
q = np.array([ mvn0.pdf(Xi) for Xi in X])
p = np.array([ mvn.pdf(Xi) for Xi in X])
weight = prob_factor*p/q#np.exp(log_p+log_prob_factor-log_q)#

NameError: name 'multivariate_normal' is not defined

In [9]:
A = np.array([[0.,0.,1.],[0.,0.,1.],[1.,1.,0.]])
for i in range(5):
    A[0,:]/=np.sum(A[0,:])
    A[1,:]/=np.sum(A[1,:])
    A[2,:]/=np.sum(A[2,:])
    print(A)
    A[:,0]/=np.sum(A[:,0])
    A[:,1]/=np.sum(A[:,1])
    A[:,2]/=np.sum(A[:,2])
    print(A)
    print("=====")

[[0.  0.  1. ]
 [0.  0.  1. ]
 [0.5 0.5 0. ]]
[[0.  0.  0.5]
 [0.  0.  0.5]
 [1.  1.  0. ]]
=====
[[0.  0.  1. ]
 [0.  0.  1. ]
 [0.5 0.5 0. ]]
[[0.  0.  0.5]
 [0.  0.  0.5]
 [1.  1.  0. ]]
=====
[[0.  0.  1. ]
 [0.  0.  1. ]
 [0.5 0.5 0. ]]
[[0.  0.  0.5]
 [0.  0.  0.5]
 [1.  1.  0. ]]
=====
[[0.  0.  1. ]
 [0.  0.  1. ]
 [0.5 0.5 0. ]]
[[0.  0.  0.5]
 [0.  0.  0.5]
 [1.  1.  0. ]]
=====
[[0.  0.  1. ]
 [0.  0.  1. ]
 [0.5 0.5 0. ]]
[[0.  0.  0.5]
 [0.  0.  0.5]
 [1.  1.  0. ]]
=====


## Importance sampling test for $t_{\nu}(\mu, \Sigma)$ from $\mathcal{N}(0, 1)$ and $\chi^2(\nu)$ 
Loop version

In [22]:
mvt = multivariate_t_distribution(mu_vec, Sigma, nu) #((nu-2.)/float(nu))*
mvn0 =  multivariate_normal(mean=np.full((D), 0.0), cov=np.eye(D), allow_singular=False)
mvn =  multivariate_normal(mean=mu_vec, cov=Sigma, allow_singular=False)
chi2_dist = chi2(nu)

Z0 = mvn0.rvs(samples)                            # draw N(0,1) samples
c = chi2_dist.rvs(samples)                        # draw chi2 samples
weight = np.zeros(samples, dtype=np.longdouble)
X = np.zeros((samples, D), dtype=np.longdouble)

for i in range(samples):
    Lchi = np.sqrt(float(nu)/c[i])*L
    #Lchi_inv = np.linalg.inv(Lchi)
    prob_factor = 1.#np.log(np.linalg.det(Lchi))#np.log(np.abs(np.linalg.det(np.linalg.inv(Lchi))))
    
    X[i] = mu_vec + Lchi.dot(Z0[i]) 
    log_p = np.log(mvt.pdf(X[i]))
    log_q = mvn.logpdf(X[i]) + chi2_dist.logpdf(c[i])
    #if(log_q < -500):
    #    if log_p < -450:
    #        print("Almost 0")
    #        weight[i] = 0.
    #    else:
    #        print("q == 0!")
    #else:
    #if(np.exp(log_p-log_q) > 1e+120):
    #        print("p: " + str(log_p)) 
    #        print(log_q)
    weight[i] = np.exp(log_p-log_q, dtype=np.longdouble)#prob_factor*p/q#

p: -219.13876991408948601
-1036.2218851028965
p: -202.25694935689791962
-705.2188650431299
p: -183.3613953468407057
-464.03911559660105
p: -199.37376737701257864
-661.1665271602817
p: -187.46699625080186016
-507.4668764235539
p: -195.40264997613972904
-604.8037272747139
p: -186.40364065655286152
-495.746396856394
p: -187.2091721479088468
-504.66631915495697
p: -195.24841295049302291
-602.7412980931631
p: -193.09172405148727933
-574.5707333936477
p: -234.99765488677369316
-1497.1029983282901
p: -244.51691319036105102
-1871.3322495537484
p: -192.13572615483063187
-562.526797093258
p: -185.45931603674326978
-485.69188452765155
p: -201.05586318439243096
-686.4800789352458
p: -195.19017820642888844
-602.0367320473765
p: -225.17111165557573037
-1191.246699865354
p: -211.51215408407254934
-869.924953801632
p: -184.8283579686299533
-478.9779998571557
p: -226.46436968177129767
-1227.4474684889833
p: -184.48241744331773946
-475.4212109346917
p: -262.85956425662010208
-2885.695280917784
p: -224.8



p: -186.57280350595441518
-497.6278561144064
p: -202.10813776552505437
-703.0286139557376
p: -213.48863006528009584
-910.1471489618998
p: -258.77972925738077029
-2619.7080386591374
p: -225.31698742572706032
-1195.1791786321212
p: -208.06067045103041104
-804.3216395413987
p: -233.97491881605104759
-1461.7904063370052
p: -196.5647304365707174
-620.751712060794
p: -215.87982329555913324
-961.62502781727
p: -188.73401467188451185
-521.8917613208539
p: -184.37775014397674364
-474.33578986732493
p: -213.51585344594016441
-910.6268017610374
p: -213.08852750775483641
-901.8515710707146
p: -230.3642015941796681
-1343.8887743957812
p: -205.00716930989505697
-750.4897261301916
p: -212.36731494965038372
-887.0267816253706
p: -189.53314297433801569
-531.1127196766315
p: -275.2267247895717957
-3871.3661813627973
p: -200.13534978607196901
-672.3942503407112
p: -183.5019213443136179
-465.4752390655796
p: -217.81401445829726397
-1005.1940871652971
p: -203.93497956803688477
-732.4044661259891
p: -222.51

Vectorized version

In [None]:
mvt = multivariate_t_distribution(mu_vec, Sigma, nu)
mvn0 =  multivariate_normal(mean=np.full((D), 0.0), cov=np.eye(D), allow_singular=False)
mvn =  multivariate_normal(mean=mu_vec, cov=Sigma, allow_singular=False)

Z0 = mvn0.rvs(samples)#draw_mn(np.full((D), 0.), np.eye(D), samples)
c = draw_cs(nu,samples)

Lchi = np.sqrt(float(nu)/c)*L[:,:,np.newaxis]
Lchi_inv = np.linalg.inv(np.moveaxis(Lchi, 2,0))
log_prob_factor = np.log(np.abs(np.linalg.det(Lchi_inv)))

X = mu_vec + np.array([Lchi[:,:,i].dot(Z0[i]) for i in range(Z0.shape[0])])

log_p = np.array([ mvn0.logpdf(Lchi_inv[i,:,:].dot(X[i,:]-mu_vec)) for i in range(X.shape[0])])
log_q = np.log(np.array([ mvt.pdf(Xi) for Xi in X]))
weight = np.exp(log_p+log_prob_factor-log_q)#log_prob_factor*log_p/log_q#

In [23]:
print( np.mean(weight) )
#print( np.mean( np.apply_along_axis(f, 1, Z0) ) )
#print( np.mean( np.apply_along_axis(f, 1, Z0)*weight))
print( np.mean( np.apply_along_axis(f, 1, X) ))
print( np.mean( np.apply_along_axis(f, 1, X)*weight))

inf
0.50045612773094912465
nan


  ret = umr_sum(arr, axis, dtype, out, keepdims)


In [46]:
weight[300]

1.0

# Plots

In [234]:
r = sns.distplot(weight, bins=20, hist=True)
print(np.mean(weight))

<IPython.core.display.Javascript object>

ValueError: range parameter must be finite.

In [231]:
print( np.mean( np.mean(Z,axis=1) ) )
print( np.mean( np.mean(Z0,axis=1) ) )
print( np.mean( np.mean(X,axis=1)*weight))

NameError: name 'Z' is not defined

In [None]:
df_Z = pd.DataFrame(Z, columns=["x", "y"])
df_X = pd.DataFrame(X, columns=["x", "y"])

In [15]:
g = sns.jointplot(x="x", y="y", data=df_Z, kind="kde", color="b")
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="+")
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("$X$", "$Y$");

<IPython.core.display.Javascript object>

In [42]:
g = sns.jointplot(x="x", y="y", data=df_X, kind="kde", color="b")
g.plot_joint(plt.scatter, c="w", s=30, linewidth=1, marker="+")
g.ax_joint.collections[0].set_alpha(0)
g.set_axis_labels("$X$", "$Y$");

<IPython.core.display.Javascript object>