# Gauss-Hermite Approximation

### Load packages

In [681]:
from numpy.polynomial.hermite import hermgauss
from matplotlib import pyplot as plt
from numpy.linalg import inv
import pandas as pd
import numpy as np
import scipy

### Randomized simulation data for two sites
- Set random seed
- Generate 10 true betas ranged in (-10, 10)
- Generate 2 sigmas for noise variance in different sites
- Generate data X1 and X2 with size (1000, 10)
- Generate result y1 and y2 with bernoulli distibution 

In [593]:
np.random.seed(1)
true_beta = (np.random.rand(10,1) - np.random.rand(10,1)) * 10
true_sigma = np.random.rand(2)
X1 = (np.random.rand(1000, 10) - np.random.rand(1000, 10)) * 10
p1 = 1 / (1 + np.exp(-(X1 @ ture_beta + np.random.normal(0, true_sigma[0], 1000).reshape(1000, 1))))
y1 = np.random.binomial(1,p1)
X2 = (np.random.rand(1000, 10) - np.random.rand(1000, 10)) * 10
p2 = 1 / (1 + np.exp(-(X2 @ ture_beta + np.random.normal(0, true_sigma[1], 1000).reshape(1000, 1))))
y2 = np.random.binomial(1,p2)

### Definitions

Notation for $\pi_{ij}$:

$$\pi_{ij} = \dfrac{\exp{(X_{ij}^\top\beta_0}+\mu_{i0})}{1 + \exp{(X_{ij}^\top\beta_0}+\mu_{i0})}$$

In [950]:
def Pi(x, beta_0, mu):
    return np.asarray((np.exp(x @ beta_0 + mu) / (1 + np.exp(x @ beta_0 + mu))))

Notation for $g(\mu_{i0})$:

$$g(\mu_{i0};\beta_0)=\sum_{j=1}^{n_i}\left[y_{ij}\log\pi_{ij}+(1-y_{ij})\log(1-\pi_{ij})\right]+\log\phi(\mu_{i0};\theta_0)$$

In [951]:
def g(x, y, mu, beta_0, tau=1):
    g = sum(y * np.log(Pi(x, beta_0, mu)) + (1 - y) * np.log(1 - Pi(x, beta_0, mu))) \
    + np.log((np.sqrt(2 * np.pi) * tau)**(-1) * np.exp(-mu**2/(2 * tau**2)))
    return g

Notation for $g'(\mu_{i0})$:

$$\dfrac{\partial g}{\partial \mu_{i0}}=\sum_{j=1}^{n_i}(y_{ij}-\pi_{ij})-\dfrac{\mu_{i0}}{\tau_0^2}$$

In [952]:
def g_1(x, y, mu, beta_0, tau = 1):
    return sum(y - Pi(x, beta_0, mu)) - mu/tau**2

Notation for $g''(\mu_{i0})$:

$$\dfrac{\partial^2 g}{\partial \mu_{i0}^2}=-\sum_{j=1}^{n_i}\dfrac{\partial\pi_{ij}}{\partial\mu_{i0}}-\dfrac{1}{\tau_0^2}$$

In [953]:
def g_2(x, y, mu, beta_0, tau = 1):
    return sum(- np.asarray((np.exp(x @ beta_0 + mu) / (1 + np.exp(x @ beta_0 + mu))**2))) - mu/tau**2

Notation for $\pi_{ij}'(\beta_0)$:

$$\dfrac{\partial\pi_{ij}}{\partial\beta_0}=\dfrac{X_{ij}\exp\left(X_{ij}^\top\beta_0+\mu_{i0}\right)}{\left[1+\left(X_{ij}^\top\beta_0+\mu_{i0}\right)\right]^2}$$

In [954]:
def Pi_1(x, beta_0, mu):
    return np.asarray((np.exp(x @ beta_0 + mu) / (1 + np.exp(x @ beta_0 + mu))**2)) * x

Notation for $\hat\omega$:

$$\hat\omega=\sqrt{-\dfrac{1}{g''(\hat\mu_{i0})}}$$

In [955]:
def omega(x, y, mu, beta_0, tau = 1):
    return np.sqrt(-1/g_2(x, y, mu, beta_0))

Notation for Hermite polynomial function $f_k(\hat\mu_{i0};\beta_0)$:

$$h_k\exp\{g(\hat\mu_{i0}+\sqrt{2\pi}\hat\omega x_k;\beta_0)+x_k^2\}$$

In [956]:
def f_k(k, x, y, mu, beta_0, tau = 1):
    [x_k, h_k] = hermgauss(k)
    return h_k * np.exp(g(x, y, mu + np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0, tau) * x_k, beta_0) + x_k**2)

Notation for $\mathcal L_i$:

$$\mathcal L_i=\sqrt{2\pi}\hat\omega\sum_{k=1}^lh_k\exp\left\{g(\hat\mu_{i0}+\sqrt{2\pi}\hat\omega x_k;\beta_0)+x_k^2\right\}$$

In [957]:
def L(k, x, y, mu, beta_0, tau = 1):
    return np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0) * sum(f_k(k, x, y, mu, beta_0))

## STEP 1: Maximize $g(\mu_{i0})$

In [958]:
def max_mu(x, y, mu, beta_0, tau=1, max_iter=100):
    for step in range(max_iter):
#         print('Step: ', step, '\n')
        mu_new = mu - g_1(x, y, mu, beta_0, tau)/g_2(x, y, mu, beta_0, tau)
        diff = mu_new - mu
#         print(diff)
        if diff == 0:
#             print(mu)
            break;
        mu = mu_new
    return mu[0]
#     print('mu converges to:', mu[0], 'in', step + 1, 'steps, with difference', diff[0])

## STEP 2: Maximization preparation of $\beta_0$ in LOCAL

Notation for $\dfrac{\partial\mathcal L_i}{\partial\beta_0}$

$$\dfrac{\partial\mathcal L_i}{\partial\beta_0}=\sqrt{2\pi}\hat\omega\sum_{k=1}^l\left\{f_k(\hat\mu_{i0};\beta_0)\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})\right\}$$

In [959]:
def L_1(k, x, y, mu, beta_0, tau = 1):
    L_1 = 0
    [x_k, h_k] = hermgauss(k)    
    for i in range(k):
        L_1 += np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0) *\
        f_k(k, x, y, mu, beta_0, tau = tau)[i] *\
        np.sum(x * y - x * Pi(x, beta_0, mu + np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0, tau) * x_k[i]), axis=0)
    return L_1

Notation for $\dfrac{\partial^2\mathcal L_i}{\partial\beta_0^2}$:

$$\dfrac{\partial^2\mathcal L_i}{\partial\beta_0^2}=\sqrt{2\pi}\hat\omega\sum_{k=1}^l\left\{f_k(\hat\mu_{i0};\beta_0)\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})\left[{\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})\right]^\top+f_k(\hat\mu_{i0};\beta_0)\sum_{j=1}^{n_i}\left(-X_{ij}\dfrac{\partial\pi_{ij}}{\partial\beta_0}\right)\right\}$$

In [960]:
def L_2(k, x, y, mu, beta_0, tau = 1):
    L2 = 0
    [x_k, h_k] = hermgauss(k) 
    for i in range(k):
        boogie = 0
        for j in range(x.shape[0]):
            boogie += -x[j].reshape(x[j].shape[0],1) @\
            Pi_1(x[j], beta_0, mu + np.sqrt(2 * np.pi) *\
                 omega(x, y, mu, beta_0, tau) * x_k[i]).reshape(1,x[j].shape[0])
        L2 += np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0) *\
        (f_k(k, x, y, mu, beta_0, tau = tau)[i] *\
         np.sum(x * y - x * Pi(x, beta_0, mu + np.sqrt(2 * np.pi) *\
                               omega(x, y, mu, beta_0, tau) * x_k[i]), axis=0).reshape(x.shape[1],1) @\
         np.sum(x * y - x * Pi(x, beta_0, mu + np.sqrt(2 * np.pi) *\
                               omega(x, y, mu, beta_0, tau) * x_k[i]), axis=0).reshape(1,x.shape[1]) +\
        f_k(k, x, y, mu, beta_0, tau = tau)[i] * boogie)
    return L2

## STEP 3: Maximization of $\beta_0$ in GLOBAL

In [961]:
def LL_1(site_num, k, X, Y, mu, beta_0, tau = 1):
    LL1 = 0
    for i in range(site_num):
        LL1 += L_1(k, X[i], Y[i], mu[i], beta_0, tau) / L(k, X[i], Y[i], mu[i], beta_0, tau)
    return LL1

In [962]:
def LL_2(site_num, k, X, Y, mu, beta_0, tau = 1):
    LL2 = 0
    for i in range(site_num):
        LL2 += L_2(k, X[i], Y[i], mu[i], beta_0, tau) / L(k, X[i], Y[i], mu[i], beta_0, tau)
    LL1 = LL_1(site_num, k, X, Y, mu, beta_0, tau)
    return LL2 - LL1.reshape(LL1.shape[0],1) @ LL1.reshape(1,LL1.shape[0])

In [963]:
def update(site_num, k, X, Y, mu, beta_0, tau = 1):
    direction = LL_1(site_num, k, X, Y, mu, beta_0, tau).reshape(1,beta_0.shape[0]) @\
    inv(LL_2(site_num, k, X, Y, mu, beta_0, tau))
    return direction

# Simulation starts below

### Main function

In [967]:
beta_0 = np.repeat(0.1, 10).reshape(10,1)
# betea_0 = true_beta
mu = [0, 0]
X = [X1, X2]
Y = [y1, y2]
k = 5
tau = 1
print('Initial beta:', beta_0, "\n")
for step in range(10):
    print('Step ', step+1, ':\n')
    for i in range(len(mu)): 
        mu[i] = max_mu(X[i], y[i], mu[i], beta_0)
    for substep in range(100):
        print('Mu:\n', mu, '\n')
        new_beta = beta_0 - update(len(mu), k, X, Y, mu, beta_0).reshape(beta_0.shape[0],1)
        delta = new_beta - beta_0
        beta_0 = new_beta
        print('Beta:\n', beta_0, '\n')
        print('Delta:\n', delta, '\n')

Initial beta: [[0.1]
 [0.1]
 [0.1]
 [0.1]
 [0.1]
 [0.1]
 [0.1]
 [0.1]
 [0.1]
 [0.1]] 

Step  1 :

Mu:
 [0.024708187293663673, 0.21466825773673953] 

Beta:
 [[0.10033706]
 [0.10048515]
 [0.1000009 ]
 [0.09988309]
 [0.10039978]
 [0.09996018]
 [0.10011196]
 [0.10066047]
 [0.10062657]
 [0.10041969]] 

Delta:
 [[ 3.37056237e-04]
 [ 4.85148215e-04]
 [ 8.97296476e-07]
 [-1.16907949e-04]
 [ 3.99779538e-04]
 [-3.98184374e-05]
 [ 1.11955159e-04]
 [ 6.60465618e-04]
 [ 6.26569354e-04]
 [ 4.19690932e-04]] 

Mu:
 [0.024708187293663673, 0.21466825773673953] 

Beta:
 [[0.10067624]
 [0.10097282]
 [0.10000269]
 [0.09976664]
 [0.10080164]
 [0.09992098]
 [0.10022515]
 [0.10132361]
 [0.10125565]
 [0.10084149]] 

Delta:
 [[ 3.39184061e-04]
 [ 4.87670086e-04]
 [ 1.79572284e-06]
 [-1.16452790e-04]
 [ 4.01863478e-04]
 [-3.91973500e-05]
 [ 1.13193023e-04]
 [ 6.63141162e-04]
 [ 6.29080596e-04]
 [ 4.21800578e-04]] 

Mu:
 [0.024708187293663673, 0.21466825773673953] 

Beta:
 [[0.10101757]
 [0.10146303]
 [0.10000539

KeyboardInterrupt: 

In [937]:
true_beta

array([[-0.0217251 ],
       [ 0.35104993],
       [-2.04337875],
       [-5.75784864],
       [ 1.19368298],
       [-5.78128915],
       [-2.31044591],
       [-2.13129101],
       [ 2.56380536],
       [ 3.40715245]])

In [938]:
true_sigma

array([0.80074457, 0.96826158])

# The followings are NOT true

## Step 1: Combine data to generate golden rules

In [564]:
col = []
for i in range(10): col += ['V'+str(i+1)]
X1 = pd.DataFrame(X1, columns=col)
X2 = pd.DataFrame(X2, columns=col)
# X = pd.concat([X1, X2], ignore_index = True)
y = np.concatenate((y1, y2))

Notation for $\pi_{ij}$:

$$\pi_{ij} = \dfrac{\exp{(X_{ij}^\top\beta_0})}{1 + \exp{(X_{ij}^\top\beta_0})}$$

In [566]:
def Pi(x, beta_0):
    return np.asarray((np.exp(x @ beta_0) / (1 + np.exp(x @ beta_0)))) #need to add /mu_{i0}?

Notation for $L_1$:

$$L_1 = \sum_{j=1}^{n_i}(y_{ij}X_{ij}-\pi_{ij}X_{ij})$$

In [567]:
def L_1(x, y, beta_0):
    return np.expand_dims(np.asarray((y * x - Pi(x, beta_0) * x).sum(axis = 0)), axis=1)

Notation for $L_2$:

$$L_2 = - \sum_{j=1}^{n_i}[\pi_{ij}(1-\pi_{ij})X_{ij}X_{ij}^\top]$$

In [568]:
def L_2(x, y, beta_0):
    XX = 0
    p = Pi(x, beta_0) * (1- Pi(x, beta_0))
    for i in range(x.shape[0]):
#         XX += p[i] * ((np.asarray(x.iloc[i,:]).reshape(x.shape[1],1)))**2 
        XX += p[i] * (np.asarray(x.iloc[i,:]).reshape(x.shape[1],1) \
        @ np.asarray(x.iloc[i,:]).reshape(x.shape[1],1).transpose())
    L2 = -XX
    return L2

In [588]:
beta_0 = np.repeat(0, 10).reshape(10,1)

In [589]:
print('Initial beta:', beta_0, "\n")
for i in range(100):
    print('Step ', i+1, ':\n')
    beta_0
    beta_new = beta_0 - inv(L_2(X1, y1, beta_0) + L_2(X2, y2, beta_0)) \
    @ (L_1(X1, y1, beta_0) + L_1(X2, y2, beta_0))
    delta = beta_new - beta_0
    beta_0 = beta_new
    print('Beta:\n', beta_0, '\n')
    print('Delta:\n', delta, '\n')
    if (max(np.abs(delta)) == 0):
        break;

Initial beta: [[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]] 

Step  1 :

Beta:
 [[0.10817893]
 [0.15577158]
 [0.04324669]
 [0.0087236 ]
 [0.11681485]
 [0.05317466]
 [0.07200324]
 [0.17340093]
 [0.20606506]
 [0.11675683]] 

Delta:
 [[0.10817893]
 [0.15577158]
 [0.04324669]
 [0.0087236 ]
 [0.11681485]
 [0.05317466]
 [0.07200324]
 [0.17340093]
 [0.20606506]
 [0.11675683]] 

Step  2 :

Beta:
 [[0.19600777]
 [0.28432322]
 [0.08468478]
 [0.01946106]
 [0.208629  ]
 [0.10041206]
 [0.13746322]
 [0.31760525]
 [0.37452347]
 [0.20956382]] 

Delta:
 [[0.08782883]
 [0.12855163]
 [0.04143809]
 [0.01073746]
 [0.09181415]
 [0.0472374 ]
 [0.06545998]
 [0.14420433]
 [0.16845841]
 [0.092807  ]] 

Step  3 :

Beta:
 [[0.29079494]
 [0.4264849 ]
 [0.13218002]
 [0.03452087]
 [0.30504341]
 [0.15209478]
 [0.20653688]
 [0.47079855]
 [0.55824349]
 [0.30929157]] 

Delta:
 [[0.09478717]
 [0.14216169]
 [0.04749524]
 [0.01505981]
 [0.09641441]
 [0.05168272]
 [0.06907365]
 [0.15319329]
 [0.18372002]
 [0.09972775]]

In [591]:
L_2(X1, y1, beta_0)+L_2(X2, y2, beta_0)

array([[-2017.92540591,   278.96089871,    55.13581563,    42.00608349,
           48.96279807,   210.41011784,   147.62714793,   352.95317426,
          260.13291691,    53.38371126],
       [  278.96089871, -1882.57363095,   104.82882103,   -71.34377803,
          332.49827995,   -40.34237896,   356.65541555,   265.13382507,
          523.47528537,   316.84156834],
       [   55.13581563,   104.82882103, -2018.44299488,   -17.02955605,
          -58.167005  ,    96.91290635,   -21.95912822,    81.27766099,
          252.23937134,   165.53816921],
       [   42.00608349,   -71.34377803,   -17.02955605, -1945.83029471,
           63.59659857,    38.07785122,    34.52403033,    49.45718014,
           84.64072536,    18.7808896 ],
       [   48.96279807,   332.49827995,   -58.167005  ,    63.59659857,
        -1959.36670765,    62.41459172,    57.82157479,   327.73823271,
          304.5362868 ,   118.92167227],
       [  210.41011784,   -40.34237896,    96.91290635,    38.07785122,
   

In [550]:
L_2(X1, y1, beta_0)

array([[-4140.35624389],
       [-4107.14077318],
       [-4096.38366046],
       [-4070.5526563 ],
       [-3959.86346214],
       [-4006.78389039],
       [-3930.36488249],
       [-4527.7504776 ],
       [-3984.77596394],
       [-4300.38682221]])

In [565]:
true_beta

array([[-0.0217251 ],
       [ 0.35104993],
       [-2.04337875],
       [-5.75784864],
       [ 1.19368298],
       [-5.78128915],
       [-2.31044591],
       [-2.13129101],
       [ 2.56380536],
       [ 3.40715245]])

In [587]:
inv(L_2(X1, y1, beta_0) + L_2(X2, y2, beta_0)) @ (L_1(X1, y1, beta_0) + L_1(X2, y2, beta_0))

array([[-0.10817893],
       [-0.15577158],
       [-0.04324669],
       [-0.0087236 ],
       [-0.11681485],
       [-0.05317466],
       [-0.07200324],
       [-0.17340093],
       [-0.20606506],
       [-0.11675683]])