# Gauss-Hermite Approximation

### Load packages

In [3]:
from numpy.polynomial.hermite import hermgauss
from matplotlib import pyplot as plt
from numpy.linalg import inv
import pandas as pd
import numpy as np
import scipy

### Randomized simulation data for two sites
- Set random seed
- Generate 10 true betas ranged in (-10, 10)
- Generate 2 sigmas for noise variance in different sites
- Generate data X1 and X2 with size (1000, 10)
- Generate result y1 and y2 with bernoulli distibution 

In [4]:
np.random.seed(1)
true_beta = (np.random.rand(10,1) - np.random.rand(10,1)) * 10
true_sigma = np.random.rand(2)
X1 = (np.random.rand(1000, 10) - np.random.rand(1000, 10)) * 10
p1 = 1 / (1 + np.exp(-(X1 @ true_beta + np.random.normal(0, true_sigma[0], 1000).reshape(1000, 1))))
y1 = np.random.binomial(1,p1)
X2 = (np.random.rand(1000, 10) - np.random.rand(1000, 10)) * 10
p2 = 1 / (1 + np.exp(-(X2 @ true_beta + np.random.normal(0, true_sigma[1], 1000).reshape(1000, 1))))
y2 = np.random.binomial(1,p2)

### Definitions

Notation for $\pi_{ij}$:

$$\pi_{ij} = \dfrac{\exp{(X_{ij}^\top\beta_0}+\mu_{i0})}{1 + \exp{(X_{ij}^\top\beta_0}+\mu_{i0})}$$

In [5]:
def Pi(x, beta_0, mu):
    return np.asarray((np.exp(x @ beta_0 + mu) / (1 + np.exp(x @ beta_0 + mu))))

Notation for $g(\mu_{i0})$:

$$g(\mu_{i0};\beta_0)=\sum_{j=1}^{n_i}\left[y_{ij}\log\pi_{ij}+(1-y_{ij})\log(1-\pi_{ij})\right]+\log\phi(\mu_{i0};\theta_0)$$

In [6]:
def g(x, y, mu, beta_0, tau=1):
    g = sum(y * np.log(Pi(x, beta_0, mu)) + (1 - y) * np.log(1 - Pi(x, beta_0, mu))) \
    + np.log((np.sqrt(2 * np.pi) * tau)**(-1) * np.exp(-mu**2/(2 * tau**2)))
    return g

Notation for $g_\beta(\mu_{i0})$:

$$\dfrac{\partial g}{\partial \beta_0}=\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})$$

In [7]:
def g_b(x, y, mu, beta_0, tau = 1):
    return np.sum(x * y - x * Pi(x, beta_0, mu), axis = 0)

Notation for $g_\mu(\mu_{i0})$:

$$\dfrac{\partial g}{\partial \mu_{i0}}=\sum_{j=1}^{n_i}(y_{ij}-\pi_{ij})-\dfrac{\mu_{i0}}{\tau_0^2}$$

In [8]:
def g_u(x, y, mu, beta_0, tau = 1):
    return sum(y - Pi(x, beta_0, mu)) - mu/tau**2

Notation for $g_{\mu\mu}(\mu_{i0})$:

$$\dfrac{\partial^2 g}{\partial \mu_{i0}^2}=-\sum_{j=1}^{n_i}\dfrac{\partial\pi_{ij}}{\partial\mu_{i0}}-\dfrac{1}{\tau_0^2}$$

In [9]:
def g_uu(x, y, mu, beta_0, tau = 1):
    return sum(- np.asarray((np.exp(x @ beta_0 + mu) / (1 + np.exp(x @ beta_0 + mu))**2))) - 1/tau**2

Notation for $g_{\mu\beta}(\mu_{i0})$:

$$\dfrac{\partial^2 g}{\partial \mu_{i0}\partial \beta_0}=-\sum_{j=1}^{n_i}\dfrac{\partial\pi_{ij}}{\partial\beta_0}$$

In [10]:
def g_ub(x, y, mu, beta_0, tau = 1):
    return np.sum(- x * np.asarray((np.exp(x @ beta_0 + mu) / (1 + np.exp(x @ beta_0 + mu))**2)), axis = 0)

Notation for $g_{\beta\beta}(\mu_{i0})$:

$$\dfrac{\partial^2 g}{\partial \beta_0^2}=-\sum_{j=1}^{n_i}X_{ij}\dfrac{\partial\pi_{ij}}{\partial\beta_0}$$

In [11]:
def g_bb(x, y, mu, beta_0, tau = 1):
    result = 0
    for i in range(len(y)):
        result += -np.asarray(x[i].reshape(x.shape[1],1) @ x[i].reshape(1,x.shape[1])\
        * (np.exp(x[i] @ beta_0 + mu) / (1 + np.exp(x[i] @ beta_0 + mu))**2))
    return result

Notation for $g_{\mu\beta}(\hat\mu_{i0};\beta_0)$:

$$\dfrac{\partial g_u(\hat\mu(\beta_0);\beta_0)}{\partial \beta_0}=\hat\mu_\beta(\beta_0)g_{\mu\mu}(\hat\mu_{i0})+g_{\mu\beta}(\hat\mu_{i0})=0$$

Notation for $g_{\mu\mu\mu}(\hat\mu_{i0};\beta_0)$:

$$g_{\mu\mu\mu}(\hat\mu_{i0};\beta_0) = -\sum_{j=1}^{n_i}\dfrac{\partial^2\pi_{ij}}{\partial\mu_{i0}^2}$$

In [12]:
def g_uuu(x, y, mu, beta_0, tau = 1):
    return sum(- np.asarray((np.exp(x @ beta_0 + mu) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**3)))

Notation for $g_{\mu\mu\beta}(\hat\mu_{i0};\beta_0)$:

$$g_{\mu\mu\beta}(\hat\mu_{i0};\beta_0) = -\sum_{j=1}^{n_i}\dfrac{\partial^2\pi_{ij}}{\partial\mu_{i0}\partial\beta_0}$$

In [13]:
def g_uub(x, y, mu, beta_0, tau = 1):
    return np.sum(- x * np.asarray((np.exp(x @ beta_0 + mu) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**3)), axis = 0)

Notation for $g_{\mu\beta\beta}(\hat\mu_{i0};\beta_0)$:

$$\dfrac{\partial^3 g}{\partial \mu_{i0}\partial \beta_0^2}=-\sum_{j=1}^{n_i}\dfrac{\partial^2\pi_{ij}}{\partial\beta_0^2}$$

In [14]:
def g_ubb(x, y, mu, beta_0, tau = 1):
    result = 0
    for i in range(len(y)):
        result += -np.asarray(x[i].reshape(x.shape[1],1) @ x[i].reshape(1,x.shape[1])\
                         * (np.exp(x[i] @ beta_0 + mu) * (np.exp(x[i] @ beta_0 + mu) - 1)\
                                       / (1 + np.exp(x[i] @ beta_0 + mu))**3))
    return result

Notation for $g_{\mu\mu\mu\mu}(\hat\mu_{i0};\beta_0)$:

$$g_{\mu\mu\mu\mu}(\hat\mu_{i0};\beta_0) = -\sum_{j=1}^{n_i}\dfrac{\partial^3\pi_{ij}}{\partial\mu_{i0}^3}$$

In [15]:
def g_uuuu(x, y, mu, beta_0, tau = 1):
    result = sum(- (np.exp(x @ beta_0 + mu) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**3)\
                 + (3 * np.exp(2 * (x @ beta_0 + mu)) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**4)\
                 - (np.exp(2 * (x @ beta_0 + mu))  / (1 + np.exp(x @ beta_0 + mu))**3)\
                )
    return result

Notation for $g_{\mu\mu\mu\beta}(\hat\mu_{i0};\beta_0)$:

$$g_{\mu\mu\mu\beta}(\hat\mu_{i0};\beta_0) = -\sum_{j=1}^{n_i}\dfrac{\partial^3\pi_{ij}}{\partial\mu_{i0}^2\partial\beta_0}$$

In [16]:
def g_uuub(x, y, mu, beta_0, tau = 1):
    result = np.sum(- x * np.asarray((np.exp(x @ beta_0 + mu) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**3)\
                 + (3 * np.exp(2 * (x @ beta_0 + mu)) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**4)\
                 - (np.exp(2 * (x @ beta_0 + mu))  / (1 + np.exp(x @ beta_0 + mu))**3)), axis = 0)
    return result

Notation for $g_{\mu\mu\beta\beta}(\hat\mu_{i0};\beta_0)$:

$$g_{\mu\mu\beta\beta}(\hat\mu_{i0};\beta_0) = -\sum_{j=1}^{n_i}\dfrac{\partial^3\pi_{ij}}{\partial\mu_{i0}\partial\beta^2_0}$$

In [17]:
def g_uubb(x, y, mu, beta_0, tau = 1):
    result = 0
    for i in range(len(y)):
        result += -np.asarray(x[i].reshape(x.shape[1],1) @ x[i].reshape(1,x.shape[1])\
                             * ((np.exp(x @ beta_0 + mu) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**3)\
                 + (3 * np.exp(2 * (x @ beta_0 + mu)) * (np.exp(x @ beta_0 + mu) - 1)\
                             / (1 + np.exp(x @ beta_0 + mu))**4)\
                 - (np.exp(2 * (x @ beta_0 + mu))  / (1 + np.exp(x @ beta_0 + mu))**3)))
        
    return result

Notation for $\hat\omega$:

$$\hat\omega=\sqrt{-\dfrac{1}{g_{\mu\mu}(\hat\mu_{i0})}}$$

In [18]:
def omega(x, y, mu, beta_0, tau = 1):
    return np.sqrt(-1/g_uu(x, y, mu, beta_0))

Notation for $\hat\omega_\beta$:

$$\dfrac{\partial\hat\omega}{\partial\beta}=\dfrac{1}{2}\hat\omega^3(g_{\mu\mu\mu}\hat\mu_\beta + g_{\mu\mu\beta})$$

In [19]:
def omega_b(x, y, mu, beta_0, tau = 1):
    return 0.5 * omega(x, y, mu, beta_0, tau)**3 * (g_uuu(x, y, mu, beta_0, tau) * mu_b(x, y, mu, beta_0, tau)\
                                                    + g_uub(x, y, mu, beta_0, tau))

Notation for $\hat\mu_\beta$

$$\hat\mu_\beta(\beta_0)=-\dfrac{g_{\mu\beta}(\hat\mu_{i0};\beta_0)}{g_{\mu\mu}(\hat\mu_{i0};\beta_0)}=\hat\omega^2(\beta_0)g_{\mu\beta}(\hat\mu_{i0}(\beta_0);\beta_0)$$

In [20]:
def mu_b(x, y, mu, beta_0, tau = 1):
    return omega(x, y, mu, beta_0, tau)**2 * g_ub(x, y, mu, beta_0, tau)

Notation for $\hat\mu_{\beta\beta'}$:

$$\hat\mu_{\beta\beta'}(\beta_0)=\hat\omega^2(\hat\mu_\beta\hat\mu_{\beta'} g_{\mu\mu\mu}+\hat\mu_{\beta}g_{\mu\mu\beta'}+\hat\mu_{\beta'}g_{\mu\mu\beta}+g_{\mu\beta\beta'})$$

In [21]:
def mu_bb(x, y, mu, beta_0, tau = 1):
    result = omega(x, y, mu, beta_0, tau)**2 * (mu_b(x, y, mu, beta_0, tau).reshape(x.shape[1],1)\
                                              @ mu_b(x, y, mu, beta_0, tau).reshape(1,x.shape[1])\
                                              * g_uuu(x, y, mu, beta_0, tau)\
                                              + 2 * mu_b(x, y, mu, beta_0, tau).reshape(x.shape[1],1)\
                                              @ g_uub(x, y, mu, beta_0, tau).reshape(1,x.shape[1])\
                                              + g_ubb(x, y, mu, beta_0, tau))
    return result

Notation for $\hat\omega_{\beta\beta'}$:

$$\dfrac{\partial^2}{\partial\beta\partial\beta'}\hat\omega(\beta_0)=\frac{3}{4}\hat\omega^5(\hat\mu_{\beta'}g_{\mu\mu\mu}+g_{\mu\mu\beta'})(\hat\mu_{\beta}g_{\mu\mu\mu}+g_{\mu\mu\beta})+\dfrac{1}{2}\hat\omega^3(\hat\mu_{\beta\beta'}g_{\mu\mu\mu}+\hat\mu_\beta\hat\mu_{\beta'}g_{\mu\mu\mu\mu}+\hat\mu_{\beta}g_{\mu\mu\mu\beta'}+\hat\mu_{\beta\beta'}g_{\mu\mu\mu\beta}+g_{\mu\mu\beta\beta'})$$

In [22]:
def omega_bb(x, y, mu, beta_0, tau = 1):
    result = 3/4 * omega(x, y, mu, beta_0, tau)**5 * (mu_b(x, y, mu, beta_0, tau) * g_uuu(x, y, mu, beta_0, tau)\
                                                      + g_uub(x, y, mu, beta_0, tau)).reshape(x.shape[1],1)\
     @ (mu_b(x, y, mu, beta_0, tau) * g_uuu(x, y, mu, beta_0, tau)\
        + g_uub(x, y, mu, beta_0, tau)).reshape(1,x.shape[1]) + 1/2 * omega(x, y, mu, beta_0, tau)**3\
     * (mu_bb(x, y, mu, beta_0, tau) * g_uuu(x, y, mu, beta_0, tau)\
        + (mu_b(x, y, mu, beta_0, tau).reshape(x.shape[1],1)\
           @ mu_b(x, y, mu, beta_0, tau).reshape(1,x.shape[1]) * g_uuuu(x, y, mu, beta_0, tau))\
        + mu_b(x, y, mu, beta_0, tau).reshape(x.shape[1],1) @ g_uuub(x, y, mu, beta_0, tau).reshape(1,x.shape[1])\
        + mu_bb(x, y, mu, beta_0, tau)
       )
    return result

Notation for Hermite polynomial function $f_k(\hat\mu_{i0}, \hat\omega;\beta_0)$:

$$h_k\exp\{g(\hat\mu_{i0}+\sqrt{2\pi}\hat\omega x_k;\beta_0)+x_k^2\}$$

In [23]:
def f_k(k, x, y, mu, beta_0, tau = 1):
    [x_k, h_k] = hermgauss(k)
    return h_k * np.exp(g(x, y, mu + np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0, tau) * x_k, beta_0) + x_k**2)

Notation for $\mathcal l_i$:

$$\mathcal l_i=\dfrac{1}{2}\log{(2\pi)}+\log(\hat\omega)+\log(\sum_{k=1}^lh_k\exp\left\{g(\hat\mu_{i0}+\sqrt{2\pi}\hat\omega x_k;\beta_0)+x_k^2\right\})$$

In [24]:
def l(k, x, y, mu, beta_0, tau = 1):
    return 0.5 * np.log(2 * np.pi) + np.log(omega(x, y, mu, beta_0, tau)) + np.log(sum(f_k(k, x, y, mu, beta_0, tau)))

Notation for $f_{k_\mu}$:

$$f_{k_\mu}(\hat\mu_{i0},\hat\omega;\beta_0)=f_k(\hat\mu_{i0}, \hat\omega;\beta_0)g_\mu(\mu_{i0})$$

In [25]:
def f_k_u(k, x, y, mu, beta_0, tau = 1):
    return f_k(k, x, y, mu, beta_0, tau) * g_u(x, y, mu, beta_0, tau)

Notation for $f_{k_\omega}$:

$$f_{k_\omega}(\hat\mu_{i0},\hat\omega;\beta_0)=f_k(\hat\mu_{i0}, \hat\omega;\beta_0)g_\mu(\mu_{i0})\sqrt{2\pi}x_k$$

In [None]:
def f_k_u(k, x, y, mu, beta_0, tau = 1):
    [x_k, h_k] = hermgauss(k)
    return f_k(k, x, y, mu, beta_0, tau) * g_u(x, y, mu, beta_0, tau) * np.sqrt(2 * np.pi) * x_k

Notation for $\mathcal l_i'$:

$$\mathcal l_i'=\dfrac{\partial\mathcal l_i}{\partial\beta_0}=\dfrac{\hat\omega_\beta}{\hat\omega}+\dfrac{1}{\sum_{k=1}^lf_k}\sum_{k=1}^l(f_{k_\mu}\hat\mu_\beta+f_{k_\omega}\hat\omega_\beta+f_{k_\beta})$$

In [2]:
def l_1(k, x, y, mu, beta_0, tau = 1):
    return 0.5 * np.log(2 * np.pi) + np.log(omega(x, y, mu, beta_0, tau)) + np.log(sum(f_k(k, x, y, mu, beta_0, tau)))

## STEP 1: Maximize $g(\mu_{i0})$

In [18]:
def max_mu(x, y, mu, beta_0, tau=1, max_iter=100):
    for step in range(max_iter):
#         print('Step: ', step, '\n')
        mu_new = mu - g_u(x, y, mu, beta_0, tau)/g_uu(x, y, mu, beta_0, tau)
        diff = mu_new - mu
#         print(diff)
        if np.abs(diff) < 10**(-10):
#             print(mu)
            break;
        mu = mu_new
    return mu

## STEP 2: Maximization preparation of $\beta_0$ in LOCAL

Notation for $\dfrac{\partial\mathcal L_i}{\partial\beta_0}$

$$\dfrac{\partial\mathcal L_i}{\partial\beta_0}=\sqrt{2\pi}\hat\omega\sum_{k=1}^l\left\{f_k(\hat\mu_{i0};\beta_0)\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})\right\}$$

In [19]:
def L_1(k, x, y, mu, beta_0, tau = 1):
    L_1 = 0
    [x_k, h_k] = hermgauss(k)    
    for i in range(k):
        L_1 += np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0) *\
        f_k(k, x, y, mu, beta_0, tau = tau)[i] *\
        np.sum(x * y - x * Pi(x, beta_0, mu + np.sqrt(2 * np.pi) *\
                              omega(x, y, mu, beta_0, tau) * x_k[i]), axis=0)
    return L_1

Notation for $\dfrac{\partial^2\mathcal L_i}{\partial\beta_0^2}$:

$$\dfrac{\partial^2\mathcal L_i}{\partial\beta_0^2}=\sqrt{2\pi}\hat\omega\sum_{k=1}^l\left\{f_k(\hat\mu_{i0};\beta_0)\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})\left[\sum_{j=1}^{n_i}(X_{ij}y_{ij}-X_{ij}\pi_{ij})\right]^\top+f_k(\hat\mu_{i0};\beta_0)\sum_{j=1}^{n_i}\left(-X_{ij}\dfrac{\partial\pi_{ij}}{\partial\beta_0}\right)\right\}$$

In [21]:
def L_2(k, x, y, mu, beta_0, tau = 1):
    L2 = 0
    [x_k, h_k] = hermgauss(k) 
    for i in range(k):
        boogie = 0
        for j in range(x.shape[0]):
            boogie += -x[j].reshape(x[j].shape[0],1) @\
            Pi_1(x[j], beta_0, mu + np.sqrt(2 * np.pi) *\
                 omega(x, y, mu, beta_0, tau) * x_k[i]).reshape(1,x[j].shape[0])
        L2 += np.sqrt(2 * np.pi) * omega(x, y, mu, beta_0) *\
        (f_k(k, x, y, mu, beta_0, tau = tau)[i] *\
         np.sum(x * y - x * Pi(x, beta_0, mu + np.sqrt(2 * np.pi) *\
                               omega(x, y, mu, beta_0, tau) * x_k[i]), axis=0).reshape(x.shape[1],1) @\
         np.sum(x * y - x * Pi(x, beta_0, mu + np.sqrt(2 * np.pi) *\
                               omega(x, y, mu, beta_0, tau) * x_k[i]), axis=0).reshape(1,x.shape[1]) +\
        f_k(k, x, y, mu, beta_0, tau = tau)[i] * boogie)
    return L2

## STEP 3: Maximization of $\beta_0$ in GLOBAL

In [23]:
def LL_1(site_num, k, X, Y, mu, beta_0, tau = 1):
    LL1 = 0
    for i in range(site_num):
        LL1 += L_1(k, X[i], Y[i], mu[i], beta_0, tau) / L(k, X[i], Y[i], mu[i], beta_0, tau)
    return LL1

In [24]:
def LL_2(site_num, k, X, Y, mu, beta_0, tau = 1):
    LL2 = 0
    for i in range(site_num):
        LL2 += L_2(k, X[i], Y[i], mu[i], beta_0, tau) / L(k, X[i], Y[i], mu[i], beta_0, tau)
    LL1 = LL_1(site_num, k, X, Y, mu, beta_0, tau)
    return LL2 - LL1.reshape(LL1.shape[0],1) @ LL1.reshape(1,LL1.shape[0])

In [25]:
def update(site_num, k, X, Y, mu, beta_0, tau = 1):
    direction = LL_1(site_num, k, X, Y, mu, beta_0, tau).reshape(1,beta_0.shape[0]) @\
    inv(LL_2(site_num, k, X, Y, mu, beta_0, tau))
    return direction

# Simulation starts below

### Main function

In [26]:
beta_0 = np.repeat(0, 10).reshape(10,1)
# betea_0 = true_beta
mu = [0.1, 0.1]
X = [X1, X2]
Y = [y1, y2]
k = 2
tau = 1
print('Initial beta:', beta_0, "\n")
for step in range(10):
    print('Step ', step+1, ':\n')
    for i in range(len(mu)): 
        mu[i] = max_mu(X[i], Y[i], mu[i], beta_0)
    for substep in range(100):
        print('Mu:\n', mu, '\n')
        new_beta = beta_0 - update(len(mu), k, X, Y, mu, beta_0).reshape(beta_0.shape[0],1)
        delta = new_beta - beta_0
        beta_0 = new_beta
        print('Beta:\n', beta_0, '\n')
        print('Delta:\n', delta, '\n')

Initial beta: [[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]] 

Step  1 :

Mu:
 [0.055791301945490764, -0.07174378216208464] 

Beta:
 [[ 2.13692388e-06]
 [ 1.78504257e-05]
 [-1.44562508e-04]
 [-3.49877910e-04]
 [ 7.35318600e-05]
 [-3.53663855e-04]
 [-1.28625471e-04]
 [-1.17509605e-04]
 [ 1.52233736e-04]
 [ 2.09579367e-04]] 

Delta:
 [[ 2.13692388e-06]
 [ 1.78504257e-05]
 [-1.44562508e-04]
 [-3.49877910e-04]
 [ 7.35318600e-05]
 [-3.53663855e-04]
 [-1.28625471e-04]
 [-1.17509605e-04]
 [ 1.52233736e-04]
 [ 2.09579367e-04]] 

Mu:
 [0.055791301945490764, -0.07174378216208464] 

Beta:
 [[ 4.28487038e-06]
 [ 3.57336053e-05]
 [-2.89352619e-04]
 [-7.00303047e-04]
 [ 1.47180428e-04]
 [-7.07879979e-04]
 [-2.57453243e-04]
 [-2.35200197e-04]
 [ 3.04707418e-04]
 [ 4.19482350e-04]] 

Delta:
 [[ 2.14794650e-06]
 [ 1.78831795e-05]
 [-1.44790111e-04]
 [-3.50425137e-04]
 [ 7.36485679e-05]
 [-3.54216125e-04]
 [-1.28827772e-04]
 [-1.17690592e-04]
 [ 1.52473682e-04]
 [ 2.09902982e-04]] 

Mu:
 [0.05579130

KeyboardInterrupt: 

# Generated the data into files for R

In [981]:
np.savetxt('X1.csv', X1, delimiter=",")
np.savetxt('y1.csv', y1, delimiter=",")
np.savetxt('X2.csv', X2, delimiter=",")
np.savetxt('y2.csv', y2, delimiter=",")

# The followings are NOT true

## Step 1: Combine data to generate golden rules

In [564]:
col = []
for i in range(10): col += ['V'+str(i+1)]
X1 = pd.DataFrame(X1, columns=col)
X2 = pd.DataFrame(X2, columns=col)
# X = pd.concat([X1, X2], ignore_index = True)
y = np.concatenate((y1, y2))

Notation for $\pi_{ij}$:

$$\pi_{ij} = \dfrac{\exp{(X_{ij}^\top\beta_0})}{1 + \exp{(X_{ij}^\top\beta_0})}$$

In [566]:
def Pi(x, beta_0):
    return np.asarray((np.exp(x @ beta_0) / (1 + np.exp(x @ beta_0)))) #need to add /mu_{i0}?

Notation for $L_1$:

$$L_1 = \sum_{j=1}^{n_i}(y_{ij}X_{ij}-\pi_{ij}X_{ij})$$

In [567]:
def L_1(x, y, beta_0):
    return np.expand_dims(np.asarray((y * x - Pi(x, beta_0) * x).sum(axis = 0)), axis=1)

Notation for $L_2$:

$$L_2 = - \sum_{j=1}^{n_i}[\pi_{ij}(1-\pi_{ij})X_{ij}X_{ij}^\top]$$

In [568]:
def L_2(x, y, beta_0):
    XX = 0
    p = Pi(x, beta_0) * (1- Pi(x, beta_0))
    for i in range(x.shape[0]):
#         XX += p[i] * ((np.asarray(x.iloc[i,:]).reshape(x.shape[1],1)))**2 
        XX += p[i] * (np.asarray(x.iloc[i,:]).reshape(x.shape[1],1) \
        @ np.asarray(x.iloc[i,:]).reshape(x.shape[1],1).transpose())
    L2 = -XX
    return L2

In [588]:
beta_0 = np.repeat(0, 10).reshape(10,1)

In [1111]:
print('Initial beta:', beta_0, "\n")
for i in range(100):
    print('Step ', i+1, ':\n')
    beta_0
    beta_new = beta_0 - inv(L_2(X1, y1, beta_0) + L_2(X2, y2, beta_0)) \
    @ (L_1(X1, y1, beta_0) + L_1(X2, y2, beta_0))
    delta = beta_new - beta_0
    beta_0 = beta_new
    print('Beta:\n', beta_0, '\n')
    print('Delta:\n', delta, '\n')
    if (max(np.abs(delta)) == 0):
        break;

Initial beta: [[0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]
 [0]] 

Step  1 :



TypeError: L_2() missing 2 required positional arguments: 'mu' and 'beta_0'

In [591]:
L_2(X1, y1, beta_0)+L_2(X2, y2, beta_0)

array([[-2017.92540591,   278.96089871,    55.13581563,    42.00608349,
           48.96279807,   210.41011784,   147.62714793,   352.95317426,
          260.13291691,    53.38371126],
       [  278.96089871, -1882.57363095,   104.82882103,   -71.34377803,
          332.49827995,   -40.34237896,   356.65541555,   265.13382507,
          523.47528537,   316.84156834],
       [   55.13581563,   104.82882103, -2018.44299488,   -17.02955605,
          -58.167005  ,    96.91290635,   -21.95912822,    81.27766099,
          252.23937134,   165.53816921],
       [   42.00608349,   -71.34377803,   -17.02955605, -1945.83029471,
           63.59659857,    38.07785122,    34.52403033,    49.45718014,
           84.64072536,    18.7808896 ],
       [   48.96279807,   332.49827995,   -58.167005  ,    63.59659857,
        -1959.36670765,    62.41459172,    57.82157479,   327.73823271,
          304.5362868 ,   118.92167227],
       [  210.41011784,   -40.34237896,    96.91290635,    38.07785122,
   

In [550]:
L_2(X1, y1, beta_0)

array([[-4140.35624389],
       [-4107.14077318],
       [-4096.38366046],
       [-4070.5526563 ],
       [-3959.86346214],
       [-4006.78389039],
       [-3930.36488249],
       [-4527.7504776 ],
       [-3984.77596394],
       [-4300.38682221]])

In [565]:
true_beta

array([[-0.0217251 ],
       [ 0.35104993],
       [-2.04337875],
       [-5.75784864],
       [ 1.19368298],
       [-5.78128915],
       [-2.31044591],
       [-2.13129101],
       [ 2.56380536],
       [ 3.40715245]])

In [587]:
inv(L_2(X1, y1, beta_0) + L_2(X2, y2, beta_0)) @ (L_1(X1, y1, beta_0) + L_1(X2, y2, beta_0))

array([[-0.10817893],
       [-0.15577158],
       [-0.04324669],
       [-0.0087236 ],
       [-0.11681485],
       [-0.05317466],
       [-0.07200324],
       [-0.17340093],
       [-0.20606506],
       [-0.11675683]])