# ДАТАСЕТ

**Pima Indians Diabetes Dataset**

In [None]:
import pandas as pd
import numpy as np
from scipy.special import expit
from sklearn.metrics import roc_auc_score

# Load the data
data = pd.read_csv(
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv",
    header=None,
)
data_matrix = data.values

# Training split: rows 1..500, first 8 columns as features, column 8 as target.
# NOTE(review): the slice starts at 1 although the file is read with
# header=None, so row 0 is never used -- confirm this is intended.
X, y = data_matrix[1:501, :8], np.squeeze(data_matrix[1:501, 8])

n, r = 500, 3          # n: training rows, r: number of latent factors per feature
d = X.shape[1]         # number of features
iterations = 1000

# Fixed hyperparameters
mu_0, mu_w_0, lambda_w_0 = 0.0, 2.0, 3.0
gamma_0 = 1.0
alpha_lambda = beta_lambda = 1.0

# Prior parameters, drawn once: precisions from Gamma(shape, scale), means from
# a normal whose standard deviation is 1/sqrt(gamma_0 * precision).
lambda_w = np.random.gamma(alpha_lambda, 1 / beta_lambda)
mu_w = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_w)))
lambda_v = np.random.gamma(alpha_lambda, 1 / beta_lambda, r)
mu_v = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_v)), r)

# Variational parameters (mean / scale for the intercept, linear and factor weights)
mu_w0_new, sigma_w0_new = 0.0, 0.1
mu_w_new, sigma_w_new = np.zeros(d), np.ones(d)
mu_v_new, sigma_v_new = np.zeros((d, r)), np.ones((d, r))

# Adam moment accumulators, one (m, v) pair per variational parameter
m_mu_w0 = v_mu_w0 = 0.0
m_sigma_w0 = v_sigma_w0 = 0.0
m_mu_w, v_mu_w = np.zeros(d), np.zeros(d)
m_sigma_w, v_sigma_w = np.zeros(d), np.zeros(d)
m_mu_v, v_mu_v = np.zeros((d, r)), np.zeros((d, r))
m_sigma_v, v_sigma_v = np.zeros((d, r)), np.zeros((d, r))

# Per-iteration history of the variational means
mu_w0_all, mu_w_all, mu_v_all = [], [], []

alpha_k = np.ones(d)

def adam_update(param, grad, m, v, t, alpha=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Apply one bias-corrected Adam step and return the updated state.

    The step is *added* to ``param`` (gradient ascent), so ``grad`` is expected
    to be the gradient of a quantity that is being maximised.

    Args:
        param: Current parameter value (float or NumPy array).
        grad: Gradient, same shape as ``param``.
        m: Running first-moment (mean of gradients) estimate.
        v: Running second-moment (mean of squared gradients) estimate.
        t: Step counter. The bias-correction denominators ``1 - beta ** t``
            are zero at ``t == 0``, so counting has to start at 1.
        alpha: Step size.
        beta1: Decay rate of the first-moment estimate.
        beta2: Decay rate of the second-moment estimate.
        epsilon: Constant added to ``sqrt(v_hat)`` to avoid division by zero.

    Returns:
        Tuple ``(param, m_new, v_new)``. The returned ``param`` is a new
        object; the argument passed in is left unmodified.
    """
    m_new = beta1 * m + (1 - beta1) * grad
    v_new = beta2 * v + (1 - beta2) * (grad ** 2)
    # Bias correction compensates for the zero-initialised moments.
    m_hat = m_new / (1 - beta1 ** t)
    v_hat = v_new / (1 - beta2 ** t)
    # Build a new value rather than using `+=`, which would silently modify
    # a NumPy array owned by the caller.
    param = param + alpha * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m_new, v_new

# Stochastic variational inference loop: every iteration draws one
# reparameterised sample of the weights, evaluates the gradient of the
# objective w.r.t. each variational mean/scale, and applies an Adam step
# (adam_update adds the step, i.e. performs ascent).
prior_scale = d / np.sum(alpha_k)  # common factor of every prior term
X_sq = X ** 2                      # loop-invariant, reused below

for t in range(1, iterations + 1):
    # Reparameterisation: Theta = mu + epsilon * sigma with epsilon ~ N(0, 1).
    epsilon_w0 = np.random.normal(0, 1)
    epsilon_w = np.random.normal(0, 1, d)
    epsilon_v = np.random.normal(0, 1, (d, r))

    Theta_0 = mu_w0_new + epsilon_w0 * sigma_w0_new
    Theta_w = mu_w_new + epsilon_w * sigma_w_new
    Theta_v = mu_v_new + epsilon_v * sigma_v_new

    # Linear predictor with the pairwise-interaction term
    # 0.5 * sum_f [(x . v_f)^2 - sum_k x_k^2 v_kf^2]; the residual
    # y - sigmoid(eta) is shared by every gradient, so compute it once.
    XV = X @ Theta_v
    eta = Theta_0 + X @ Theta_w + 0.5 * np.sum(XV ** 2 - X_sq @ (Theta_v ** 2), axis=1)
    residual = y - expit(eta)

    # Intercept.
    # NOTE(review): this prior term is summed over all d entries of alpha_k,
    # unlike the per-coordinate terms below -- confirm this is intended.
    grad_mu_w0 = np.sum(residual) \
        - prior_scale * np.sum(alpha_k * (mu_w0_new - mu_w_0) / (1 / lambda_w_0))
    grad_sigma_w0 = np.sum(residual * epsilon_w0) \
        - prior_scale * np.sum(alpha_k * (sigma_w0_new / (1 / lambda_w_0) - 1 / sigma_w0_new))

    # Linear weights: data term sum_i residual_i * x_ik for every k at once.
    data_w = residual @ X
    grad_mu_w = data_w \
        - prior_scale * (alpha_k * (mu_w_new - mu_w) / (1 / lambda_w))
    grad_sigma_w = data_w * epsilon_w \
        - prior_scale * (alpha_k * (sigma_w_new / (1 / lambda_w) - 1 / sigma_w_new))

    # Factor weights: data term
    # sum_i residual_i * (x_ik * (x_i . v_f) - v_kf * x_ik^2) for every (k, f).
    data_v = X.T @ (residual[:, None] * XV) - Theta_v * (residual @ X_sq)[:, None]
    grad_mu_v = data_v \
        - prior_scale * (alpha_k[:, None] * (mu_v_new - mu_v) / (1 / lambda_v))
    grad_sigma_v = epsilon_v * data_v \
        - prior_scale * (alpha_k[:, None] * (sigma_v_new / (1 / lambda_v) - 1 / sigma_v_new))

    mu_w0_new, m_mu_w0, v_mu_w0 = adam_update(mu_w0_new, grad_mu_w0, m_mu_w0, v_mu_w0, t)
    sigma_w0_new, m_sigma_w0, v_sigma_w0 = adam_update(sigma_w0_new, grad_sigma_w0, m_sigma_w0, v_sigma_w0, t)
    mu_w_new, m_mu_w, v_mu_w = adam_update(mu_w_new, grad_mu_w, m_mu_w, v_mu_w, t)
    sigma_w_new, m_sigma_w, v_sigma_w = adam_update(sigma_w_new, grad_sigma_w, m_sigma_w, v_sigma_w, t)
    mu_v_new, m_mu_v, v_mu_v = adam_update(mu_v_new, grad_mu_v, m_mu_v, v_mu_v, t)
    sigma_v_new, m_sigma_v, v_sigma_v = adam_update(sigma_v_new, grad_sigma_v, m_sigma_v, v_sigma_v, t)

    # Record the means after this step (copies, so later updates cannot alias them).
    mu_w0_all.append(mu_w0_new)
    mu_w_all.append(mu_w_new.copy())
    mu_v_all.append(mu_v_new.copy())

# Point estimates: average each variational mean over all recorded iterations.
mu_w0_avg, mu_w_avg, mu_v_avg = (
    np.mean(history, axis=0) for history in (mu_w0_all, mu_w_all, mu_v_all)
)

# Held-out rows: index 501 up to (at most) 768.
X1, y1 = data_matrix[501:769, :8], data_matrix[501:769, 8]

# Pairwise-interaction score: 0.5 * (sum_f (x . v_f)^2 - sum_k x_k^2 * sum_f v_kf^2)
projected_sq = np.sum((X1 @ mu_v_avg) ** 2, axis=1)
self_sq = (X1 ** 2) @ np.sum(mu_v_avg ** 2, axis=1)
double_sum = 0.5 * (projected_sq - self_sq)

logits = mu_w0_avg + X1 @ np.array(mu_w_avg) + double_sum
prob = expit(logits)

# AUC-ROC
auc_roc = roc_auc_score(y1, prob)
print("AUC-ROC:", auc_roc)


AUC-ROC: 0.43983682384684575


In [None]:
import pandas as pd
import numpy as np
from scipy.special import expit
from sklearn.metrics import roc_auc_score

# Load the data
data = pd.read_csv(
    "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv",
    header=None,
)
data_matrix = data.values

# Training split: rows 1..500, first 8 columns as features, column 8 as target.
# NOTE(review): the slice starts at 1 although the file is read with
# header=None, so row 0 is never used -- confirm this is intended.
X, y = data_matrix[1:501, :8], np.squeeze(data_matrix[1:501, 8])

n, r = 500, 3          # n: training rows, r: number of latent factors per feature
d = X.shape[1]         # number of features
iterations = 1000

# Fixed hyperparameters
mu_0, mu_w_0, lambda_w_0 = 0.0, 2.0, 3.0
gamma_0 = 1.0
alpha_lambda = beta_lambda = 1.0

# Prior parameters, drawn once: precisions from Gamma(shape, scale), means from
# a normal whose standard deviation is 1/sqrt(gamma_0 * precision).
lambda_w = np.random.gamma(alpha_lambda, 1 / beta_lambda)
mu_w = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_w)))
lambda_v = np.random.gamma(alpha_lambda, 1 / beta_lambda, r)
mu_v = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_v)), r)

# Variational parameters (mean / scale for the intercept, linear and factor weights)
mu_w0_new, sigma_w0_new = 0.0, 0.1
mu_w_new, sigma_w_new = np.zeros(d), np.ones(d)
mu_v_new, sigma_v_new = np.zeros((d, r)), np.ones((d, r))

# Adam moment accumulators, one (m, v) pair per variational parameter
m_mu_w0 = v_mu_w0 = 0.0
m_sigma_w0 = v_sigma_w0 = 0.0
m_mu_w, v_mu_w = np.zeros(d), np.zeros(d)
m_sigma_w, v_sigma_w = np.zeros(d), np.zeros(d)
m_mu_v, v_mu_v = np.zeros((d, r)), np.zeros((d, r))
m_sigma_v, v_sigma_v = np.zeros((d, r)), np.zeros((d, r))

# Per-iteration history of the variational means
mu_w0_all, mu_w_all, mu_v_all = [], [], []

alpha_k = np.ones(d)

def adam_update(param, grad, m, v, t, alpha=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Apply one bias-corrected Adam step and return the updated state.

    The step is *added* to ``param`` (gradient ascent), so ``grad`` is expected
    to be the gradient of a quantity that is being maximised.

    Args:
        param: Current parameter value (float or NumPy array).
        grad: Gradient, same shape as ``param``.
        m: Running first-moment (mean of gradients) estimate.
        v: Running second-moment (mean of squared gradients) estimate.
        t: Step counter. The bias-correction denominators ``1 - beta ** t``
            are zero at ``t == 0``, so counting has to start at 1.
        alpha: Step size.
        beta1: Decay rate of the first-moment estimate.
        beta2: Decay rate of the second-moment estimate.
        epsilon: Constant added to ``sqrt(v_hat)`` to avoid division by zero.

    Returns:
        Tuple ``(param, m_new, v_new)``. The returned ``param`` is a new
        object; the argument passed in is left unmodified.
    """
    m_new = beta1 * m + (1 - beta1) * grad
    v_new = beta2 * v + (1 - beta2) * (grad ** 2)
    # Bias correction compensates for the zero-initialised moments.
    m_hat = m_new / (1 - beta1 ** t)
    v_hat = v_new / (1 - beta2 ** t)
    # Build a new value rather than using `+=`, which would silently modify
    # a NumPy array owned by the caller.
    param = param + alpha * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m_new, v_new

# Stochastic variational inference loop: every iteration draws one
# reparameterised sample of the weights, evaluates the gradient of the
# objective w.r.t. each variational mean/scale, and applies an Adam step
# (adam_update adds the step, i.e. performs ascent).
prior_scale = d / np.sum(alpha_k)  # common factor of every prior term
X_sq = X ** 2                      # loop-invariant, reused below

for t in range(1, iterations + 1):
    # Reparameterisation: Theta = mu + epsilon * sigma with epsilon ~ N(0, 1).
    epsilon_w0 = np.random.normal(0, 1)
    epsilon_w = np.random.normal(0, 1, d)
    epsilon_v = np.random.normal(0, 1, (d, r))

    Theta_0 = mu_w0_new + epsilon_w0 * sigma_w0_new
    Theta_w = mu_w_new + epsilon_w * sigma_w_new
    Theta_v = mu_v_new + epsilon_v * sigma_v_new

    # Linear predictor with the pairwise-interaction term
    # 0.5 * sum_f [(x . v_f)^2 - sum_k x_k^2 v_kf^2]; the residual
    # y - sigmoid(eta) is shared by every gradient, so compute it once.
    XV = X @ Theta_v
    eta = Theta_0 + X @ Theta_w + 0.5 * np.sum(XV ** 2 - X_sq @ (Theta_v ** 2), axis=1)
    residual = y - expit(eta)

    # Intercept.
    # NOTE(review): this prior term is summed over all d entries of alpha_k,
    # unlike the per-coordinate terms below -- confirm this is intended.
    grad_mu_w0 = np.sum(residual) \
        - prior_scale * np.sum(alpha_k * (mu_w0_new - mu_w_0) / (1 / lambda_w_0))
    grad_sigma_w0 = np.sum(residual * epsilon_w0) \
        - prior_scale * np.sum(alpha_k * (sigma_w0_new / (1 / lambda_w_0) - 1 / sigma_w0_new))

    # Linear weights: data term sum_i residual_i * x_ik for every k at once.
    data_w = residual @ X
    grad_mu_w = data_w \
        - prior_scale * (alpha_k * (mu_w_new - mu_w) / (1 / lambda_w))
    grad_sigma_w = data_w * epsilon_w \
        - prior_scale * (alpha_k * (sigma_w_new / (1 / lambda_w) - 1 / sigma_w_new))

    # Factor weights: data term
    # sum_i residual_i * (x_ik * (x_i . v_f) - v_kf * x_ik^2) for every (k, f).
    data_v = X.T @ (residual[:, None] * XV) - Theta_v * (residual @ X_sq)[:, None]
    grad_mu_v = data_v \
        - prior_scale * (alpha_k[:, None] * (mu_v_new - mu_v) / (1 / lambda_v))
    grad_sigma_v = epsilon_v * data_v \
        - prior_scale * (alpha_k[:, None] * (sigma_v_new / (1 / lambda_v) - 1 / sigma_v_new))

    mu_w0_new, m_mu_w0, v_mu_w0 = adam_update(mu_w0_new, grad_mu_w0, m_mu_w0, v_mu_w0, t)
    sigma_w0_new, m_sigma_w0, v_sigma_w0 = adam_update(sigma_w0_new, grad_sigma_w0, m_sigma_w0, v_sigma_w0, t)
    mu_w_new, m_mu_w, v_mu_w = adam_update(mu_w_new, grad_mu_w, m_mu_w, v_mu_w, t)
    sigma_w_new, m_sigma_w, v_sigma_w = adam_update(sigma_w_new, grad_sigma_w, m_sigma_w, v_sigma_w, t)
    mu_v_new, m_mu_v, v_mu_v = adam_update(mu_v_new, grad_mu_v, m_mu_v, v_mu_v, t)
    sigma_v_new, m_sigma_v, v_sigma_v = adam_update(sigma_v_new, grad_sigma_v, m_sigma_v, v_sigma_v, t)

    # Record the means after this step (copies, so later updates cannot alias them).
    mu_w0_all.append(mu_w0_new)
    mu_w_all.append(mu_w_new.copy())
    mu_v_all.append(mu_v_new.copy())

# Point estimates: average each variational mean over all recorded iterations.
mu_w0_avg, mu_w_avg, mu_v_avg = (
    np.mean(history, axis=0) for history in (mu_w0_all, mu_w_all, mu_v_all)
)

# Held-out rows: index 501 up to (at most) 768.
X1, y1 = data_matrix[501:769, :8], data_matrix[501:769, 8]

# Pairwise-interaction score: 0.5 * (sum_f (x . v_f)^2 - sum_k x_k^2 * sum_f v_kf^2)
projected_sq = np.sum((X1 @ mu_v_avg) ** 2, axis=1)
self_sq = (X1 ** 2) @ np.sum(mu_v_avg ** 2, axis=1)
double_sum = 0.5 * (projected_sq - self_sq)

logits = mu_w0_avg + X1 @ np.array(mu_w_avg) + double_sum
prob = expit(logits)

# AUC-ROC
auc_roc = roc_auc_score(y1, prob)
print("AUC-ROC:", auc_roc)


**Sonar Dataset**

In [None]:
import numpy as np
from scipy.special import expit
import pandas as pd
from sklearn.metrics import roc_auc_score

data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data", header=None)
# Shuffle the rows. The permutation length comes from the frame itself: a
# hard-coded 207 only produces indices 0..206 and would silently drop the
# last row of a 208-row file (the evaluation slice below goes up to 208).
ind = np.random.permutation(len(data))
data = data.iloc[ind]

n = 120   # number of training rows
r = 2     # number of latent factors per feature

# Training split: shuffled rows 1..120; columns 0-59 are used as features and
# column 60 is turned into a 0/1 target (1.0 where the label equals 'R').
# NOTE(review): shuffled row 0 is in neither the training nor the evaluation
# slice -- confirm this is intended.
data_new = data.iloc[:121, :]
X = data_new.iloc[1:, :60].values
y = (data_new.iloc[1:, 60] == 'R').astype(np.float64).values

d = X.shape[1]   # number of features
iterations = 1000

# Fixed hyperparameters
mu_0 = 0.0
mu_w_0 = 2.0
lambda_w_0 = 3.0
gamma_0 = 1.0
alpha_lambda = 1.0
beta_lambda = 1.0

# Prior parameters, drawn once: precisions from Gamma(shape, scale), means from
# a normal whose standard deviation is 1/sqrt(gamma_0 * precision).
lambda_w = np.random.gamma(alpha_lambda, 1/beta_lambda)
mu_w = np.random.normal(mu_0, np.sqrt(1/(gamma_0*lambda_w)))
lambda_v = np.random.gamma(alpha_lambda, 1/beta_lambda, r)
mu_v = np.random.normal(mu_0, np.sqrt(1/(gamma_0*lambda_v)), r)

# Variational parameters (mean / scale for the intercept, linear and factor weights)
mu_w0_new = 0.0
sigma_w0_new = 0.1
mu_w_new = np.zeros(d)
sigma_w_new = np.ones(d)
mu_v_new = np.zeros((d, r))
sigma_v_new = np.ones((d, r))

# Adam moment accumulators, one (m, v) pair per variational parameter
m_mu_w0, v_mu_w0 = 0.0, 0.0
m_sigma_w0, v_sigma_w0 = 0.0, 0.0
m_mu_w, v_mu_w = np.zeros(d), np.zeros(d)
m_sigma_w, v_sigma_w = np.zeros(d), np.zeros(d)
m_mu_v, v_mu_v = np.zeros((d, r)), np.zeros((d, r))
m_sigma_v, v_sigma_v = np.zeros((d, r)), np.zeros((d, r))

# Per-iteration history of the variational means
mu_w0_all = []
mu_w_all = []
mu_v_all = []

alpha_k = np.ones(d)

def adam_update(param, grad, m, v, t, alpha=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Apply one bias-corrected Adam step and return the updated state.

    The step is *added* to ``param`` (gradient ascent), so ``grad`` is expected
    to be the gradient of a quantity that is being maximised.

    Args:
        param: Current parameter value (float or NumPy array).
        grad: Gradient, same shape as ``param``.
        m: Running first-moment (mean of gradients) estimate.
        v: Running second-moment (mean of squared gradients) estimate.
        t: Step counter. The bias-correction denominators ``1 - beta ** t``
            are zero at ``t == 0``, so counting has to start at 1.
        alpha: Step size.
        beta1: Decay rate of the first-moment estimate.
        beta2: Decay rate of the second-moment estimate.
        epsilon: Constant added to ``sqrt(v_hat)`` to avoid division by zero.

    Returns:
        Tuple ``(param, m_new, v_new)``. The returned ``param`` is a new
        object; the argument passed in is left unmodified.
    """
    m_new = beta1 * m + (1 - beta1) * grad
    v_new = beta2 * v + (1 - beta2) * (grad ** 2)
    # Bias correction compensates for the zero-initialised moments.
    m_hat = m_new / (1 - beta1 ** t)
    v_hat = v_new / (1 - beta2 ** t)
    # Build a new value rather than using `+=`, which would silently modify
    # a NumPy array owned by the caller.
    param = param + alpha * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m_new, v_new

# Stochastic variational inference loop: every iteration draws one
# reparameterised sample of the weights, evaluates the gradient of the
# objective w.r.t. each variational mean/scale, and applies an Adam step
# (adam_update adds the step, i.e. performs ascent).
prior_scale = d / np.sum(alpha_k)  # common factor of every prior term
X_sq = X ** 2                      # loop-invariant, reused below

for t in range(1, iterations + 1):
    # Reparameterisation: Theta = mu + epsilon * sigma with epsilon ~ N(0, 1).
    epsilon_w0 = np.random.normal(0, 1)
    epsilon_w = np.random.normal(0, 1, d)
    epsilon_v = np.random.normal(0, 1, (d, r))

    Theta_0 = mu_w0_new + epsilon_w0 * sigma_w0_new
    Theta_w = mu_w_new + epsilon_w * sigma_w_new
    Theta_v = mu_v_new + epsilon_v * sigma_v_new

    # Linear predictor with the pairwise-interaction term
    # 0.5 * sum_f [(x . v_f)^2 - sum_k x_k^2 v_kf^2]; the residual
    # y - sigmoid(eta) is shared by every gradient, so compute it once.
    XV = X @ Theta_v
    eta = Theta_0 + X @ Theta_w + 0.5 * np.sum(XV ** 2 - X_sq @ (Theta_v ** 2), axis=1)
    residual = y - expit(eta)

    # Intercept.
    # NOTE(review): this prior term is summed over all d entries of alpha_k,
    # unlike the per-coordinate terms below -- confirm this is intended.
    grad_mu_w0 = np.sum(residual) \
        - prior_scale * np.sum(alpha_k * (mu_w0_new - mu_w_0) / (1 / lambda_w_0))
    grad_sigma_w0 = np.sum(residual * epsilon_w0) \
        - prior_scale * np.sum(alpha_k * (sigma_w0_new / (1 / lambda_w_0) - 1 / sigma_w0_new))

    # Linear weights: data term sum_i residual_i * x_ik for every k at once.
    data_w = residual @ X
    grad_mu_w = data_w \
        - prior_scale * (alpha_k * (mu_w_new - mu_w) / (1 / lambda_w))
    grad_sigma_w = data_w * epsilon_w \
        - prior_scale * (alpha_k * (sigma_w_new / (1 / lambda_w) - 1 / sigma_w_new))

    # Factor weights: data term
    # sum_i residual_i * (x_ik * (x_i . v_f) - v_kf * x_ik^2) for every (k, f).
    data_v = X.T @ (residual[:, None] * XV) - Theta_v * (residual @ X_sq)[:, None]
    grad_mu_v = data_v \
        - prior_scale * (alpha_k[:, None] * (mu_v_new - mu_v) / (1 / lambda_v))
    grad_sigma_v = epsilon_v * data_v \
        - prior_scale * (alpha_k[:, None] * (sigma_v_new / (1 / lambda_v) - 1 / sigma_v_new))

    mu_w0_new, m_mu_w0, v_mu_w0 = adam_update(mu_w0_new, grad_mu_w0, m_mu_w0, v_mu_w0, t)
    sigma_w0_new, m_sigma_w0, v_sigma_w0 = adam_update(sigma_w0_new, grad_sigma_w0, m_sigma_w0, v_sigma_w0, t)
    mu_w_new, m_mu_w, v_mu_w = adam_update(mu_w_new, grad_mu_w, m_mu_w, v_mu_w, t)
    sigma_w_new, m_sigma_w, v_sigma_w = adam_update(sigma_w_new, grad_sigma_w, m_sigma_w, v_sigma_w, t)
    mu_v_new, m_mu_v, v_mu_v = adam_update(mu_v_new, grad_mu_v, m_mu_v, v_mu_v, t)
    sigma_v_new, m_sigma_v, v_sigma_v = adam_update(sigma_v_new, grad_sigma_v, m_sigma_v, v_sigma_v, t)

    # Record the means after this step (copies, so later updates cannot alias them).
    mu_w0_all.append(mu_w0_new)
    mu_w_all.append(mu_w_new.copy())
    mu_v_all.append(mu_v_new.copy())

# Point estimates: average each variational mean over all recorded iterations.
mu_w0_avg, mu_w_avg, mu_v_avg = (
    np.mean(history, axis=0) for history in (mu_w0_all, mu_w_all, mu_v_all)
)

# Held-out rows: positions 121 up to (at most) 207 of the shuffled frame;
# the target is 1.0 where column 60 equals 'R'.
held_out = data.iloc[121:208]
X1 = held_out.iloc[:, :60].values.astype(np.float64)
y1 = (held_out.iloc[:, 60] == 'R').astype(np.float64).values

# Pairwise-interaction score: 0.5 * (sum_f (x . v_f)^2 - sum_k x_k^2 * sum_f v_kf^2)
projected_sq = np.sum((X1 @ mu_v_avg) ** 2, axis=1)
self_sq = (X1 ** 2) @ np.sum(mu_v_avg ** 2, axis=1)
double_sum = 0.5 * (projected_sq - self_sq)

logits = mu_w0_avg + X1 @ np.array(mu_w_avg) + double_sum
prob = expit(logits)

# AUC-ROC
auc_roc = roc_auc_score(y1, prob)
print("AUC-ROC:", auc_roc)


AUC-ROC: 0.8645833333333333


**Australian Credit Approval DataSet**

In [None]:
import numpy as np
from scipy.special import expit
import pandas as pd
from sklearn.metrics import roc_auc_score

data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/statlog/australian/australian.dat"
# Space-separated file without a header; '?' marks missing values and rows
# containing any are dropped before converting to a plain array.
data = pd.read_csv(
    data_url, sep=' ', header=None, na_values='?', skipinitialspace=True
).dropna().values

# Training split: rows 1..500, first 14 columns as features, column 14 as target.
# NOTE(review): the slice starts at 1 although the file is read with
# header=None, so row 0 is never used -- confirm this is intended.
n = 500
y, X = data[1:501, 14], data[1:501, :14]

r = 3                  # number of latent factors per feature
d = X.shape[1]         # number of features
iterations = 1000

# Fixed hyperparameters
mu_0, mu_w_0, lambda_w_0 = 0.0, 2.0, 3.0
gamma_0 = 1.0
alpha_lambda = beta_lambda = 1.0

# Prior parameters, drawn once: precisions from Gamma(shape, scale), means from
# a normal whose standard deviation is 1/sqrt(gamma_0 * precision).
lambda_w = np.random.gamma(alpha_lambda, 1 / beta_lambda)
mu_w = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_w)))
lambda_v = np.random.gamma(alpha_lambda, 1 / beta_lambda, r)
mu_v = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_v)), r)

# Variational parameters (mean / scale for the intercept, linear and factor weights)
mu_w0_new, sigma_w0_new = 0.0, 0.1
mu_w_new, sigma_w_new = np.zeros(d), np.ones(d)
mu_v_new, sigma_v_new = np.zeros((d, r)), np.ones((d, r))

# Adam moment accumulators, one (m, v) pair per variational parameter
m_mu_w0 = v_mu_w0 = 0.0
m_sigma_w0 = v_sigma_w0 = 0.0
m_mu_w, v_mu_w = np.zeros(d), np.zeros(d)
m_sigma_w, v_sigma_w = np.zeros(d), np.zeros(d)
m_mu_v, v_mu_v = np.zeros((d, r)), np.zeros((d, r))
m_sigma_v, v_sigma_v = np.zeros((d, r)), np.zeros((d, r))

# Per-iteration history of the variational means
mu_w0_all, mu_w_all, mu_v_all = [], [], []

alpha_k = np.ones(d)

def adam_update(param, grad, m, v, t, alpha=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Apply one bias-corrected Adam step and return the updated state.

    The step is *added* to ``param`` (gradient ascent), so ``grad`` is expected
    to be the gradient of a quantity that is being maximised.

    Args:
        param: Current parameter value (float or NumPy array).
        grad: Gradient, same shape as ``param``.
        m: Running first-moment (mean of gradients) estimate.
        v: Running second-moment (mean of squared gradients) estimate.
        t: Step counter. The bias-correction denominators ``1 - beta ** t``
            are zero at ``t == 0``, so counting has to start at 1.
        alpha: Step size.
        beta1: Decay rate of the first-moment estimate.
        beta2: Decay rate of the second-moment estimate.
        epsilon: Constant added to ``sqrt(v_hat)`` to avoid division by zero.

    Returns:
        Tuple ``(param, m_new, v_new)``. The returned ``param`` is a new
        object; the argument passed in is left unmodified.
    """
    m_new = beta1 * m + (1 - beta1) * grad
    v_new = beta2 * v + (1 - beta2) * (grad ** 2)
    # Bias correction compensates for the zero-initialised moments.
    m_hat = m_new / (1 - beta1 ** t)
    v_hat = v_new / (1 - beta2 ** t)
    # Build a new value rather than using `+=`, which would silently modify
    # a NumPy array owned by the caller.
    param = param + alpha * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m_new, v_new

# Stochastic variational inference loop: every iteration draws one
# reparameterised sample of the weights, evaluates the gradient of the
# objective w.r.t. each variational mean/scale, and applies an Adam step
# (adam_update adds the step, i.e. performs ascent).
prior_scale = d / np.sum(alpha_k)  # common factor of every prior term
X_sq = X ** 2                      # loop-invariant, reused below

for t in range(1, iterations + 1):
    # Reparameterisation: Theta = mu + epsilon * sigma with epsilon ~ N(0, 1).
    epsilon_w0 = np.random.normal(0, 1)
    epsilon_w = np.random.normal(0, 1, d)
    epsilon_v = np.random.normal(0, 1, (d, r))

    Theta_0 = mu_w0_new + epsilon_w0 * sigma_w0_new
    Theta_w = mu_w_new + epsilon_w * sigma_w_new
    Theta_v = mu_v_new + epsilon_v * sigma_v_new

    # Linear predictor with the pairwise-interaction term
    # 0.5 * sum_f [(x . v_f)^2 - sum_k x_k^2 v_kf^2]; the residual
    # y - sigmoid(eta) is shared by every gradient, so compute it once.
    XV = X @ Theta_v
    eta = Theta_0 + X @ Theta_w + 0.5 * np.sum(XV ** 2 - X_sq @ (Theta_v ** 2), axis=1)
    residual = y - expit(eta)

    # Intercept.
    # NOTE(review): this prior term is summed over all d entries of alpha_k,
    # unlike the per-coordinate terms below -- confirm this is intended.
    grad_mu_w0 = np.sum(residual) \
        - prior_scale * np.sum(alpha_k * (mu_w0_new - mu_w_0) / (1 / lambda_w_0))
    grad_sigma_w0 = np.sum(residual * epsilon_w0) \
        - prior_scale * np.sum(alpha_k * (sigma_w0_new / (1 / lambda_w_0) - 1 / sigma_w0_new))

    # Linear weights: data term sum_i residual_i * x_ik for every k at once.
    data_w = residual @ X
    grad_mu_w = data_w \
        - prior_scale * (alpha_k * (mu_w_new - mu_w) / (1 / lambda_w))
    grad_sigma_w = data_w * epsilon_w \
        - prior_scale * (alpha_k * (sigma_w_new / (1 / lambda_w) - 1 / sigma_w_new))

    # Factor weights: data term
    # sum_i residual_i * (x_ik * (x_i . v_f) - v_kf * x_ik^2) for every (k, f).
    data_v = X.T @ (residual[:, None] * XV) - Theta_v * (residual @ X_sq)[:, None]
    grad_mu_v = data_v \
        - prior_scale * (alpha_k[:, None] * (mu_v_new - mu_v) / (1 / lambda_v))
    grad_sigma_v = epsilon_v * data_v \
        - prior_scale * (alpha_k[:, None] * (sigma_v_new / (1 / lambda_v) - 1 / sigma_v_new))

    mu_w0_new, m_mu_w0, v_mu_w0 = adam_update(mu_w0_new, grad_mu_w0, m_mu_w0, v_mu_w0, t)
    sigma_w0_new, m_sigma_w0, v_sigma_w0 = adam_update(sigma_w0_new, grad_sigma_w0, m_sigma_w0, v_sigma_w0, t)
    mu_w_new, m_mu_w, v_mu_w = adam_update(mu_w_new, grad_mu_w, m_mu_w, v_mu_w, t)
    sigma_w_new, m_sigma_w, v_sigma_w = adam_update(sigma_w_new, grad_sigma_w, m_sigma_w, v_sigma_w, t)
    mu_v_new, m_mu_v, v_mu_v = adam_update(mu_v_new, grad_mu_v, m_mu_v, v_mu_v, t)
    sigma_v_new, m_sigma_v, v_sigma_v = adam_update(sigma_v_new, grad_sigma_v, m_sigma_v, v_sigma_v, t)

    # Record the means after this step (copies, so later updates cannot alias them).
    mu_w0_all.append(mu_w0_new)
    mu_w_all.append(mu_w_new.copy())
    mu_v_all.append(mu_v_new.copy())

# Point estimates: average each variational mean over all recorded iterations.
mu_w0_avg, mu_w_avg, mu_v_avg = (
    np.mean(history, axis=0) for history in (mu_w0_all, mu_w_all, mu_v_all)
)

# Held-out rows: index 501 up to (at most) 689.
X1, y1 = data[501:690, :14], data[501:690, 14]

# Pairwise-interaction score: 0.5 * (sum_f (x . v_f)^2 - sum_k x_k^2 * sum_f v_kf^2)
projected_sq = np.sum((X1 @ mu_v_avg) ** 2, axis=1)
self_sq = (X1 ** 2) @ np.sum(mu_v_avg ** 2, axis=1)
double_sum = 0.5 * (projected_sq - self_sq)

logits = mu_w0_avg + X1 @ np.array(mu_w_avg) + double_sum
prob = expit(logits)

# AUC-ROC
auc_roc = roc_auc_score(y1, prob)
print("AUC-ROC:", auc_roc)

AUC-ROC: 0.7446292446292446


**Banknote Dataset**

In [None]:
import numpy as np
from scipy.special import expit
import pandas as pd
from sklearn.metrics import roc_auc_score
data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt"
data = pd.read_csv(data_url, header=None)
data_matrix = data.values

# Training split: rows 1..700, first 4 columns as features, column 4 as target.
# NOTE(review): the slice starts at 1 although the file is read with
# header=None, so row 0 is never used -- confirm this is intended.
n = 700
y, X = data_matrix[1:701, 4], data_matrix[1:701, :4]

r = 2                  # number of latent factors per feature
d = X.shape[1]         # number of features
iterations = 1000

# Fixed hyperparameters
mu_0, mu_w_0, lambda_w_0 = 0.0, 2.0, 3.0
gamma_0 = 1.0
alpha_lambda = beta_lambda = 1.0

# Prior parameters, drawn once: precisions from Gamma(shape, scale), means from
# a normal whose standard deviation is 1/sqrt(gamma_0 * precision).
lambda_w = np.random.gamma(alpha_lambda, 1 / beta_lambda)
mu_w = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_w)))
lambda_v = np.random.gamma(alpha_lambda, 1 / beta_lambda, r)
mu_v = np.random.normal(mu_0, np.sqrt(1 / (gamma_0 * lambda_v)), r)

# Variational parameters (mean / scale for the intercept, linear and factor weights)
mu_w0_new, sigma_w0_new = 0.0, 0.1
mu_w_new, sigma_w_new = np.zeros(d), np.ones(d)
mu_v_new, sigma_v_new = np.zeros((d, r)), np.ones((d, r))

# Adam moment accumulators, one (m, v) pair per variational parameter
m_mu_w0 = v_mu_w0 = 0.0
m_sigma_w0 = v_sigma_w0 = 0.0
m_mu_w, v_mu_w = np.zeros(d), np.zeros(d)
m_sigma_w, v_sigma_w = np.zeros(d), np.zeros(d)
m_mu_v, v_mu_v = np.zeros((d, r)), np.zeros((d, r))
m_sigma_v, v_sigma_v = np.zeros((d, r)), np.zeros((d, r))

# Per-iteration history of the variational means
mu_w0_all, mu_w_all, mu_v_all = [], [], []

alpha_k = np.ones(d)

def adam_update(param, grad, m, v, t, alpha=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8):
    """Apply one bias-corrected Adam step and return the updated state.

    The step is *added* to ``param`` (gradient ascent), so ``grad`` is expected
    to be the gradient of a quantity that is being maximised.

    Args:
        param: Current parameter value (float or NumPy array).
        grad: Gradient, same shape as ``param``.
        m: Running first-moment (mean of gradients) estimate.
        v: Running second-moment (mean of squared gradients) estimate.
        t: Step counter. The bias-correction denominators ``1 - beta ** t``
            are zero at ``t == 0``, so counting has to start at 1.
        alpha: Step size.
        beta1: Decay rate of the first-moment estimate.
        beta2: Decay rate of the second-moment estimate.
        epsilon: Constant added to ``sqrt(v_hat)`` to avoid division by zero.

    Returns:
        Tuple ``(param, m_new, v_new)``. The returned ``param`` is a new
        object; the argument passed in is left unmodified.
    """
    m_new = beta1 * m + (1 - beta1) * grad
    v_new = beta2 * v + (1 - beta2) * (grad ** 2)
    # Bias correction compensates for the zero-initialised moments.
    m_hat = m_new / (1 - beta1 ** t)
    v_hat = v_new / (1 - beta2 ** t)
    # Build a new value rather than using `+=`, which would silently modify
    # a NumPy array owned by the caller.
    param = param + alpha * m_hat / (np.sqrt(v_hat) + epsilon)
    return param, m_new, v_new

# Stochastic variational inference loop: every iteration draws one
# reparameterised sample of the weights, evaluates the gradient of the
# objective w.r.t. each variational mean/scale, and applies an Adam step
# (adam_update adds the step, i.e. performs ascent).
prior_scale = d / np.sum(alpha_k)  # common factor of every prior term
X_sq = X ** 2                      # loop-invariant, reused below

for t in range(1, iterations + 1):
    # Reparameterisation: Theta = mu + epsilon * sigma with epsilon ~ N(0, 1).
    epsilon_w0 = np.random.normal(0, 1)
    epsilon_w = np.random.normal(0, 1, d)
    epsilon_v = np.random.normal(0, 1, (d, r))

    Theta_0 = mu_w0_new + epsilon_w0 * sigma_w0_new
    Theta_w = mu_w_new + epsilon_w * sigma_w_new
    Theta_v = mu_v_new + epsilon_v * sigma_v_new

    # Linear predictor with the pairwise-interaction term
    # 0.5 * sum_f [(x . v_f)^2 - sum_k x_k^2 v_kf^2]; the residual
    # y - sigmoid(eta) is shared by every gradient, so compute it once.
    XV = X @ Theta_v
    eta = Theta_0 + X @ Theta_w + 0.5 * np.sum(XV ** 2 - X_sq @ (Theta_v ** 2), axis=1)
    residual = y - expit(eta)

    # Intercept.
    # NOTE(review): this prior term is summed over all d entries of alpha_k,
    # unlike the per-coordinate terms below -- confirm this is intended.
    grad_mu_w0 = np.sum(residual) \
        - prior_scale * np.sum(alpha_k * (mu_w0_new - mu_w_0) / (1 / lambda_w_0))
    grad_sigma_w0 = np.sum(residual * epsilon_w0) \
        - prior_scale * np.sum(alpha_k * (sigma_w0_new / (1 / lambda_w_0) - 1 / sigma_w0_new))

    # Linear weights: data term sum_i residual_i * x_ik for every k at once.
    data_w = residual @ X
    grad_mu_w = data_w \
        - prior_scale * (alpha_k * (mu_w_new - mu_w) / (1 / lambda_w))
    grad_sigma_w = data_w * epsilon_w \
        - prior_scale * (alpha_k * (sigma_w_new / (1 / lambda_w) - 1 / sigma_w_new))

    # Factor weights: data term
    # sum_i residual_i * (x_ik * (x_i . v_f) - v_kf * x_ik^2) for every (k, f).
    data_v = X.T @ (residual[:, None] * XV) - Theta_v * (residual @ X_sq)[:, None]
    grad_mu_v = data_v \
        - prior_scale * (alpha_k[:, None] * (mu_v_new - mu_v) / (1 / lambda_v))
    grad_sigma_v = epsilon_v * data_v \
        - prior_scale * (alpha_k[:, None] * (sigma_v_new / (1 / lambda_v) - 1 / sigma_v_new))

    mu_w0_new, m_mu_w0, v_mu_w0 = adam_update(mu_w0_new, grad_mu_w0, m_mu_w0, v_mu_w0, t)
    sigma_w0_new, m_sigma_w0, v_sigma_w0 = adam_update(sigma_w0_new, grad_sigma_w0, m_sigma_w0, v_sigma_w0, t)
    mu_w_new, m_mu_w, v_mu_w = adam_update(mu_w_new, grad_mu_w, m_mu_w, v_mu_w, t)
    sigma_w_new, m_sigma_w, v_sigma_w = adam_update(sigma_w_new, grad_sigma_w, m_sigma_w, v_sigma_w, t)
    mu_v_new, m_mu_v, v_mu_v = adam_update(mu_v_new, grad_mu_v, m_mu_v, v_mu_v, t)
    sigma_v_new, m_sigma_v, v_sigma_v = adam_update(sigma_v_new, grad_sigma_v, m_sigma_v, v_sigma_v, t)

    # Record the means after this step (copies, so later updates cannot alias them).
    mu_w0_all.append(mu_w0_new)
    mu_w_all.append(mu_w_new.copy())
    mu_v_all.append(mu_v_new.copy())

# Point estimates: average each variational mean over all recorded iterations.
mu_w0_avg, mu_w_avg, mu_v_avg = (
    np.mean(history, axis=0) for history in (mu_w0_all, mu_w_all, mu_v_all)
)

# Held-out rows: index 701 up to (at most) 1371.
X1, y1 = data_matrix[701:1372, :4], data_matrix[701:1372, 4]

# Pairwise-interaction score: 0.5 * (sum_f (x . v_f)^2 - sum_k x_k^2 * sum_f v_kf^2)
projected_sq = np.sum((X1 @ mu_v_avg) ** 2, axis=1)
self_sq = (X1 ** 2) @ np.sum(mu_v_avg ** 2, axis=1)
double_sum = 0.5 * (projected_sq - self_sq)

logits = mu_w0_avg + X1 @ np.array(mu_w_avg) + double_sum
prob = expit(logits)

# AUC-ROC
auc_roc = roc_auc_score(y1, prob)
print("AUC-ROC:", auc_roc)

AUC-ROC: 0.8678581026605751
