# Training of the model for the parliament dataset

### 0- Import libraries

In [1]:
import torch
import json
import numpy as np
from fcts.train_procedure import train_with_LBFGS
from fcts.lbm_nmar import LBM_NMAR
from fcts.utils import reparametrized_expanded_params, init_random_params, save_objects_to_yaml

### 1- Load torch parameters

In [2]:
# Device selection for the rest of the notebook.
# (for Mac) %env PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
device = 'mps'   # main device: 'mps', 'cuda' or 'cpu'
device2 = 'mps'  # secondary device handed to the model: None, 'cuda' or 'mps'

# Probe the backend that was actually requested. The previous check always
# queried MPS, so selecting 'cuda' on a CUDA machine wrongly fell back to cpu.
_requested_backend = str(device).split(':')[0]  # drop an optional ':<index>' suffix
if _requested_backend == 'cuda':
    _backend_available = torch.cuda.is_available()
elif _requested_backend == 'mps':
    _backend_available = torch.backends.mps.is_available()
else:
    # 'cpu' (or any backend we cannot probe here) is used as requested.
    _backend_available = True

if not _backend_available:
    print(f"Device '{device}' is not available. Algorithm will use cpu")
    device, device2 = torch.device('cpu'), None

### 2- Load parliament datasets

In [3]:
# votes: matrix gathering votes for several laws and politicians
# (1: positive, 0: missing/abstention, -1: negative)
votes = np.loadtxt("data_parliament/votes.txt", delimiter=";").astype(int)

# deputes: family name, name, political group.
# A `with` block closes the file handle as soon as the JSON has been parsed.
with open('data_parliament/deputes.json', 'r') as deputes_file:
    deputes = json.load(deputes_file)

# texts: political group demanding the law, title of demand, date,
# type (type of vote, type of majority, name of type of vote)
with open('data_parliament/texts.json', 'r') as texts_file:
    texts = json.load(texts_file)

In [4]:
# (row, column) coordinates of the entries of `votes`, grouped by vote value:
# 1 -> indices_p, -1 -> indices_n, 0 -> indices_zeros.
indices_p, indices_n, indices_zeros = (
    np.argwhere(votes == vote_value) for vote_value in (1, -1, 0)
)

### 3- Training 

#### a - Parameter initialization

Dataset shapes

In [5]:
# Number of rows (n1) and columns (n2) of the votes matrix
n1, n2 = votes.shape

# Number of row clusters (nq) and column clusters (nl) -- COMPLETE with your own choice
nq, nl = 3, 5

Initialization of ($\gamma, \theta$)

In [6]:
# Random starting point, wrapped as a float32 tensor on `device` that tracks gradients.
initial_values = init_random_params(n1, n2, nq, nl)
vector_of_parameters = torch.tensor(
    initial_values,
    dtype=torch.float32,
    device=device,
    requires_grad=True,
)

#### b- Model creation

In [7]:
# Build the model from the initial parameter vector, the votes matrix and the
# problem sizes (rows, columns, row clusters, column clusters).
problem_sizes = (n1, n2, nq, nl)
model = LBM_NMAR(vector_of_parameters, votes, problem_sizes, device=device, device2=device2)

#### c- Train model 

Perform variational EM 

In [8]:
# Bind the results up front so that `success` and `loglike` exist even when
# training is interrupted by hand; None means train_with_LBFGS did not return.
success, loglike = None, None
try:
    success, loglike = train_with_LBFGS(model)
except KeyboardInterrupt:
    # Deliberate best-effort: keep whatever state `model` holds at the interruption.
    print("KeyboardInterrupt detected, stopping training")

-------------------------------------------------------------------------------- 
Start training LBM MNAR 
 --------------------------------------------------------------------------------
Number of row classes :  3
Number of col classes :  5
 EM step  |   LBFGS iter  | criteria |


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/python_arg_parser.cpp:1519.)
  p.data.add_(


 0  |   1  | 345652.68750 |
 0  |   2  | 328068.34375 |
 0  |   3  | 322284.31250 |
 0  |   4  | 318931.15625 |
 0  |   5  | 317952.96875 |
 0  |   6  | 317139.96875 |
 0  |   7  | 316672.43750 |
 0  |   8  | 316295.25000 |
 0  |   9  | 315987.59375 |
 0  |   10  | 315570.31250 |
 0  |   11  | 315275.34375 |
 0  |   12  | 314761.68750 |
 0  |   13  | 314357.25000 |
 0  |   14  | 313674.21875 |
 0  |   15  | 313330.78125 |
 0  |   16  | 312713.71875 |
 0  |   17  | 312278.84375 |
 0  |   18  | 311558.90625 |
 0  |   19  | 311093.96875 |
 0  |   20  | 310866.75000 |
 0  |   21  | 310674.71875 |
 0  |   22  | 310667.18750 |
 0  |   23  | 310662.84375 |
 0  |   24  | 310661.84375 |
 0  |   25  | 310661.50000 |
 0  |   26  | 310661.40625 |
 0  |   27  | 310661.34375 |
 0  |   28  | 310661.34375 |
------------------------------  Optimizing next EM step  ------------------------------
 EM step  |   LBFGS iter  | criteria |
 1  |   1  | 309192.96875 |
 1  |   2  | 307797.06250 |
 1  |   3  | 3

Reparametrization

In [9]:
# Concatenate the variational and model parameters held by the model, then
# expand them into the individual named quantities.
trained_vector = torch.cat((model.variationnal_params, model.model_params))
(
    nu_a, rho_a,
    nu_b, rho_b,
    nu_p, rho_p,
    nu_q, rho_q,
    tau_1, tau_2,
    mu_un,
    sigma_sq_a, sigma_sq_b, sigma_sq_p, sigma_sq_q,
    alpha_1, alpha_2,
    pi,
) = reparametrized_expanded_params(trained_vector, n1, n2, nq, nl, device)

Save parameters in YAML file (trained_parameters.yaml)

In [10]:
# Gather everything to persist: problem sizes, reparametrized quantities,
# vote index arrays and the devices in use.
parameters_dict = dict(
    # problem sizes
    n1=n1,
    n2=n2,
    nq=nq,
    nl=nl,
    # reparametrized quantities
    nu_a=nu_a,
    rho_a=rho_a,
    nu_b=nu_b,
    rho_b=rho_b,
    nu_p=nu_p,
    rho_p=rho_p,
    nu_q=nu_q,
    rho_q=rho_q,
    tau_1=tau_1,
    tau_2=tau_2,
    mu_un=mu_un,
    sigma_sq_a=sigma_sq_a,
    sigma_sq_b=sigma_sq_b,
    sigma_sq_p=sigma_sq_p,
    sigma_sq_q=sigma_sq_q,
    alpha_1=alpha_1,
    alpha_2=alpha_2,
    pi=pi,
    # coordinates of positive / negative / zero votes
    indices_p=indices_p,
    indices_n=indices_n,
    indices_zeros=indices_zeros,
    # devices
    device=device,
    device2=device2,
)

# Write the collected objects to trained_parameters.yaml
save_objects_to_yaml(parameters_dict, 'trained_parameters.yaml')