# Training of the model for parliament dataset

### 0- Import libraries

In [1]:
import torch
import json
import numpy as np
from fcts.train_procedure import train_with_LBFGS
from fcts.lbm_nmar import LBM_NMAR
from fcts.utils import reparametrized_expanded_params, init_random_params, save_objects_to_yaml

### 1- Load torch parameters

In [2]:
# (for Mac) %env PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
device = 'cuda' #put 'cuda', 'cpu' or 'mps' (for Mac)
device2 = 'cuda' #put None, 'cuda' or 'mps' (for Mac)

# Availability probe for each accelerator backend. The check must match the
# backend that was actually requested: testing MPS while asking for CUDA would
# wrongly fall back to cpu on a CUDA machine, and keep 'cuda' on a Mac.
_backend_is_available = {
    'cuda': torch.cuda.is_available,
    'mps': torch.backends.mps.is_available,
}

# Strip an optional device index ('cuda:0' -> 'cuda') before the lookup.
_requested_backend = str(device).split(':')[0]

# Unknown backend names are treated as unavailable and fall back to cpu.
if _requested_backend != 'cpu' and not _backend_is_available.get(_requested_backend, lambda: False)():
    print(f"Device '{device}' is not available. Algorithm will use cpu")
    device, device2 = torch.device('cpu'), None

### 2- Load parliament datasets

In [3]:
# votes: matrix gathering votes for several laws and politicians
# (1: positive, 0: missing/abstention, -1: negative)
votes = np.loadtxt("data_parliament/votes.txt", delimiter=";").astype(int)

# deputes: family name, name, political group.
# The file is opened in a context manager so the handle is closed right after
# parsing, and read as UTF-8 so the result does not depend on the platform's
# default encoding.
with open('data_parliament/deputes.json', 'r', encoding='utf-8') as deputes_file:
    deputes = json.load(deputes_file)

# texts: political group demanding the law, title of demand, date,
# type (type of vote, type of majority, name of type of vote)
with open('data_parliament/texts.json', 'r', encoding='utf-8') as texts_file:
    texts = json.load(texts_file)

In [4]:
# Coordinates of the entries of `votes`, grouped by vote value.
# Each result is a 2-D array with one (row, column) pair per matching entry.
indices_p = np.argwhere(np.equal(votes, 1))       # positive votes
indices_n = np.argwhere(np.equal(votes, -1))      # negative votes
indices_zeros = np.argwhere(np.equal(votes, 0))   # missing votes / abstentions

### 3- Training 

#### a - Parameter initialization

Dataset shapes

In [5]:
# Dimensions of the votes matrix: n1 rows, n2 columns.
(n1, n2) = votes.shape

# Number of row clusters (to be chosen by the user).
nq = 3

# Number of column clusters (to be chosen by the user).
nl = 5

Initialization of ($\gamma, \theta$)

In [6]:
# Seed the global random generators so the random initialization is the same
# after a kernel restart.
# NOTE(review): assumes init_random_params draws from the NumPy and/or torch
# global RNG -- confirm in fcts/utils.py (seed `random` too if it uses it).
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

# Initial values of the parameters, stored as a single float32 tensor on
# `device` with gradient tracking enabled.
vector_of_parameters = torch.tensor(init_random_params(n1, n2, nq, nl), requires_grad=True, device=device, dtype=torch.float32)

#### b- Model creation

In [7]:
# Build the model from the initial parameter vector, the votes matrix and the
# problem sizes (rows, columns, row clusters, column clusters).
model = LBM_NMAR(vector_of_parameters, votes, (n1, n2, nq, nl), device=device, device2=device2)

#### c- Train model 

Perform variational EM: 

VEM step % 2 = 0: VE step, where the criterion is optimized with respect to the variational parameters $\gamma$


VEM step % 2 = 1: M step, where the criterion is optimized with respect to the model parameters $\theta$

In [8]:
# Bind the results up front: if training is interrupted before
# train_with_LBFGS returns, `success` and `loglike` would otherwise stay
# undefined. None means "training did not run to completion".
success, loglike = None, None

try:
    success, loglike = train_with_LBFGS(model)
except KeyboardInterrupt:
    # Manual interruption is allowed: the model keeps whatever parameters it
    # reached so far and the notebook can continue.
    print("KeyboardInterrupt detected, stopping training")

-------------------------------------------------------------------------------- 
Start training LBM MNAR 
 --------------------------------------------------------------------------------
Number of row classes :  3
Number of col classes :  5
 VEM step  |   LBFGS iter  | criteria |


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/python_arg_parser.cpp:1519.)
  p.data.add_(


 0  |   1  | 337169.15625 |
 0  |   2  | 322914.09375 |
 0  |   3  | 317270.81250 |
 0  |   4  | 315133.43750 |
 0  |   5  | 314240.21875 |
 0  |   6  | 313764.71875 |
 0  |   7  | 313387.28125 |
 0  |   8  | 313160.25000 |
 0  |   9  | 312944.28125 |
 0  |   10  | 312665.25000 |
 0  |   11  | 312387.12500 |
 0  |   12  | 312105.12500 |
 0  |   13  | 311653.00000 |
 0  |   14  | 311270.43750 |
 0  |   15  | 310746.40625 |
 0  |   16  | 310103.18750 |
 0  |   17  | 309715.03125 |
 0  |   18  | 309419.03125 |
 0  |   19  | 309397.87500 |
 0  |   20  | 309395.12500 |
 0  |   21  | 309388.90625 |
 0  |   22  | 309387.06250 |
 0  |   23  | 309387.06250 |
------------------------------  Optimizing next EM step  ------------------------------
 EM step  |   LBFGS iter  | criteria |
 1  |   1  | 308595.78125 |
 1  |   2  | 307733.53125 |
 1  |   3  | 307408.71875 |
 1  |   4  | 307199.12500 |
 1  |   5  | 306834.90625 |
 1  |   6  | 306507.00000 |
 1  |   7  | 306299.53125 |
 1  |   8  | 306207

Reparametrization

In [9]:
# Concatenate the variational and model parameters of the trained model and
# expand them into their individual named components.
(
    nu_a, rho_a,
    nu_b, rho_b,
    nu_p, rho_p,
    nu_q, rho_q,
    tau_1, tau_2,
    mu_un,
    sigma_sq_a, sigma_sq_b, sigma_sq_p, sigma_sq_q,
    alpha_1, alpha_2,
    pi,
) = reparametrized_expanded_params(
    torch.cat((model.variationnal_params, model.model_params)),
    n1,
    n2,
    nq,
    nl,
    device,
)

Save parameters in YAML file (trained_parameters.yaml)

In [10]:
# Gather everything to persist: problem sizes, expanded parameters, vote
# indices and the devices used. Keys are listed in the same order as before.
parameters_dict = dict(
    # problem sizes
    n1=n1,
    n2=n2,
    nq=nq,
    nl=nl,
    # expanded parameters
    nu_a=nu_a,
    rho_a=rho_a,
    nu_b=nu_b,
    rho_b=rho_b,
    nu_p=nu_p,
    rho_p=rho_p,
    nu_q=nu_q,
    rho_q=rho_q,
    tau_1=tau_1,
    tau_2=tau_2,
    mu_un=mu_un,
    sigma_sq_a=sigma_sq_a,
    sigma_sq_b=sigma_sq_b,
    sigma_sq_p=sigma_sq_p,
    sigma_sq_q=sigma_sq_q,
    alpha_1=alpha_1,
    alpha_2=alpha_2,
    pi=pi,
    # coordinates of positive / negative / zero votes
    indices_p=indices_p,
    indices_n=indices_n,
    indices_zeros=indices_zeros,
    # devices used for training
    device=device,
    device2=device2,
)

# Write the collected objects to the YAML file.
save_objects_to_yaml(parameters_dict, 'trained_parameters.yaml')