# Training of the model for the parliament dataset

### 0- Import libraries

In [1]:
import torch
import json
import numpy as np
from fcts.train_procedure import train_with_LBFGS
from fcts.lbm_nmar import LBM_NMAR
from fcts.lbfgs import FullBatchLBFGS
from fcts.figures import groupes_politiques, pi_df, text_legend_row
from fcts.utils import reparametrized_expanded_params, init_random_params, save_objects_to_yaml, load_objects_from_yaml
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns 
import yaml

### 1- Load torch parameters

In [2]:
# (for Mac) %env PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0
device = 'mps' #put 'cuda' or 'cpu'
device2 = 'mps' #put None or 'cuda'


def _backend_is_available(name):
    """Return True when the torch backend designated by `name` can be used.

    `name` may be None, a string such as 'cpu', 'cuda', 'cuda:0' or 'mps',
    or a torch.device. None and backends other than 'cuda'/'mps' are
    reported as available.
    """
    if name is None:
        return True
    backend = str(name).split(':')[0]  # drop an optional device index
    if backend == 'cuda':
        return torch.cuda.is_available()
    if backend == 'mps':
        return torch.backends.mps.is_available()
    return True


# Probe the backend that was actually requested (the check used to look at
# MPS only, whatever `device` was set to) and fall back to the CPU otherwise.
if not _backend_is_available(device):
    print(f"{device} is not available. Algorithm will use cpu")
    device, device2 = torch.device('cpu'), None
elif not _backend_is_available(device2):
    # presumably None disables the secondary device, as the hint on the
    # `device2` line suggests -- verify against LBM_NMAR
    print(f"{device2} is not available. No secondary device will be used")
    device2 = None

### 2- Load parliament datasets

In [3]:
# votes: matrix gathering votes for several laws and politicians
# (1: positive, 0: missing/abstention, -1: negative)
votes = np.loadtxt("data_parliament/votes.txt", delimiter=";").astype(int)

# The JSON files are opened in `with` blocks so the handles are closed as soon
# as parsing is done, and decoded explicitly as UTF-8 so that loading does not
# depend on the platform's default encoding.

# deputes: family name, name, political group
with open('data_parliament/deputes.json', 'r', encoding='utf-8') as deputes_file:
    deputes = json.load(deputes_file)

# texts: political group demanding the law, title of demand, date,
# type (type of vote, type of majority, name of type of vote)
with open('data_parliament/texts.json', 'r', encoding='utf-8') as texts_file:
    texts = json.load(texts_file)

### 3- Training 

#### a - Parameter initialization

Dataset shape

In [4]:
# The vote matrix is two-dimensional: n1 rows and n2 columns.
n1, n2 = np.shape(votes)

# Report both dimensions (rows are labelled as deputes, columns as laws).
print(
    "row length (number of deputes): ",
    n1,
)
print(
    "col length (number of laws): ",
    n2,
)

row length (number of deputes):  576
col length (number of laws):  1256


Define the number of row and column clusters

In [5]:
# Select number of row clusters
nq = 3# COMPLETE

# Select number of column clusters
nl = 5 # COMPLETE

Initialization of ($\gamma, \theta$)

In [6]:

# Seed the global random generators so that the random starting point can be
# reproduced after a kernel restart.
# NOTE(review): init_random_params is project code and it is not visible from
# this notebook which generator it draws from, so both NumPy and PyTorch are
# seeded -- confirm and drop the one that is not needed.
SEED = 0  # change this value to try another random initialization
np.random.seed(SEED)
torch.manual_seed(SEED)

# Initial parameter vector, stored as a float32 tensor on `device` with
# gradient tracking enabled.
vector_of_parameters = torch.tensor(
    init_random_params(n1, n2, nq, nl),
    requires_grad=True,
    device=device,
    dtype=torch.float32,
)

#### b- Model creation

In [7]:
# Instantiate the LBM_NMAR model from the initial parameter vector, the vote
# matrix and the sizes (n1, n2, nq, nl), on the devices selected earlier.
model = LBM_NMAR(vector_of_parameters, votes, (n1, n2, nq, nl), device=device, device2=device2)

#### c- Train model 

Perform variational EM 

In [8]:
# Placeholders assigned up front: if training is interrupted before
# train_with_LBFGS returns, `success` and `loglike` would otherwise be
# undefined (or still hold the values of a previous run of this cell).
# NOTE(review): False/None are assumed to be acceptable "no result" markers --
# confirm against what train_with_LBFGS returns.
success, loglike = False, None
try:
    success, loglike = train_with_LBFGS(model)
except KeyboardInterrupt:
    # Interrupting the kernel stops the optimisation without raising further;
    # `model` is left in whatever state it had reached.
    print("KeyboardInterrupt detected, stopping training")

-------------------------------------------------------------------------------- 
Start training LBM MNAR 
 --------------------------------------------------------------------------------
Number of row classes :  3
Number of col classes :  5
 EM step  |   LBFGS iter  | criteria |


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at /Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/utils/python_arg_parser.cpp:1519.)
  p.data.add_(


 0  |   1  | 336911.00000 |
 0  |   2  | 322342.56250 |
 0  |   3  | 316435.34375 |
 0  |   4  | 314075.43750 |
 0  |   5  | 313106.62500 |
 0  |   6  | 312676.25000 |
 0  |   7  | 312365.40625 |
 0  |   8  | 312166.50000 |
 0  |   9  | 312038.28125 |
 0  |   10  | 311839.56250 |
 0  |   11  | 311661.31250 |
 0  |   12  | 311406.18750 |
 0  |   13  | 311003.62500 |
 0  |   14  | 310603.06250 |
 0  |   15  | 310216.93750 |
 0  |   16  | 309414.84375 |
 0  |   17  | 309123.90625 |
 0  |   18  | 309029.43750 |
 0  |   19  | 308967.59375 |
 0  |   20  | 308964.18750 |
 0  |   21  | 308963.93750 |
Curvature pair skipped due to failed criterion
 0  |   22  | 308963.93750 |
------------------------------  Optimizing next EM step  ------------------------------
 EM step  |   LBFGS iter  | criteria |
 1  |   1  | 308165.31250 |
 1  |   2  | 307158.06250 |
 1  |   3  | 306827.53125 |
 1  |   4  | 306514.75000 |
 1  |   5  | 306299.12500 |
 1  |   6  | 306041.96875 |
 1  |   7  | 305945.46875 |
 

Reparametrization of parameters

In [9]:
# Concatenate the variational and model parameters held by `model` and expand
# them into their eighteen named components.
(
    nu_a, rho_a,
    nu_b, rho_b,
    nu_p, rho_p,
    nu_q, rho_q,
    tau_1, tau_2,
    mu_un,
    sigma_sq_a, sigma_sq_b, sigma_sq_p, sigma_sq_q,
    alpha_1, alpha_2,
    pi,
) = reparametrized_expanded_params(
    torch.cat((model.variationnal_params, model.model_params)),
    n1, n2, nq, nl,
    device,
)

Save parameters in YAML file (trained_parameters.yaml)

In [10]:
# Gather the problem sizes and every reparametrized quantity under its own
# name, in the same order as before, then write them to a YAML file.
parameters_dict = dict(
    # problem sizes
    n1=n1, n2=n2, nq=nq, nl=nl,
    # reparametrized quantities
    nu_a=nu_a, rho_a=rho_a,
    nu_b=nu_b, rho_b=rho_b,
    nu_p=nu_p, rho_p=rho_p,
    nu_q=nu_q, rho_q=rho_q,
    tau_1=tau_1, tau_2=tau_2,
    mu_un=mu_un,
    sigma_sq_a=sigma_sq_a, sigma_sq_b=sigma_sq_b,
    sigma_sq_p=sigma_sq_p, sigma_sq_q=sigma_sq_q,
    alpha_1=alpha_1, alpha_2=alpha_2,
    pi=pi,
)

save_objects_to_yaml(parameters_dict, 'trained_parameters.yaml')