# RecSeats

## Baseline : MNL Model

Implementation of the Multinomial Logit (MNL) model for locational choice experiment data, using the statsmodels library.

**Author of the code:** Anon.


#### Library imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.api as sm
import warnings
warnings.filterwarnings("ignore")


#### Imports from the project modules

In [None]:
from src.preprocessing.data_loading import load_data_matrix
from src.preprocessing.compute_features import Feature_Pipeline
from src.utils import Params

from src.model.user_specific import *
from src.metrics import *
from src.room_transformation import *

from src.visualisation.plot_example import * 

#### File setting

Change the value of `IND_FILE` to select the dataset to study.

In [None]:
# Position, in `params_list`, of the parameter file describing the dataset to study.
IND_FILE = 1

# Available parameter files; the trailing comment gives the index to use in IND_FILE.
params_list = [
    "study2.json",        # 0
    "study4_CF_FC.json",  # 1
]

# Root folder of the dataset (parameter files live in its "parameters/" sub-folder).
path_data = "./data/Blanchard/"

selected_params = params_list[IND_FILE]
params_file = Params("{}parameters/{}".format(path_data, selected_params))

# Echo the main settings read from the selected parameter file.
for caption, setting in (
    ("INSAMPLE : ", params_file.csv_train),
    ("HOLDOUT : ", params_file.csv_valid),
    ("ROOM SIZE : ", params_file.room_size),
    ("PADDING : ", params_file.padding),
    ("PAIRS OF SEATS : ", params_file.is_couple),
):
    print(caption, setting)


#### Data Loading

In [None]:
import os


def _load_or_build(split, csv_name, cache_name):
    """Return the (inputs, outputs) arrays of one data split, using the .npy cache.

    The arrays are read from ``<path_data>numpy/<cache_name>_inputs.npy`` and
    ``..._outputs.npy``. If either file is missing, the arrays are rebuilt from
    the CSV file with ``load_data_matrix`` and written to the cache.

    Parameters
    ----------
    split : str
        Name of the split, only used in the progress message ("train"/"valid").
    csv_name : str
        CSV file of the split, relative to ``path_data``.
    cache_name : str
        Base name of the cached .npy files, relative to ``path_data + "numpy/"``.

    Returns
    -------
    tuple
        ``(inputs, outputs)`` as returned by ``np.load`` / ``load_data_matrix``.
    """
    inputs_path = path_data + "numpy/" + cache_name + "_inputs.npy"
    outputs_path = path_data + "numpy/" + cache_name + "_outputs.npy"

    try:
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
        # cache files that were produced locally by this notebook.
        inputs = np.load(inputs_path, allow_pickle=True)
        outputs = np.load(outputs_path, allow_pickle=True)

    except FileNotFoundError:
        print("Creating data matrix for {} set...".format(split), end="")
        inputs, outputs = load_data_matrix(path = path_data + csv_name,
                                           room_size = params_file.room_size,
                                           padding = 0,
                                           verbose = False,
                                           to_tensor = 0,
                                           is_wso = params_file.is_wso
                                           )
        # Without this, np.save raises FileNotFoundError when the cache folder
        # does not exist yet (e.g. on a fresh checkout).
        os.makedirs(os.path.dirname(inputs_path), exist_ok=True)
        np.save(inputs_path, inputs)
        np.save(outputs_path, outputs)
        print("Done.")

    return inputs, outputs


train_inputs, train_outputs = _load_or_build("train",
                                             params_file.csv_train,
                                             params_file.dataloader_train)

valid_inputs, valid_outputs = _load_or_build("valid",
                                             params_file.csv_valid,
                                             params_file.dataloader_valid)


If the dataset contains pairs of seats, the transformation that keeps only the left seat of each pair is applied to both the training and validation inputs, so that the model predicts the seat on the left:

In [None]:
# For paired-seat datasets, replace every room by its `keep_left_seat` version.
if params_file.is_couple:
    # Each split is walked over its OWN dimensions: the validation set is no
    # longer indexed with the number of training clients, which raised an
    # IndexError (or silently skipped clients) when the two counts differed.
    for split_inputs in (train_inputs, valid_inputs):
        for i in range(split_inputs.shape[0]):
            for j in range(split_inputs.shape[1]):
                split_inputs[i][j] = keep_left_seat(split_inputs[i][j])


### Training and Evaluation 

For each customer, we compute the feature matrix, fit an MNL model on the in-sample choices, and evaluate it on the holdout (validation) set:

In [None]:
# Fit one MNL model per customer on the training choices, then score it on the
# customer's validation choices (top-n accuracy and expected L1 distance).
params_parametric = Params("src/model/parameters/params_user_specific.json")
pipeline = Feature_Pipeline(params_parametric)

# One entry per customer.
# NOTE: acc_2_list stores the top-3 accuracy and acc_3_list the top-5 accuracy.
acc_1_list = []
acc_2_list = []
acc_3_list = []
l1_loss_list = []

for idx_client in range(len(train_inputs)):

    # ---- Training: stack the rows of all choice sets into one design matrix
    train_X, train_Y = pipeline.compute_feature(train_inputs[idx_client], train_outputs[idx_client])
    flat_train_X = np.concatenate(train_X, axis=0)
    flat_train_X = sm.add_constant(flat_train_X, prepend=False)  # constant is the last column
    flat_train_Y = np.concatenate(train_Y, axis=0)

    mdl = sm.MNLogit(flat_train_Y, flat_train_X)
    mdl_fit = mdl.fit(method='lbfgs', maxiter=200)
    # Debug helpers: print(mdl_fit.params) / print(mdl_fit.summary())

    # ---- Evaluation on the validation choices of the same customer
    valid_X, valid_Y = pipeline.compute_feature(valid_inputs[idx_client], valid_outputs[idx_client])

    # (n_rows, n_cols) of every VALIDATION room. These were previously read
    # from the training rooms, although they are indexed by validation choice.
    sizes = [(len(x), len(x[0])) for x in valid_inputs[idx_client]]

    Y_pred = []
    l1 = 0

    valid_X = valid_X.tolist()

    for idx_choice in range(len(valid_X)):
        valid_X[idx_choice] = sm.add_constant(valid_X[idx_choice], prepend=False)
        feats = valid_X[idx_choice]
        n_rows, n_cols = sizes[idx_choice]

        # Column 1 of the prediction is the probability of outcome 1
        # (presumably "seat chosen" -- labels are matched against 1 below).
        prob = np.asarray(mdl_fit.predict(feats))[:, 1]
        prob = prob[~np.isnan(prob)]

        # Alternatives ranked by decreasing probability. A stable argsort gives
        # every alternative exactly once, even when probabilities are tied
        # (looking values up with list.index() repeated the first tied index).
        Y_pred.append(np.argsort(-prob, kind="stable").tolist())

        # Expected L1 distance between the chosen alternative and the
        # predicted distribution.
        # NOTE(review): assumes features 0 and 1 are positions normalised by
        # the room width and height respectively -- confirm in Feature_Pipeline.
        label = np.where(np.asarray(valid_Y[idx_choice]) == 1)[0][0]
        label_pos = (int(feats[label][0] * n_cols),
                     int(feats[label][1] * n_rows))
        for p, prob_p in enumerate(prob):
            seat_pos = (int(feats[p][0] * n_cols),
                        int(feats[p][1] * n_rows))
            l1 += prob_p * (abs(label_pos[0] - seat_pos[0])
                            + abs(label_pos[1] - seat_pos[1]))

    acc_1_list.append(top_n_accuracy(valid_Y, Y_pred, n=1))
    acc_2_list.append(top_n_accuracy(valid_Y, Y_pred, n=3))
    acc_3_list.append(top_n_accuracy(valid_Y, Y_pred, n=5))
    l1_loss_list.append(l1/len(valid_Y))

    # Single-line progress counter.
    print("\r{}/{}".format(idx_client + 1, len(train_inputs)), end="")


#### Results

In [None]:
# Average every per-customer metric and report it; customers whose L1 loss is
# NaN are left out of the L1 average only.
results_summary = (
    ("Top-1 Accuracy : ", np.mean(acc_1_list)),
    ("Top-3 Accuracy : ", np.mean(acc_2_list)),
    ("Top-5 Accuracy : ", np.mean(acc_3_list)),
    ("Expected L1 loss : ", np.nanmean(l1_loss_list)),
)
for caption, score in results_summary:
    print(caption, score)