# RecSeats

## RecSeats Implementation

Main file for running the hybrid model on Locational choice experiment data.

**Author of the code:** Anon.

#### Library imports

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import copy

import csv
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import make_scorer

import os

import warnings
warnings.filterwarnings("ignore")

#### Imports from the project modules

In [None]:
from src.preprocessing.data_loading import *
from src.preprocessing.compute_features import Feature_Pipeline
from src.utils import Params
from src.deep.traintest import TrainTest
from src.model.user_specific import *
from src.model.hybrid import *
from src.visualisation.plot_example import * 
from src.model.cnn import *
from src.model.autoencoder_cnn import *
from src.deep.deep_utils import *
from src.metrics import *

#### File setting

Change the value of `IND_FILE` to select which dataset is studied.

In [None]:
# Position, inside `params_list`, of the dataset configuration to study.
IND_FILE = 0

# Dataset configuration files; the index of each entry is noted on its line.
params_list = [
    "study2.json",        # 0
    "study4_CF_FC.json",  # 1
]

path_data = "./data/Blanchard/"

# Hyper-parameter files, one per model.
params_parametric = Params("src/model/parameters/params_user_specific.json")
params_cnn = Params("src/model/parameters/params_cnn.json")
params_auto_cnn = Params("src/model/parameters/params_autoencoder_cnn.json")
params_hybrid = Params("src/model/parameters/params_hybrid.json")

# Dataset-specific settings, selected through IND_FILE.
params_file = Params("{}parameters/{}".format(path_data, params_list[IND_FILE]))

# Summary of the selected dataset configuration, one line per setting.
for _label, _attribute in (
    ("TRAINING : ", "csv_train"),
    ("TEST : ", "csv_valid"),
    ("ROOM SIZE : ", "room_size"),
    ("PADDING : ", "padding"),
    ("PAIRS OF SEATS : ", "is_couple"),
):
    print(_label, getattr(params_file, _attribute))

#### Data loading : 

In [None]:
def _load_or_build_matrices(cache_name, csv_name, split_label):
    """Return the (inputs, outputs) arrays of one data split.

    The arrays are read from the ``.npy`` cache under ``path_data + "numpy/"``
    when both cache files exist. Otherwise they are built from the CSV file
    with ``load_data_matrix`` and written to the cache for the next run.

    Args:
        cache_name: Base name of the cache files (``<cache_name>_inputs.npy``
            and ``<cache_name>_outputs.npy``).
        csv_name: CSV file name, relative to ``path_data``.
        split_label: Human-readable split name used in the progress message.

    Returns:
        Tuple ``(inputs, outputs)`` as loaded from the cache or as returned by
        ``load_data_matrix``.
    """
    cache_dir = path_data + "numpy/"
    inputs_file = cache_dir + cache_name + "_inputs.npy"
    outputs_file = cache_dir + cache_name + "_outputs.npy"

    try:
        # NOTE(security): allow_pickle=True unpickles arbitrary objects on
        # load; only use this with cache files produced locally.
        inputs = np.load(inputs_file, allow_pickle=True)
        outputs = np.load(outputs_file, allow_pickle=True)
    except FileNotFoundError:
        # Either cache file is missing: rebuild both from the CSV.
        print("Creating data matrix for {} set...".format(split_label), end="")
        inputs, outputs = load_data_matrix(path=path_data + csv_name,
                                           room_size=params_file.room_size,
                                           padding=0,
                                           verbose=False,
                                           to_tensor=0,
                                           is_wso=params_file.is_wso)
        # Without the cache directory np.save would raise and the freshly
        # built matrices would be lost, so create it first.
        os.makedirs(cache_dir, exist_ok=True)
        np.save(inputs_file, inputs)
        np.save(outputs_file, outputs)
        print("Done.")

    return inputs, outputs


train_inputs, train_outputs = _load_or_build_matrices(params_file.dataloader_train,
                                                      params_file.csv_train,
                                                      "train")

valid_inputs, valid_outputs = _load_or_build_matrices(params_file.dataloader_valid,
                                                      params_file.csv_valid,
                                                      "valid")


If the file contains pairs of seats, the dataset is transformed so that the seat on the left is the one to predict:

In [None]:
if params_file.is_couple:
    # Each split is traversed with its own dimensions. Indexing the valid set
    # with the train set's client count would either raise an IndexError or
    # leave some valid clients untransformed when the two counts differ.
    for split_inputs in (train_inputs, valid_inputs):
        for i in range(split_inputs.shape[0]):
            for j in range(split_inputs.shape[1]):
                # In-place replacement of each sample by its left-seat version.
                split_inputs[i][j] = keep_left_seat(split_inputs[i][j])

In [None]:
import warnings
warnings.filterwarnings("ignore")

#### Metrics

In [None]:
def top_1_acc(outputs, labels):
    """Return ``torch_top_n_accuracy(outputs, labels)`` with N fixed to 1."""
    rank_cutoff = 1
    return torch_top_n_accuracy(outputs, labels, N=rank_cutoff)

def top_3_acc(outputs, labels):
    """Return ``torch_top_n_accuracy(outputs, labels)`` with N fixed to 3."""
    rank_cutoff = 3
    return torch_top_n_accuracy(outputs, labels, N=rank_cutoff)

def top_5_acc(outputs, labels):
    """Return ``torch_top_n_accuracy(outputs, labels)`` with N fixed to 5."""
    rank_cutoff = 5
    return torch_top_n_accuracy(outputs, labels, N=rank_cutoff)

def top_10_acc(outputs, labels):
    """Return ``torch_top_n_accuracy(outputs, labels)`` with N fixed to 10."""
    rank_cutoff = 10
    return torch_top_n_accuracy(outputs, labels, N=rank_cutoff)

def top_20_acc(outputs, labels):
    """Return ``torch_top_n_accuracy(outputs, labels)`` with N fixed to 20."""
    rank_cutoff = 20
    return torch_top_n_accuracy(outputs, labels, N=rank_cutoff)

def top_100_acc(outputs, labels):
    """Return ``torch_top_n_accuracy(outputs, labels)`` with N fixed to 100."""
    rank_cutoff = 100
    return torch_top_n_accuracy(outputs, labels, N=rank_cutoff)

### Training and Evaluation

*Caution:* the CNN (or CDNN) must already have been trained on the same dataset before running this section, because its saved weights are loaded below.

In [None]:
# Per-client training and evaluation of the hybrid model.
#
# For each client: a user-specific model is fitted, a Hybrid model combining
# the pre-trained content model and that user model is trained, and the
# metrics in `eval_fns` are collected. The per-metric means are printed last.

# `train_test_split` is used below but is not imported explicitly by the
# visible import cells; import it here so the cell does not depend on a
# wildcard import happening to re-export it.
from sklearn.model_selection import train_test_split

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_1 = CNN(room_size=params_file.room_size,
              params=params_cnn)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine (and vice versa).
model_1.load_state_dict(torch.load(PATH_BEST_MODEL + "CNN", map_location=device))

# Alternative content model:
# model_1 = AutoencoderCNN(room_size = params_file.room_size,
#                        params = params_auto_cnn)
# model_1.load_state_dict(torch.load(PATH_BEST_MODEL+"Autoencoder_CNN", map_location=device))

model_1 = model_1.to(device)


pipeline = Feature_Pipeline(params_parametric)
model_2 = Recommendation(params_parametric, pipeline)


Content_model = ContentPart(room_size=params_file.room_size,
                            padding=params_file.padding,
                            cnn_model=model_1)

User_model = UserPart(room_size=params_file.room_size,
                      padding=params_file.padding,
                      user_model=model_2)

# eval_fns = [top_1_acc, top_3_acc, top_5_acc]
eval_fns = [top_1_acc, top_3_acc, top_5_acc, torch_weighted_l1]

verbose = False

results = []

# Upper bound on the number of clients evaluated; it is clamped to the number
# of clients actually present so that a smaller dataset does not raise an
# IndexError, and so that the progress counter shows the real total.
MAX_CLIENTS = 40
n_clients = min(MAX_CLIENTS, train_inputs.shape[0], valid_inputs.shape[0])

for id_client in range(n_clients):

    # A fresh Hybrid model and optimizer for every client. Content_model and
    # User_model are the shared instances created above; model_2 is re-fitted
    # for each client further down.
    model = Hybrid(room_size=params_file.room_size,
                   padding=params_file.padding,
                   init_value=params_hybrid.init_value,
                   content_model=Content_model,
                   user_model=User_model)
    model = model.to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=params_hybrid.lr_opt,
                                momentum=params_hybrid.momentum,
                                weight_decay=params_hybrid.weight_decay,
                                nesterov=bool(params_hybrid.nesterov))

    loss_fn = nn.NLLLoss()

    if verbose:
        print("################ Client n°{} : ################".format(id_client+1))

    # 70/30 split of this client's training data, with a fixed seed.
    train_X, valid_X, train_Y, valid_Y = train_test_split(train_inputs[id_client],
                                                          train_outputs[id_client],
                                                          test_size=0.3,
                                                          random_state=15)

    test_X, test_Y = valid_inputs[id_client], valid_outputs[id_client]

    # The user-specific model is fitted on the 70% part.
    train_X_f, train_Y_f = model_2.pipeline.compute_feature(train_X, train_Y)
    model_2.fit(train_X_f, train_Y_f)

    # The hybrid model is trained on the held-out 30% part (valid_X), not on
    # train_X. NOTE(review): presumably so that the combination is learned on
    # data the user model has not been fitted on -- confirm this is intended.
    trainloader_mat = load_data_hybrid(valid_X, valid_Y,
                                       room_size=params_file.room_size,
                                       padding=params_file.padding,
                                       params=params_hybrid)

    # The client's samples from the "valid" file serve as the evaluation set.
    validloader_mat = load_data_hybrid(test_X, test_Y,
                                       room_size=params_file.room_size,
                                       padding=params_file.padding,
                                       params=params_hybrid)

    train_test_model = TrainTest(model, trainloader_mat, validloader_mat, optimizer,
                                 loss_fn, eval_fns, two_inputs=True)
    history_cnn = train_test_model.train(patience=20, max_it=100, verbose=verbose)

    # Element [1] of evaluate() is used below as the metric values in
    # `eval_fns` order -- TODO confirm against TrainTest.evaluate.
    acc = train_test_model.evaluate()[1]
    results.append(acc)

    if verbose:
        a = model.alpha.item()
        print("\nAlpha Value : {:5.2%}".format(a))
        print("\nFINAL VALID ACCURACY (Client {}) :".format(id_client+1))
        print("     Top1 : {:5.2%}\n     Top3 : {:5.2%}\n     Top5 : {:5.2%}".format(acc[0], acc[1], acc[2]))
        print("#"*46+"\n")
    else:
        # Progress counter against the number of clients actually processed.
        print("\r{}/{}".format(id_client+1, n_clients), end="")

# Mean of every metric over the clients, ignoring NaN values.
results_by_metric = np.transpose(results)
m = [np.nanmean(results_by_metric[i]) for i in range(len(eval_fns))]

print("\nFINAL MEAN OF VALID ACCURACY :")
print("     Top1 : {:5.2%}\n     Top3 : {:5.2%}\n     Top5 : {:5.2%}".format(m[0], m[1], m[2]))

In [None]:
# Display the per-metric means computed above, in the order of `eval_fns`.
m