# Simple MultiNomial Model

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/artefactory/choice-learn/blob/main/notebooks/models/simple_mnl.ipynb)

In [1]:
# Install necessary requirements

# If you run this notebook on Google Colab, or in standalone mode, you need to install the required packages.
# Uncomment the following lines:

# !pip install choice-learn

# If you run the notebook within the GitHub repository, you need to run the following lines, that can skipped otherwise:
import os
import sys

sys.path.append("../../")

In [2]:
# Remove GPU use
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import numpy as np
import pandas as pd

from choice_learn.models.simple_mnl import SimpleMNL
from choice_learn.data import ChoiceDataset

In [3]:
df = pd.read_csv("data_full_anonym.csv")
df

Unnamed: 0,individual,alternative,choice,officeID,depWeekDay,OD,fAirline,staySaturday,stayDurationMinutes,totalPrice,totalTripDurationMinutes,dtd,nAirlines,nFlights,isContinental,isDomestic,outDepTime_sin,outDepTime_cos,outArrTime_sin,outArrTime_cos
0,91,19,1,1,2,22,36,1.0,18660.0,230.029999,735.0,23.0,1.0,4.0,1.0,0.0,0.737277,-0.675590,-0.854912,0.518773
1,91,20,0,1,2,22,36,1.0,18235.0,230.029999,1160.0,23.0,1.0,4.0,1.0,0.0,0.737277,-0.675590,-0.854912,0.518773
2,91,21,0,1,2,22,30,1.0,17855.0,247.809998,920.0,24.0,1.0,4.0,1.0,0.0,-0.999762,-0.021815,-0.976296,0.216440
3,91,22,0,1,2,22,30,1.0,17855.0,247.809998,1135.0,24.0,1.0,4.0,1.0,0.0,-0.999762,-0.021815,-0.402747,0.915311
4,91,23,0,1,2,22,30,1.0,280.0,247.809998,1135.0,24.0,1.0,4.0,1.0,0.0,-0.999762,-0.021815,-0.976296,0.216440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1089956,982349,27,0,4,5,19,34,0.0,10265.0,203.139999,250.0,139.0,1.0,2.0,0.0,1.0,-0.442289,-0.896873,-0.707107,0.707107
1089957,982349,28,0,4,5,19,34,0.0,9480.0,203.139999,250.0,140.0,1.0,2.0,0.0,1.0,-0.876727,-0.480989,0.442289,-0.896873
1089958,982349,29,0,4,5,19,34,0.0,10125.0,203.139999,250.0,140.0,1.0,2.0,0.0,1.0,-0.876727,-0.480989,-0.707107,0.707107
1089959,982349,30,0,4,5,19,36,0.0,9310.0,203.149994,735.0,140.0,1.0,4.0,0.0,1.0,-0.996195,-0.087156,-0.971342,-0.237686


In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Define feature categories
shared_features = ["officeID", "OD"]
choice_col = ["choice"]
items_features = [
    "fAirline", "staySaturday", "stayDurationMinutes", "totalPrice",
    "totalTripDurationMinutes", "dtd", "nAirlines", "nFlights",
    "isContinental", "isDomestic", "outDepTime_sin", "outDepTime_cos",
    "outArrTime_sin", "outArrTime_cos", "depWeekDay"
]

grouped = df.groupby("individual")
# Extract choices dynamically
choices = np.array([group[choice_col].values.argmax() for _, group in grouped])

# Extract shared features (ensuring uniqueness)
shared_features_array = df[shared_features].drop_duplicates().to_numpy().astype("float32")

# Extract items features dynamically per individual
items_features_array = [group[items_features].to_numpy().astype("float32") for _, group in grouped]
max_alternatives = max(len(alts) for alts in items_features_array)
items_features_padded = pad_sequences(items_features_array, 
                                      maxlen=max_alternatives, 
                                      dtype="float32", 
                                      padding="post", 
                                      value=0.0)  # Padding with zeros


By default, SimpleMNL does not integrate any intercept, but you can precise 'None'.

In [5]:
# Create dataset
dataset = ChoiceDataset(items_features_by_choice=items_features_padded, choices=choices)

# Train MNL model
model = SimpleMNL(intercept=None)
history = model.fit(dataset, get_report=True, verbose=2)

                                fit models needing them such as Conditional Logit.


Using L-BFGS optimizer, setting up .fit() function


















Using L-BFGS optimizer, setting up .fit() function


In [6]:
print("Estimation Negative LogLikelihood:",
      model.evaluate(dataset) * len(dataset))

Estimation Negative LogLikelihood: tf.Tensor(89924.75, shape=(), dtype=float32)


Model analysis and Comparison with R's mlogit package

In [7]:
report = model.report
report

Unnamed: 0,Coefficient Name,Coefficient Estimation,Std. Err,z_value,P(.>z)
0,Weights_items_features_0,-0.026294,0.000569,-46.202667,0.0
1,Weights_items_features_1,0.215729,0.123341,1.749044,0.080283
2,Weights_items_features_2,0.000363,2.4e-05,15.294557,0.0
3,Weights_items_features_3,-0.004643,0.000106,-43.605633,0.0
4,Weights_items_features_4,-0.050113,0.000862,-58.108757,0.0
5,Weights_items_features_5,0.534423,0.01547,34.546249,0.0
6,Weights_items_features_6,-0.93868,0.032812,-28.607643,0.0
7,Weights_items_features_7,4.744981,0.092403,51.351082,0.0
8,Weights_items_features_8,61.084888,3.31558,18.423592,0.0
9,Weights_items_features_9,85.840233,,,


In [8]:
coefficient_values = report["Coefficient Estimation"].to_numpy()
coefficient_values

array([-2.6293600e-02,  2.1572874e-01,  3.6261088e-04, -4.6430971e-03,
       -5.0112791e-02,  5.3442329e-01, -9.3868017e-01,  4.7449808e+00,
        6.1084888e+01,  8.5840233e+01,  5.3701258e-01, -2.3860253e-01,
       -9.1415435e-01,  1.8930528e-01,  8.3118719e-01], dtype=float32)

In [9]:
# Function to calculate top-k accuracy
def top_k_accuracy(y_true, y_proba, k=1):
    # Get the indices of top k predictions for each sample
    top_k_indices = np.argsort(y_proba, axis=1)[:, -k:]
    
    # Check if true label is in top k predictions
    matches = [y_true[i] in top_k_indices[i] for i in range(len(y_true))]
    
    # Return accuracy
    return np.mean(matches)


In [10]:
# 1. Get the features
X = items_features_padded

# 2. Calculate utilities: dot product of features and coefficients
utilities = np.zeros((X.shape[0], X.shape[1]))
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        # Skip padded items (all zeros)
        if np.all(X[i, j] == 0) and j > 0:
            continue
        # Calculate utility for this alternative
        utilities[i, j] = np.dot(X[i, j], coefficient_values)

# 3. Apply softmax to get probabilities
exp_utilities = np.exp(utilities)
# Mask out padding (zero rows)
mask = np.any(X != 0, axis=2)
exp_utilities = exp_utilities * mask
# Calculate probabilities with softmax
probabilities = exp_utilities / np.sum(exp_utilities, axis=1, keepdims=True)

# 4. Get predicted choices
predictions = np.argmax(probabilities, axis=1)

In [11]:
# Calculate accuracy metrics
accuracy = np.mean(predictions == choices)
top5_accuracy = top_k_accuracy(choices, probabilities, k=5)

print(f"Top-1 Accuracy: {accuracy}")
print(f"Top-5 Accuracy: {top5_accuracy}")

Top-1 Accuracy: 0.1935730906306147
Top-5 Accuracy: 0.5664339783806074
