<a href="https://colab.research.google.com/github/AshMurali77/mixed-logit-optimization/blob/main/Project_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports and Data Setup

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random

In [None]:
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and CUDA), then sets ``cudnn.deterministic = True`` and
    ``cudnn.benchmark = False``.

    Args:
        seed: Value passed unchanged to each seeding call.
    """
    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current CUDA device, then all CUDA devices
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN flags set for reproducibility
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Fix the seed once, before any data or model is created
set_seed(42)

In [None]:
# Column layout of the tab-separated data file, which has no header row.
columns = ["Transaction ID", "Choice", "Price", "Cost", "Distance", "Electric", "Gas", "Hybrid", "High Performance"]

df = pd.read_csv("Data(HW8).txt", sep="\t", header=None, names=columns)

# Cast every column to its intended dtype in one pass.
df = df.astype({
    'Transaction ID': int,
    'Choice': int,
    'Price': int,
    'Cost': float,
    'Distance': float,
    'Electric': int,
    'Gas': int,
    'Hybrid': int,
    'High Performance': int,
})

# Standardize the continuous features.
# NOTE(review): the scaler is fitted on all rows, including those that later
# become the validation and test sets - confirm this is acceptable.
scaler = StandardScaler()
columns_to_scale = ['Price', 'Cost', 'Distance']
data_to_scale = df[columns_to_scale]
scaled_data = scaler.fit_transform(data_to_scale)
df[columns_to_scale] = scaled_data

# Each transaction occupies ALTERNATIVES_PER_TRANSACTION consecutive rows
# (assumed from the original reshape to (1484, 3, 7) - verify against the
# data file). The number of transactions is inferred from the row count
# instead of being hard-coded.
ALTERNATIVES_PER_TRANSACTION = 3
N_ALL_TRAIN = 1000  # transactions used for training + validation; the rest are test
N_TRAIN = 700       # of those, transactions used for training; the rest are validation

feature_columns = ["Price", "Cost", "Distance", "Electric", "Gas", "Hybrid", "High Performance"]

if len(df) % ALTERNATIVES_PER_TRANSACTION != 0:
    raise ValueError(
        f"Expected a multiple of {ALTERNATIVES_PER_TRANSACTION} rows, got {len(df)}"
    )

X = np.asarray(df[feature_columns]).reshape((-1, ALTERNATIVES_PER_TRANSACTION, len(feature_columns)))
y = np.asarray(df[["Choice"]]).reshape((-1, ALTERNATIVES_PER_TRANSACTION))

# Train-test split (by position, no shuffling)
X_all_train, X_test = np.split(X, [N_ALL_TRAIN])
y_all_train, y_test = np.split(y, [N_ALL_TRAIN])

# Train-val split
X_train, X_val = np.split(X_all_train, [N_TRAIN])
y_train, y_val = np.split(y_all_train, [N_TRAIN])

# Conversion to tensors: float32 features, int64 choice indicators
X_all_train = torch.tensor(X_all_train, dtype=torch.float32)
X_train = torch.tensor(X_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

y_all_train = torch.tensor(y_all_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

### Model Setup

In [None]:
class MixedLogitModel(nn.Module):
    """Two-layer network that turns per-alternative features into choice probabilities.

    Every alternative is passed through ``fc1 -> ReLU -> fc2``, giving
    ``num_segments`` scores per alternative. For each segment the scores are
    soft-maxed across the alternatives, and the resulting probabilities are
    averaged over the segments.

    The returned values are probabilities (each row sums to 1), not logits.
    NOTE(review): the training cells in this notebook pass this output
    directly to ``nn.CrossEntropyLoss``, which applies its own log-softmax to
    its input - confirm that is intended.

    Args:
        input_size: Number of features per alternative.
        hidden_size: Width of the hidden layer.
        num_segments: Number of outputs of the second linear layer.
    """

    def __init__(self, input_size, hidden_size, num_segments):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_segments)

    def forward(self, x):
        """Compute choice probabilities.

        Args:
            x: Tensor of shape (batch_size, num_alternatives, input_size).

        Returns:
            Tensor of shape (batch_size, num_alternatives).
        """
        num_alternatives = x.shape[1]  # captured dynamically, not fixed at construction

        # Flatten to (batch_size * num_alternatives, input_size) for the linear
        # layers. reshape (unlike view) also accepts non-contiguous input such
        # as a transposed or sliced tensor.
        x = x.reshape(-1, x.size(2))

        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)

        # Back to (batch_size, num_alternatives, num_segments)
        out = out.reshape(-1, num_alternatives, out.size(1))

        # Softmax across alternatives, separately for each segment
        out = F.softmax(out, dim=1)

        # Mean pooling across segments
        out = torch.mean(out, dim=2)

        return out


In [None]:
# Network used throughout Question 1: 7 features per alternative, a hidden
# layer of width 10 and 3 outputs from the second linear layer.
model = MixedLogitModel(input_size=7, hidden_size=10, num_segments=3)
print(model)

MixedLogitModel(
  (fc1): Linear(in_features=7, out_features=10, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=10, out_features=3, bias=True)
)


## Question 1:

### Create Optimizers

In [None]:
# Hyperparameter grids: learning rates and momentum values
lr = [0.1, 0.2, 0.3, 0.4]
mom = [0.3, 0.5, 0.6, 0.9]

# For each learning rate: one vanilla setting (momentum None) followed by one
# setting per momentum value.
hyperparameters = []
for rate in lr:
    hyperparameters.append((rate, None))
    hyperparameters.extend((rate, value) for value in mom)

# Map a readable label to an SGD optimizer for each setting. Distinct loop
# names are used so the grids `lr` and `mom` are not overwritten by scalars.
# NOTE(review): every optimizer is bound to the parameters of the same `model`
# instance, so they all update one shared set of weights.
optimizers = {}
for rate, momentum in hyperparameters:
    if momentum is None:
        optimizers[f"Vanilla ({rate})"] = optim.SGD(model.parameters(), lr=rate)
    else:
        optimizers[f"Momentum {(rate, momentum)}"] = optim.SGD(model.parameters(), lr=rate, momentum=momentum)

### Calculate Training and Validation Losses

In [None]:
# Number of training epochs. Used as the loop bound by the Question 1 and 2
# training loops; the Question 3 cells compare their loop index against
# num_epochs - 1.

num_epochs = 100

In [None]:
train_loss = {}
validation_loss = {}

# Convert one-hot encoded choice labels to class indices
y_train_class = torch.argmax(y_train, dim=1)
y_val_class = torch.argmax(y_val, dim=1)

# NOTE(review): the model returns probabilities while nn.CrossEntropyLoss
# applies log-softmax to its input - confirm this pairing is intended.
criterion = nn.CrossEntropyLoss()

# All optimizers update the same `model`. Without a reset, each setting would
# start from the weights left by the previous one, so the losses would not be
# comparable. Snapshot the starting weights and restore them for every setting.
initial_state = {key: value.clone() for key, value in model.state_dict().items()}

for optimizer_name, optimizer in optimizers.items():
    # load_state_dict copies values into the existing parameter tensors, so
    # the optimizer's references to them stay valid.
    model.load_state_dict(initial_state)

    # Full-batch training on the training split
    model.train()
    for epoch in range(num_epochs):
        outputs = model(X_train)
        loss = criterion(outputs, y_train_class)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch == (num_epochs - 1):  # keep only the final epoch's training loss
            train_loss[optimizer_name] = loss.item()

    # Validation loss of the trained weights, evaluated once after training
    model.eval()
    with torch.no_grad():
        outputs = model(X_val)
        loss = criterion(outputs, y_val_class)
        validation_loss[optimizer_name] = loss.item()

In [None]:
# Report the final-epoch training loss recorded for each optimizer setting
for model_name in train_loss:
    print(f"{model_name} has a training loss of {train_loss[model_name]:.5f}")

Vanilla (0.1) has a training loss of 1.07073
Momentum (0.1, 0.3) has a training loss of 1.02970
Momentum (0.1, 0.5) has a training loss of 1.00920
Momentum (0.1, 0.6) has a training loss of 0.99973
Momentum (0.1, 0.9) has a training loss of 0.98577
Vanilla (0.2) has a training loss of 0.98421
Momentum (0.2, 0.3) has a training loss of 0.98240
Momentum (0.2, 0.5) has a training loss of 0.98039
Momentum (0.2, 0.6) has a training loss of 0.97872
Momentum (0.2, 0.9) has a training loss of 0.97513
Vanilla (0.3) has a training loss of 0.97471
Momentum (0.3, 0.3) has a training loss of 0.97423
Momentum (0.3, 0.5) has a training loss of 0.97363
Momentum (0.3, 0.6) has a training loss of 0.97295
Momentum (0.3, 0.9) has a training loss of 0.97079
Vanilla (0.4) has a training loss of 0.97040
Momentum (0.4, 0.3) has a training loss of 0.97004
Momentum (0.4, 0.5) has a training loss of 0.96959
Momentum (0.4, 0.6) has a training loss of 0.96912
Momentum (0.4, 0.9) has a training loss of 0.96688


In [None]:
# Question 1a: report the validation loss recorded for each optimizer setting
for model_name in validation_loss:
    print(f"{model_name} has a validation loss of {validation_loss[model_name]:.5f}")

Vanilla (0.1) has a validation loss of 1.07526
Momentum (0.1, 0.3) has a validation loss of 1.03965
Momentum (0.1, 0.5) has a validation loss of 1.01738
Momentum (0.1, 0.6) has a validation loss of 1.00500
Momentum (0.1, 0.9) has a validation loss of 0.99396
Vanilla (0.2) has a validation loss of 0.99208
Momentum (0.2, 0.3) has a validation loss of 0.99118
Momentum (0.2, 0.5) has a validation loss of 0.98981
Momentum (0.2, 0.6) has a validation loss of 0.98914
Momentum (0.2, 0.9) has a validation loss of 0.98858
Vanilla (0.3) has a validation loss of 0.98863
Momentum (0.3, 0.3) has a validation loss of 0.98842
Momentum (0.3, 0.5) has a validation loss of 0.98836
Momentum (0.3, 0.6) has a validation loss of 0.98816
Momentum (0.3, 0.9) has a validation loss of 0.98784
Vanilla (0.4) has a validation loss of 0.98740
Momentum (0.4, 0.3) has a validation loss of 0.98713
Momentum (0.4, 0.5) has a validation loss of 0.98680
Momentum (0.4, 0.6) has a validation loss of 0.98657
Momentum (0.4, 0.

### Best Vanilla + Momentum Hyperparameters

In [None]:
# Question 1b: for each optimizer family, pick the setting with the lowest
# validation loss.
vanilla_names = [label for label in validation_loss if 'Vanilla' in label]
momentum_names = [label for label in validation_loss if 'Momentum' in label]

min_vanilla_key = min(vanilla_names, key=validation_loss.get)
min_momentum_key = min(momentum_names, key=validation_loss.get)

print(f'The parameters with the lowest validation loss for vanilla is {min_vanilla_key}')
print(f'The parameters with the lowest validation loss for momentum is {min_momentum_key}')

The parameters with the lowest validation loss for vanilla is Vanilla (0.4)
The parameters with the lowest validation loss for momentum is Momentum (0.4, 0.9)


In [None]:
# Keep only the two winning optimizers (best vanilla first, then best
# momentum), keyed by their labels.
best_opt = {label: optimizers[label] for label in (min_vanilla_key, min_momentum_key)}

### Calculate Training (entire training data set) and Testing Losses

In [None]:
all_train_loss = {}
test_loss = {}
test_accuracy = {}

criterion = nn.CrossEntropyLoss()

# Convert one-hot encoded choice labels to class indices
y_all_train_class = torch.argmax(y_all_train, dim=1)
y_test_class = torch.argmax(y_test, dim=1)

# Both selected optimizers update the same `model`. Restore a common snapshot
# before each run so the second optimizer does not continue from the weights
# the first one produced.
# NOTE(review): the optimizer objects come from `best_opt` and keep whatever
# internal state (e.g. momentum buffers) they accumulated in earlier cells.
starting_state = {key: value.clone() for key, value in model.state_dict().items()}

for optimizer_name, optimizer in best_opt.items():
    model.load_state_dict(starting_state)

    # Full-batch training on the entire training set (train + validation)
    model.train()
    for epoch in range(num_epochs):
        outputs = model(X_all_train)
        loss = criterion(outputs, y_all_train_class)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch == (num_epochs - 1):  # keep only the final epoch's training loss
            all_train_loss[optimizer_name] = loss.item()

    # Evaluate once on the test set after training has finished. This used to
    # sit inside the training loop with its own `for epoch` loop, repeating the
    # same evaluation num_epochs times per training epoch.
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        test_loss[optimizer_name] = criterion(outputs, y_test_class).item()

        predicted = outputs.argmax(dim=1)
        correct = (predicted == y_test_class).sum().item()
        total = y_test.size(0)
        test_accuracy[optimizer_name] = correct / total

In [None]:
# Display the test loss recorded for each selected optimizer
test_loss

{'Vanilla (0.4)': 0.9655735492706299,
 'Momentum (0.4, 0.9)': 0.9674547910690308}

In [None]:
# Display the final-epoch loss on the entire training set for each selected optimizer
all_train_loss

{'Vanilla (0.4)': 0.9684457182884216,
 'Momentum (0.4, 0.9)': 0.9642199277877808}

In [None]:
# Question 1d: test accuracy of each selected optimizer, as a percentage.
# TODO: an earlier note here said the two accuracies came out exactly the
# same and needed investigation - re-check after a fresh run.

print("Testing Accuracies:")
for optimizer_name in test_accuracy:
    print(f"{optimizer_name}: {test_accuracy[optimizer_name]*100:.5f}%")

Testing Accuracies:
Vanilla (0.4): 58.67769%
Momentum (0.4, 0.9): 57.64463%


### In-sample & out-of-sample gaps

In [None]:
# Absolute gap between the full-training-set loss and the test loss, for every
# optimizer that has both. Iterating the dict (not a set intersection) keeps
# the keys in a deterministic order across runs.

train_test_diff = {
    key: abs(all_train_loss[key] - test_loss[key])
    for key in all_train_loss
    if key in test_loss
}

train_test_diff

{'Vanilla (0.4)': 0.002872169017791748,
 'Momentum (0.4, 0.9)': 0.00323486328125}

In [None]:
# Optimizer whose in-sample vs out-of-sample loss gap is smallest

best_improvement = min(train_test_diff, key=train_test_diff.get)

print(best_improvement)

Vanilla (0.4)


In [None]:
# Print the name and current values of every trainable parameter of `model`

trainable = ((name, param) for name, param in model.named_parameters() if param.requires_grad)
for name, param in trainable:
    print(name, param.data)

### Question Set #2

In [None]:
num_epochs = 100

N_val = [1, 2, 10, 100, 1000]
models = {}

# Convert one-hot encoded choice labels to class indices
y_all_train_class = torch.argmax(y_all_train, dim=1)
y_test_class = torch.argmax(y_test, dim=1)

# One freshly initialized model per value of N.
# NOTE(review): N is passed as the constructor's second argument (hidden_size)
# while the label calls it "Customer Segments" - confirm which is intended.
for N in N_val:
    name = f"{N} Customer Segments"
    models[name] = MixedLogitModel(7, N, 3)

all_train_loss_2 = {}
test_loss_2 = {}
test_accuracy_2 = {}

criterion = nn.CrossEntropyLoss()

for name, model in models.items():

    optimizer = optim.Adam(model.parameters())

    # Full-batch training on the entire training set
    model.train()
    for epoch in range(num_epochs):
        outputs = model(X_all_train)
        loss = criterion(outputs, y_all_train_class)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if epoch == (num_epochs - 1):  # keep only the final epoch's training loss
            all_train_loss_2[name] = loss.item()

    # Evaluate once on the test set after training has finished. This used to
    # sit inside the training loop with its own `for epoch` loop, repeating the
    # same evaluation num_epochs times per training epoch.
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        test_loss_2[name] = criterion(outputs, y_test_class).item()

        predicted = outputs.argmax(dim=1)
        correct = (predicted == y_test_class).sum().item()
        total = y_test.size(0)
        test_accuracy_2[name] = correct / total

In [None]:
# Test accuracy of each Question 2 model, as a percentage
print("Testing Accuracies:")
for optimizer_name in test_accuracy_2:
    print(f"{optimizer_name}: {test_accuracy_2[optimizer_name]*100:.5f}%")

Testing Accuracies:
1 Customer Segments: 37.60331%
2 Customer Segments: 49.17355%
10 Customer Segments: 45.45455%
100 Customer Segments: 56.40496%
1000 Customer Segments: 56.61157%


In [None]:
# Final-epoch training loss of each Question 2 model
for model_name in all_train_loss_2:
    print(f"{model_name} has a training loss of {all_train_loss_2[model_name]:.5f}")

1 Customer Segments has a training loss of 1.09739
2 Customer Segments has a training loss of 1.07311
10 Customer Segments has a training loss of 1.06790
100 Customer Segments has a training loss of 1.00137
1000 Customer Segments has a training loss of 0.97152


In [None]:
# Signed gap (training loss minus test loss) for every Question 2 model that
# has both. Iterating the dict (not a set intersection) keeps the keys in the
# order the models were defined, run after run.
train_test_diff2 = {
    key: all_train_loss_2[key] - test_loss_2[key]
    for key in all_train_loss_2
    if key in test_loss_2
}

train_test_diff2

{'1 Customer Segments': 0.00021195411682128906,
 '2 Customer Segments': -0.0006042718887329102,
 '10 Customer Segments': -0.0008310079574584961,
 '1000 Customer Segments': 0.00988858938217163,
 '100 Customer Segments': 0.013848483562469482}

In [None]:
# Question 2 model with the lowest test loss
final_model = min(test_loss_2, key=test_loss_2.get)

print(final_model)

1000 Customer Segments


### Question Set #3

#### Creating DataLoaders for Each Batch Size

In [None]:
batch_sizes = [1, 10, 100, 1000]

# One loader pair per batch size: the full training set is shuffled, the test
# set keeps its original order.
train_val_loaders = {}
test_loaders = {}
for b in batch_sizes:
    train_val_loaders[b] = DataLoader(
        TensorDataset(X_all_train, y_all_train), batch_size=b, shuffle=True
    )
    test_loaders[b] = DataLoader(
        TensorDataset(X_test, y_test), batch_size=b, shuffle=False
    )

#### Training Model for Each Batch Size (3a)

In [None]:
input_size = 7  # Number of features
# NOTE(review): passed as the model's third argument (num_segments), although
# the comment describes it as the number of choices - confirm.
output_size = 3  # Number of choices
N = 100  # Number of neurons
model_losses = {}  # batch size -> mean batch loss over the final epoch
model_params = {}  # batch size -> copy of the trained state dict

for b in batch_sizes:
    # Fresh model and optimizer for every batch size
    model = MixedLogitModel(input_size, N, output_size)
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.CrossEntropyLoss()
    losses = []            # every batch loss, across all epochs
    epoch_avg_losses = []  # mean batch loss of each epoch

    print(f"Training with Batch Size: {b}")
    model.train()
    # Loop over num_epochs (not a literal 100) so the final-epoch check below
    # always fires on the last iteration.
    for epoch in range(num_epochs):
        epoch_loss = []
        # `features` rather than `data`, so the `torch.utils.data as data`
        # import is not overwritten.
        for features, target in train_val_loaders[b]:
            target = target.float()  # one-hot labels, used as class probabilities
            optimizer.zero_grad()
            output = model(features)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())
        avg_loss = np.mean(epoch_loss)
        losses.extend(epoch_loss)
        epoch_avg_losses.append(avg_loss)
        if epoch == (num_epochs - 1):
            final_loss = avg_loss
            model_losses[b] = avg_loss
            # Store independent copies so the snapshot cannot alias tensors
            # that are still attached to a live model.
            model_params[b] = {key: value.detach().clone() for key, value in model.state_dict().items()}
            print(f"Final Training Loss for Batch Size {b}: {avg_loss:.4f}")

Training with Batch Size: 1
Final Training Loss for Batch Size 1: 0.9665
Training with Batch Size: 10
Final Training Loss for Batch Size 10: 0.9837
Training with Batch Size: 100
Final Training Loss for Batch Size 100: 1.0373
Training with Batch Size: 1000
Final Training Loss for Batch Size 1000: 1.0943


#### Evaluate Model Generalization

In [None]:
test_losses = {}  # batch size -> mean test loss per sample
for b in batch_sizes:
    model = MixedLogitModel(input_size, N, output_size)
    model.load_state_dict(model_params[b])  # Load trained parameters
    model.eval()
    criterion = nn.CrossEntropyLoss()

    # Accumulate a sample-weighted sum so the reported value is the mean over
    # the whole test set. Previously only the last batch's loss was reported,
    # and only if the `epoch` variable left over from the training cell
    # happened to equal num_epochs - 1.
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for features, target in test_loaders[b]:
            target = target.float()
            output = model(features)
            loss = criterion(output, target)
            batch_count = features.size(0)
            total_loss += loss.item() * batch_count  # batch mean * size = batch sum
            total_samples += batch_count

    test_losses[b] = total_loss / total_samples if total_samples else float("nan")
    print(f"Testing Loss for Batch Size {b}: {test_losses[b]:.4f}")


Testing Loss for Batch Size 1: 0.5685
Testing Loss for Batch Size 10: 1.0405
Testing Loss for Batch Size 100: 1.0527
Testing Loss for Batch Size 1000: 1.0952


#### Determine Best Batch Size (3b)

In [None]:
def _mean_loss_per_sample(model, loader, criterion):
    """Return the loss averaged over every sample yielded by ``loader``.

    Each batch's mean loss is weighted by the batch size, so a shorter final
    batch does not count as much as a full one. Returns NaN for an empty loader.
    """
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for features, target in loader:
            batch_count = features.size(0)
            batch_loss = criterion(model(features), target.float())
            total_loss += batch_loss.item() * batch_count
            total_samples += batch_count
    return total_loss / total_samples if total_samples else float("nan")


loss_gap_by_batch = {}
best_batch = None
smallest_loss_gap = float('inf')

for b in batch_sizes:
    model = MixedLogitModel(input_size, N, output_size)
    model.load_state_dict(model_params[b])  # Load trained parameters
    model.eval()
    criterion = nn.CrossEntropyLoss()

    # In-sample (training) and out-of-sample (testing) loss of the same weights
    avg_in_sample_loss = _mean_loss_per_sample(model, train_val_loaders[b], criterion)
    avg_out_of_sample_loss = _mean_loss_per_sample(model, test_loaders[b], criterion)

    # Calculate and store the gap
    loss_gap = abs(avg_in_sample_loss - avg_out_of_sample_loss)
    loss_gap_by_batch[b] = loss_gap

    print(f"Batch Size: {b}, In-Sample Loss: {avg_in_sample_loss:.4f}, Out-of-Sample Loss: {avg_out_of_sample_loss:.4f}, Loss Gap: {loss_gap:.4f}")

    # Update the best batch size with the smallest loss gap
    if loss_gap < smallest_loss_gap:
        smallest_loss_gap = loss_gap
        best_batch = b

# Final output
print(f"Best Batch Size for smallest gap between in-sample and out-of-sample losses: {best_batch} with a Loss Gap of: {smallest_loss_gap:.4f}")


Batch Size: 1, In-Sample Loss: 0.9653, Out-of-Sample Loss: 0.9567, Loss Gap: 0.0086
Batch Size: 10, In-Sample Loss: 0.9836, Out-of-Sample Loss: 0.9725, Loss Gap: 0.0110
Batch Size: 100, In-Sample Loss: 1.0371, Out-of-Sample Loss: 1.0332, Loss Gap: 0.0040
Batch Size: 1000, In-Sample Loss: 1.0942, Out-of-Sample Loss: 1.0952, Loss Gap: 0.0010
Best Batch Size for smallest gap between in-sample and out-of-sample losses: 1000 with a Loss Gap of: 0.0010


#### Testing Accuracy (3c)

In [None]:
accuracy_by_batch = {}  # batch size -> test accuracy in percent

for b in batch_sizes:
    # Rebuild the model trained with this batch size
    model = MixedLogitModel(input_size, N, output_size)
    model.load_state_dict(model_params[b])
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():
        for features, target in test_loaders[b]:
            # Labels arrive one-hot; reduce them to class indices
            is_one_hot = target.ndim > 1 and target.shape[1] > 1
            if is_one_hot:
                target = torch.argmax(target, dim=1)
            predicted = model(features).argmax(dim=1)
            total += features.size(0)
            correct += (predicted == target).sum().item()

    # Nothing to report when the loader yielded no samples
    if total == 0:
        print(f"No data was processed for batch size {b}. Unable to calculate accuracy.")
        continue

    accuracy = 100 * correct / total
    accuracy_by_batch[b] = accuracy
    print(f'Accuracy of the model with batch size {b}: {accuracy:.2f}%')


Accuracy of the model with batch size 1: 58.06%
Accuracy of the model with batch size 10: 56.82%
Accuracy of the model with batch size 100: 51.24%
Accuracy of the model with batch size 1000: 41.74%


In [None]:
# Batch size whose model reached the highest test accuracy
best_accuracy_batch = max(accuracy_by_batch, key=lambda size: accuracy_by_batch[size])
best_accuracy = accuracy_by_batch[best_accuracy_batch]
print(f"Best performing batch size: {best_accuracy_batch} with an accuracy of {best_accuracy:.2f}%")


Best performing batch size: 1 with an accuracy of 58.06%
