In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import math
import gurobipy as gp
from gurobipy import GRB
import itertools
from itertools import combinations
from itertools import permutations
from random import choice
import json
import cvxpy as cp
from tkinter import _flatten
import copy
import time
import scipy.stats as stats

In [2]:
np.random.seed(1)
random.seed(1)

In [3]:
raw_jd_choice = pd.read_excel('data_processing/choices.xlsm')  
jd_offertimes = raw_jd_choice.groupby('clickset')['clicknum'].sum()[raw_jd_choice.clickset.unique()]
inc_prod_num = raw_jd_choice['clickset'].value_counts()[raw_jd_choice.clickset.unique()]
assortment_info_df = pd.DataFrame({'assortments':raw_jd_choice.clickset.unique(),'offer_times':jd_offertimes,'includ_prod_num':inc_prod_num})

# extended assortments with outside option 
# transfer to list
clickset = raw_jd_choice['clickset']
clickset_list = []
for cset in clickset:
    num_lst = json.loads(cset)
    #clickset_list.append(num_lst+[0])
    clickset_list.append([0]+num_lst)
raw_jd_choice['clickset'] = clickset_list

n = 9 # product size top 8 products and outside option
print('there are {} different products'.format(n))
jd_collection = []
for cset in clickset_list:
    if cset not in jd_collection:
        jd_collection.append(cset)
print('there are {} different assortments'.format(len(jd_collection)))
print('check offertimes',len(jd_offertimes))

there are 9 different products
there are 134 different assortments
check offertimes 134


In [4]:
def generate_negatively_correlated_covariance_matrix(n):
    # Generate a random correlation matrix with negative correlations
    correlation_matrix = np.random.uniform(low=-0.9, high=-0.1, size=(n, n))
    np.fill_diagonal(correlation_matrix, 1)  # Set diagonal elements to 1
    correlation_matrix = (correlation_matrix + correlation_matrix.T) / 2  # Ensure symmetry

    # Ensure the correlation matrix is valid for Cholesky decomposition
    cholesky_success = False
    while not cholesky_success:
        try:
            # Perform Cholesky decomposition
            cholesky_matrix = np.linalg.cholesky(correlation_matrix)
            cholesky_success = True
        except np.linalg.LinAlgError:
            # If Cholesky decomposition fails, adjust the correlation matrix slightly
            correlation_matrix += np.eye(n) * 0.01  # Adjust diagonal elements slightly

    # Generate random standard deviations
    #standard_deviations = np.random.uniform(0.5, 2, size=n)
    
    variance_outsideopt = np.random.uniform(30,40)
    variance_products = np.random.uniform(0.5,2,size=n-1)
    standard_deviations = np.hstack((variance_outsideopt,variance_products))
    # Construct diagonal matrix from standard deviations
    diagonal_matrix = np.diag(standard_deviations)
    #diagonal_matrix = np.eye(n)

    # Calculate the covariance matrix using Cholesky decomposition and diagonal matrix
    covariance_matrix = cholesky_matrix @ diagonal_matrix @ cholesky_matrix.T

    return covariance_matrix



In [5]:
def is_positive_semidefinite(matrix):
    """
    Check if a matrix is positive semidefinite.
    
    Args:
        matrix (numpy.ndarray): The matrix to check.
        
    Returns:
        bool: True if the matrix is positive semidefinite, False otherwise.
    """
    # Check if the matrix is symmetric
    if not np.allclose(matrix, matrix.T):
        return False
    
    # Check if all eigenvalues are non-negative
    eigvals, _ = np.linalg.eig(matrix)
    return np.all(eigvals >= 0)


In [6]:
def collection_distribution_prob_neg(n,collection):
    # generate mean 
    #mu = np.random.uniform(-2,2,n)
    mu_0 = np.random.uniform(6,10)
    mu_1n = np.random.uniform(-2,2,n-1)
    mu = np.hstack((mu_0,mu_1n))
    # generate covariance matrix with positive correlation
    neg_cov = generate_negatively_correlated_covariance_matrix(n)
    if is_positive_semidefinite(neg_cov): 
        sample_size = 10000
        utility_samples = np.random.multivariate_normal(mu,neg_cov,size=sample_size)
    else:
        print('Negative correlation matrix error')
    
    collection_distribution = np.zeros((n,len(collection)))
    for i in range(len(collection)):
        curr_assortment = collection[i]
        curr_population = [[] for _ in range(sample_size)] 
        
        
        for j in range(sample_size):
            for k in curr_assortment:
                curr_population[j].append(utility_samples[j][k])
                # each sub list records only the utilities of products in the current assortment
                
        frequency = [0]*len(curr_assortment)
        for j in range(sample_size):
            max_index = np.argmax(np.array(curr_population[j]))
            # product is chosen iff the utility of the product is max in the assortment
            frequency[max_index] = frequency[max_index] +1 
            # update the frequency of product to be chosen
            
        prob = np.array(frequency)/np.sum(frequency)
        for j in range(len(curr_assortment)):
            collection_distribution[curr_assortment[j]][i] = prob[j]
            
    return collection_distribution

In [7]:
def whole_instance_generation(n,whole_collection,whole_offertimes):
    ## generate LCMNL instances based on the assortment collection infomation
    # step 1: randomly generate LCMNL parameters
    # 1.1 the number of classes in LCMNL, the number of classes is between 10 and 15, both endpoints are included
    ''' num_classes = 20
    # 1.2 randomly generate weights of all classes 
    #weight_pre = np.random.exponential(1, num_classes)
    weights = np.array([1/ num_classes for _ in range(num_classes)])
    # 1.3 randomly generate parameters of each mnl
    parameters_v = np.random.uniform(-30, 30, size=(num_classes,n)) '''
    ''' parameters_v = np.zeros((num_classes,n))
    for i in range(num_classes):
        parameters_v[i] = np.random.exponential(1, n) '''
    #np.random.uniform(-30, 30, size=(num_classes,n))
    # step 2: generate LCMNL instance with the above parameters of LCMNL
    true_instance = collection_distribution_prob_neg(n,whole_collection)
    # step 3: generate multinomial samples based on lcmnl instance and the emprical assortment offertimes
    purchased_samples = []
    for i in range(len(whole_collection)):
        sample_i = np.random.multinomial(whole_offertimes[i], true_instance[:,i])
        purchased_samples.append(sample_i)
    # step 4: compute the simulated collection probabilities
    whole_choice_collection = np.zeros((n,len(whole_collection)))
    for i in range(len(whole_collection)):
        whole_choice_collection[:,i] = np.array([k/np.sum(purchased_samples[i]) for k in purchased_samples[i]])
        
    return whole_choice_collection,purchased_samples

In [8]:
def filter_collection_offertimes(whole_collection,whole_offertimes,least_offetimes):
    
    collection = []
    offertimes = []
    assortment_index = []
    
    for i in range(len(whole_offertimes)):
        if whole_offertimes[i]>=least_offetimes:
            collection.append(whole_collection[i])
            offertimes.append(whole_offertimes[i])
            assortment_index.append(i)
            
    return collection,offertimes,assortment_index

In [9]:
pred_offer_times_list = [20,30,40,50,60]
pred_test_collection_size = [5,4,3,3,2]
pred_train_collection_size = [24,20,16,12,11]
pred_instance_size = [50,50,50,50,50]
price = np.array([0,1.041,0.456,0.391,1.657,1.174,0.474,0.67,1.522])

In [10]:

all_full_collections = []
all_full_offertimes = []
all_full_assortment_index = []
full_collection_size = []
for i in range(len(pred_offer_times_list)):
    collection, offertimes, assortment_index = filter_collection_offertimes(jd_collection,jd_offertimes,pred_offer_times_list[i])
    all_full_collections.append(collection)
    all_full_offertimes.append(offertimes)
    all_full_assortment_index.append(assortment_index)
    full_collection_size.append(len(assortment_index))
    print("number of assortments with offertimes {} is {}".format(pred_offer_times_list[i], len(assortment_index)))

number of assortments with offertimes 20 is 29
number of assortments with offertimes 30 is 24
number of assortments with offertimes 40 is 19
number of assortments with offertimes 50 is 15
number of assortments with offertimes 60 is 13


In [11]:
# check if the collection are nested
def check_subsets(lists):
    # Convert all lists to sets
    sets = [set(lst) for lst in lists]
    
    # Check subsets
    for i in range(len(sets) - 1):
        if not sets[i + 1].issubset(sets[i]):
            return False
    return True

# Check and print the result
result = check_subsets(all_full_assortment_index)
print("All subsequent lists are subsets of the previous one:", result)

All subsequent lists are subsets of the previous one: True


In [12]:
def filter_probability_frequency2(whole_choice_collection,purchase_samples,assortment_index):
    
    choice_collection = np.zeros((whole_choice_collection.shape[0],len(assortment_index)))
    frequency_collection = np.zeros((whole_choice_collection.shape[0],len(assortment_index)))
    for i in range(len(assortment_index)):
        choice_collection[:,i] = whole_choice_collection[:,assortment_index[i]]
        frequency_collection[:,i] = purchase_samples[assortment_index[i]]
        
    return choice_collection,frequency_collection

In [13]:
# generate instances that satisfy for assortment offertimes is >=20
## generate 50 random full instances 
# the observations with at least 20 times are >= 1e-3
pred_full_instance = []
pred_full_samples = []
#collection_20,offertimes_20,assortment_index = filter_collection_offertimes(jd_collection,jd_offertimes,pred_offer_times_list[0])

for j in range(pred_instance_size[0]):
    print(f'generating {j} th instance')
    # full instance generation 
    curr_whole_instance,curr_purchase_samples = whole_instance_generation(n,jd_collection,jd_offertimes)
    # filter choice probability and purchase frequency of each produt in each assortment
    curr_choice_collection,curr_frequency = filter_probability_frequency2(curr_whole_instance,curr_purchase_samples,all_full_assortment_index[0])

    pred_full_instance.append(curr_whole_instance)
    pred_full_samples.append(curr_purchase_samples)
    
    ''' condition = False
    for x in range(len(all_full_collections[0])):
        for y in all_full_collections[0][x]:
            if curr_choice_collection[y][x]<1e-11:
                condition = True
    while condition:
        curr_whole_instance,curr_purchase_samples = whole_instance_generation(jd_collection,jd_offertimes)
        # filter choice probability and purchase frequency of each produt in each assortment
        curr_choice_collection,curr_frequency = filter_probability_frequency2(curr_whole_instance,curr_purchase_samples,all_full_assortment_index[0])

        condition = False
        for x in range(len(all_full_collections[0])):
            for y in all_full_collections[0][x]:
                if curr_choice_collection[y][x] < 1e-11:
                    condition = True
    if condition == True:
        print('instance_generation error')
    else:

            
        pred_full_instance.append(curr_whole_instance)
        pred_full_samples.append(curr_purchase_samples) '''

generating 0 th instance
generating 1 th instance
generating 2 th instance
generating 3 th instance
generating 4 th instance
generating 5 th instance
generating 6 th instance
generating 7 th instance
generating 8 th instance
generating 9 th instance
generating 10 th instance
generating 11 th instance
generating 12 th instance
generating 13 th instance
generating 14 th instance
generating 15 th instance
generating 16 th instance
generating 17 th instance
generating 18 th instance
generating 19 th instance
generating 20 th instance
generating 21 th instance
generating 22 th instance
generating 23 th instance
generating 24 th instance
generating 25 th instance
generating 26 th instance
generating 27 th instance
generating 28 th instance
generating 29 th instance
generating 30 th instance
generating 31 th instance
generating 32 th instance
generating 33 th instance
generating 34 th instance
generating 35 th instance
generating 36 th instance
generating 37 th instance
generating 38 th insta

In [14]:
def compute_lb_ub_w_ci(frequency_collection,choice_collection,target_z_score):
    
    stardard_error = np.zeros(frequency_collection.shape)
    for i in range(stardard_error.shape[0]):
        for j in range(stardard_error.shape[1]):
            if frequency_collection[i][j]!=0:
                stardard_error[i][j] = np.sqrt((1-choice_collection[i][j])/frequency_collection[i][j])
                
    lb = np.zeros(frequency_collection.shape)
    ub = np.zeros(frequency_collection.shape)
    for i in range(lb.shape[0]):
        for j in range(lb.shape[1]):
            if frequency_collection[i][j]!=0:
                lb[i][j] = choice_collection[i][j] * (1-target_z_score*stardard_error[i][j]) 
                ub[i][j] = choice_collection[i][j] * (1+target_z_score*stardard_error[i][j])
    
    return lb,ub

In [15]:
# full instance generation 
all_full_instances = []
all_full_lb = []
all_full_ub = []

# pre-determined confidence interval 
confidence_level = 0.995
alpha = 1 - confidence_level
# Find z-score for the given confidence level
target_z_score = stats.norm.ppf(1 - alpha / 2)  

for i in range(len(pred_offer_times_list)):
    
    full_instances = []
    full_lbs = []
    full_ubs = []
    print(f'check assortment index for offertimes {pred_offer_times_list[i]}')
    for j in range(pred_instance_size[i]):
        # filter choice probability and purchase frequency of each produt in each assortment
        curr_choice_collection,curr_frequency = filter_probability_frequency2(pred_full_instance[j],pred_full_samples[j],all_full_assortment_index[i])
        # re-check if all choice probability are >= 1e-3
        condition = False
        for x in range(len(all_full_collections[i])):
            for y in all_full_collections[i][x]:
                if curr_choice_collection[y][x] < 1e-3:
                    condition = True
        if condition == True:
            print('instance_generation error')
        
        # compute the collection of lower bound and upper bound l_ij and u_ij
        curr_lb,curr_ub = compute_lb_ub_w_ci(curr_frequency,curr_choice_collection,target_z_score)
        
        full_instances.append(curr_choice_collection)
        full_lbs.append(curr_lb)
        full_ubs.append(curr_ub)
    
    all_full_instances.append(full_instances)
    all_full_lb.append(full_lbs)
    all_full_ub.append(full_ubs)
    



check assortment index for offertimes 20
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation err

In [16]:
# train-test split
all_train_instances = []
all_train_collection = []
all_test_instance = []
all_test_collection = []
all_train_lb = []
all_train_ub = []
all_train_offertimes = []

for i in range(len(pred_instance_size)):
    train_instances_collection = []
    train_collection = []
    train_lb_collection = []
    train_ub_collection = []
    train_offertimes_collection = []
    
    test_instance_collection = []
    test_collection = []
    
    test_index_collection = []
    
    
    for j in range(pred_instance_size[i]):
        
        curr_train_instance = np.zeros((n,pred_train_collection_size[i]))
        curr_train_lb = np.zeros((n,pred_train_collection_size[i]))
        curr_train_ub = np.zeros((n,pred_train_collection_size[i]))
        
        curr_test_instance = np.zeros((n,pred_test_collection_size[i]))
        
        curr_train_collection = []
        curr_test_collection = []
        curr_offertimes_lst = []
        
        while(len(curr_train_collection)==0):
            
            chosen_test_idx = random.sample(range(len(all_full_collections[i])),pred_test_collection_size[i])
            chosen_test_idx.sort()
            
            chosen_train_idx = [x for x in range(len(all_full_collections[i])) if x not in chosen_test_idx ]
    
            for k in range(len(chosen_train_idx)):
                curr_train_collection.append(all_full_collections[i][chosen_train_idx[k]])
                curr_train_instance[:,k] = all_full_instances[i][j][:,chosen_train_idx[k]]
                curr_train_lb[:,k] = all_full_lb[i][j][:,chosen_train_idx[k]]
                curr_train_ub[:,k] = all_full_ub[i][j][:,chosen_train_idx[k]]
                
                curr_offertimes_lst.append(all_full_offertimes[i][chosen_train_idx[k]])
                
            for k in range(len(chosen_test_idx)):
                curr_test_collection.append(all_full_collections[i][chosen_test_idx[k]])
                curr_test_instance[:,k] = all_full_instances[i][j][:,chosen_test_idx[k]]
                
            ## check if the training instance includes all the testing products in the test instance
            # ensure each test product has been offered at least  once
            new_lst_train = sum(curr_train_collection,[])
            unique_numbers_train = list(set(new_lst_train)) 
            
            new_lst_test = sum(curr_test_collection,[])
            unique_numbers_test = list(set(new_lst_test)) 
            
            for num in unique_numbers_test:
                if num not in unique_numbers_train:
                    #print('the unique train numbers',unique_numbers_train)
                    #print('the unique test numbers',unique_numbers_test)
                    curr_train_collection = []
                    curr_test_collection = []
                    curr_train_instance = np.zeros((n,pred_train_collection_size[i]))
                    curr_test_instance = np.zeros((n,pred_test_collection_size[i]))
                    curr_train_lb = np.zeros((n,pred_train_collection_size[i]))
                    curr_train_ub = np.zeros((n,pred_train_collection_size[i]))
                    curr_offertimes_lst = []
            if chosen_test_idx in test_index_collection:
                #print('check if exisits repeated indexes ')
                curr_train_collection = []
                curr_test_collection = []
                curr_train_instance = np.zeros((n,pred_train_collection_size[i]))
                curr_test_instance = np.zeros((n,pred_test_collection_size[i]))
                curr_train_lb = np.zeros((n,pred_train_collection_size[i]))
                curr_train_ub = np.zeros((n,pred_train_collection_size[i]))
                curr_offertimes_lst = []
            elif len(curr_test_collection)>0:
                test_index_collection.append(chosen_test_idx)
        #print('chosen training indexes',chosen_train_idx)
        #print('chosen testing indexes',chosen_test_idx)
        #print('curr train collection\n',curr_train_collection)
        #print('curr test collection\n',curr_test_collection)
        df_train = pd.DataFrame(curr_train_instance)
        df_test = pd.DataFrame(curr_test_instance)
        df_offertimes = pd.DataFrame(curr_offertimes_lst)
        #print('curr train instance\n',df_train)
        #print('curr test instance\n',df_test)
        
        ## output the training and testing instance
        ''' df_train.to_csv('instances/train_instances/train_'+str(pred_offer_times_list[i])+'/train_offertimes'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_test.to_csv('instances/test_instances/test_'+str(pred_offer_times_list[i])+'/test_offertimes'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_offertimes.to_csv('instances/train_offertimes/train_'+str(pred_offer_times_list[i])+'/train_offertimes'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv') '''
        
        train_instances_collection.append(curr_train_instance)
        train_collection.append(curr_train_collection)
        train_lb_collection.append(curr_train_lb)
        train_ub_collection.append(curr_train_ub)
        train_offertimes_collection.append(curr_offertimes_lst)
                
        test_instance_collection.append(curr_test_instance)
        test_collection.append(curr_test_collection)
        
    all_train_instances.append(train_instances_collection)
    all_train_collection.append(train_collection)
    all_train_lb.append(train_lb_collection)
    all_train_ub.append(train_ub_collection)
    all_train_offertimes.append(train_offertimes_collection)
    all_test_instance.append(test_instance_collection)
    all_test_collection.append(test_collection)    
        

In [17]:
pd.DataFrame(all_train_instances[0][0])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.713816,0.75641,0.75122,0.764444,0.758882,0.769231,0.684211,0.784314,0.695652,0.733871,...,0.6,0.769231,0.707317,0.708333,0.625,0.844444,0.869565,0.857143,0.8,0.764706
1,0.286184,0.0,0.0,0.0,0.0,0.0,0.210526,0.196078,0.282609,0.177419,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.24359,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.24878,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,...,0.25,0.038462,0.146341,0.25,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.235556,0.0,0.0,0.0,0.019608,0.0,0.0,...,0.15,0.0,0.0,0.0,0.03125,0.044444,0.043478,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,...,0.0,0.192308,0.0,0.0,0.34375,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.241118,0.0,0.0,0.0,0.0,0.08871,...,0.0,0.0,0.146341,0.0,0.0,0.111111,0.0,0.0,0.114286,0.0
7,0.0,0.0,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.086957,0.0,0.085714,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.142857,0.0,0.235294


In [18]:
pd.DataFrame(all_test_instance[0][0])

Unnamed: 0,0,1,2,3,4
0,0.772567,0.75,0.746269,0.718593,0.787879
1,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0
3,0.0,0.125,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0
5,0.227433,0.0,0.119403,0.135678,0.0
6,0.0,0.0,0.134328,0.0,0.030303
7,0.0,0.125,0.0,0.0,0.0
8,0.0,0.0,0.0,0.145729,0.181818


In [19]:
pd.DataFrame(all_train_lb[0][0])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.671804,0.619981,0.716619,0.708293,0.729904,0.718617,0.511387,0.622648,0.505216,0.622469,...,0.382568,0.537289,0.592157,0.447895,0.384769,0.692785,0.672445,0.671514,0.61021,0.712838
1,0.244173,0.0,0.0,0.0,0.0,0.0,0.05895,0.040021,0.096254,0.081119,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.10716,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.21418,0.0,0.0,0.0,-0.00884,0.0,0.0,0.0,...,0.057816,-0.067405,0.056883,0.001891,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.179404,0.0,0.0,0.0,-0.03489,0.0,0.0,...,-0.008479,0.0,0.0,0.0,-0.055088,-0.041789,-0.075884,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.038616,0.0,...,0.0,-0.024654,0.0,0.0,0.108067,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.21214,0.0,0.0,0.0,0.0,0.017037,...,0.0,0.0,0.056883,0.0,0.0,-0.020394,0.0,0.0,-0.036672,0.0
7,0.0,0.0,0.0,0.0,0.0,0.180155,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.077966,0.0,-0.047111,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.07283,0.0,0.0,0.0,-0.042772,0.0,0.183427


In [20]:
pd.DataFrame(all_train_ub[0][0])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,14,15,16,17,18,19,20,21,22,23
0,0.755827,0.89284,0.78582,0.820596,0.78786,0.819845,0.857034,0.94598,0.886089,0.845273,...,0.817432,1.001172,0.822477,0.968771,0.865231,0.996104,1.066686,1.042772,0.98979,0.816573
1,0.328196,0.0,0.0,0.0,0.0,0.0,0.362103,0.352136,0.468963,0.273719,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.380019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.283381,0.0,0.0,0.0,0.219366,0.0,0.0,0.0,...,0.442184,0.144328,0.2358,0.498109,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.291707,0.0,0.0,0.0,0.074105,0.0,0.0,...,0.308479,0.0,0.0,0.0,0.117588,0.130678,0.162841,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.082095,0.0,...,0.0,0.409269,0.0,0.0,0.579433,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.270096,0.0,0.0,0.0,0.0,0.160382,...,0.0,0.0,0.2358,0.0,0.0,0.242617,0.0,0.0,0.265244,0.0
7,0.0,0.0,0.0,0.0,0.0,0.281383,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.251879,0.0,0.21854,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.156164,0.0,0.0,0.0,0.328486,0.0,0.287162


In [21]:
all_test_collection[0][0]

[[0, 5], [0, 3, 7], [0, 5, 6], [0, 5, 8], [0, 6, 8]]

In [22]:

def find_product_notin_assortment(collection,n):
    notin_collection = []
    for i in range(len(collection)):
        notin_assortment = []
        for j in range(n):
            if j not in collection[i]:
                notin_assortment.append(j)
        notin_collection.append(notin_assortment)
    return notin_collection

In [23]:
def robust_mdm_prediction_w_ci(lb,ub,data, collection, unseen_assortment, price,produts_notin_collection):
    
    eps = 0.01
    l = len(unseen_assortment)
    n, m = data.shape

    prod_in_ass = [[] for _ in range(n)]
    for i, assort in enumerate(collection):
        for j in assort:
            prod_in_ass[j].append(i)

    # Create a Gurobi model
    model = gp.Model('robust_mdm_ci')
    model.setParam('OutputFlag', 0)

    # Define variables
    x = model.addVars(l, vtype=GRB.CONTINUOUS, name="x", lb=0) # choice probabilities of unseen assortment
    y = model.addVars(n, m, vtype=gp.GRB.CONTINUOUS, lb=0, name="y") # define for the in sample choice probability
    delta_plus = model.addVars(m, vtype=GRB.BINARY, name="delta_plus") # delta_{A,S}
    delta_minu = model.addVars(m, vtype=GRB.BINARY, name="delta_minu") # delta_{S,A}
    eta = model.addVar(lb=0, name="eta") # lambda_A 
    lam = model.addVars(m, lb=0, vtype=GRB.CONTINUOUS, name="lam") # lambda_S for all S in the collection
    delta = model.addVars(m, m, vtype=GRB.BINARY, name="delta") # delta_{S,T}

    # Normalization constraints of unseen assortment and bound on the lambda(A)
    model.addConstr(gp.quicksum(x[i] for i in range(l)) == 1, name="Normalization")
    model.addConstr(eta <= 1, name="Eta_Bound")
    
    # Add normalization constraints
    # Bounds for lambda
    for j in range(m):
        model.addConstr(sum(y[i, j] for i in range(n)) == 1, name=f"normalization_{j}")
        model.addConstr(lam[j] <= 1, name=f"Lambda_Bound_{j}")
    
    # in-sample MDM consistency
    for i in range(len(prod_in_ass)):
        if len(prod_in_ass[i]) > 0:
            for j in prod_in_ass[i]:
                for k in prod_in_ass[i]:
                    if j != k:
                        model.addConstr(lam[j] - lam[k] + delta[j, k] >= 0, name=f"Contr1_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(lam[j] - lam[k] - 1 + delta[j, k] + eps * delta[j, k] <= 0,
                                        name=f"Contr2_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] - delta[j, k] + 1 >= 0,
                                        name=f"Contr3_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] - 1 + delta[k, j] <= 0,
                                        name=f"Contr4_pro_{i}_aspair_{i}_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] + delta[j, k] + delta[k, j] >= 0,
                                        name=f"Contr5_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] - delta[j, k] - delta[k, j] <= 0,
                                        name=f"Contr6_pro_{i}_aspair_{j}_{k}")
    
    
    # Out-of-sample MDM consistency
    for k in range(len(unseen_assortment)):
        for i in prod_in_ass[unseen_assortment[k]]:
            model.addConstr(eta - lam[i] + delta_plus[i] >= 0, name=f"Cons1_{k}_{i}")
            model.addConstr(eta - lam[i] - 1 + delta_plus[i] + eps * delta_plus[i] <= 0, name=f"Cons2_{k}_{i}")
            
            model.addConstr(lam[i] - eta + delta_minu[i] >= 0, name=f"Cons3_{k}_{i}")
            model.addConstr(lam[i] - eta - 1 + delta_minu[i] + eps * delta_minu[i] <= 0, name=f"Cons4_{k}_{i}")
            
            model.addConstr(x[k] - y[unseen_assortment[k],i] - delta_plus[i] + 1 >= 0, name=f"Cons5_{k}_{i}")
            model.addConstr(x[k] - y[unseen_assortment[k],i]  + delta_minu[i] - 1 <= 0, name=f"Cons6_{k}_{i}")
            
            model.addConstr(x[k] - y[unseen_assortment[k],i]  + delta_plus[i] + delta_minu[i] >= 0,
                            name=f"Cons7_{k}_{i}")
            model.addConstr(x[k] - y[unseen_assortment[k],i]  - delta_plus[i] - delta_minu[i] <= 0,
                            name=f"Cons8_{k}_{i}")

    # In-sample MDM choice probability lies in the confidence interval
    for i in range(len(collection)):
        # normalization constraint for each assortment. 
        #model.addConstr(gp.quicksum(x[k, i] for k in collection[i]) == 1)
        for j in collection[i]:
            # constraints for the lower bound and upper bound values
            model.addConstr(y[j,i]>= lb[j,i],name=f"lb_product{j}_assortment_{i}")
            model.addConstr(y[j,i]<= ub[j,i],name=f"ub_product{j}_assortment_{i}")

        for k in produts_notin_collection[i]:
            # constraints for products that are not in the assortments
            model.addConstr(y[k,i] == 0,name=f"prob_product{k}_assortment_{i}")

    # Objective
    obj = gp.quicksum(price[unseen_assortment[i]] * x[i] for i in range(l))
    model.setObjective(obj, GRB.MINIMIZE)

    # Optimize the model
    model.optimize()

    # Access the optimal solution
    if model.status == gp.GRB.OPTIMAL:
        #model.write("robust_mdm_ci.sol")
        #model.write("robust_mdm_ci.lp")
        print(f"MDM robust prediction revenue of assortment {unseen_assortment} is : {model.objVal}") 
        y_values = [[y[i, j].X for j in range(m)] for i in range(n)]
        y_values = np.array(y_values) 
        return [model.objVal, [x[i].x for i in range(l)], [lam[i].x for i in range(m)], eta.x,y_values, model.Runtime]
    else:
        print(f'error occurred for MDM robust prediction revenue of assortment {unseen_assortment} .')
        print("Gurobi optimization status:",model.status)
        return [-1000]

    

In [24]:
def best_case_mdm_prediction_w_ci(lb,ub,data, collection, unseen_assortment, price,produts_notin_collection):
    
    eps = 0.01
    l = len(unseen_assortment)
    n, m = data.shape

    prod_in_ass = [[] for _ in range(n)]
    for i, assort in enumerate(collection):
        for j in assort:
            prod_in_ass[j].append(i)

    # Create a Gurobi model
    model = gp.Model('best_case_mdm_ci')
    model.setParam('OutputFlag', 0)

    # Define variables
    x = model.addVars(l, vtype=GRB.CONTINUOUS, name="x", lb=0) # choice probabilities of unseen assortment
    y = model.addVars(n, m, vtype=gp.GRB.CONTINUOUS, lb=0, name="y") # define for the in sample choice probability
    delta_plus = model.addVars(m, vtype=GRB.BINARY, name="delta_plus") # delta_{A,S}
    delta_minu = model.addVars(m, vtype=GRB.BINARY, name="delta_minu") # delta_{S,A}
    eta = model.addVar(lb=0, name="eta") # lambda_A 
    lam = model.addVars(m, lb=0, vtype=GRB.CONTINUOUS, name="lam") # lambda_S for all S in the collection
    delta = model.addVars(m, m, vtype=GRB.BINARY, name="delta") # delta_{S,T}

    # Normalization constraints of unseen assortment and bound on the lambda(A)
    model.addConstr(gp.quicksum(x[i] for i in range(l)) == 1, name="Normalization")
    model.addConstr(eta <= 1, name="Eta_Bound")
    
    # Add normalization constraints
    # Bounds for lambda
    for j in range(m):
        model.addConstr(sum(y[i, j] for i in range(n)) == 1, name=f"normalization_{j}")
        model.addConstr(lam[j] <= 1, name=f"Lambda_Bound_{j}")
    
    # in-sample MDM consistency
    for i in range(len(prod_in_ass)):
        if len(prod_in_ass[i]) > 0:
            for j in prod_in_ass[i]:
                for k in prod_in_ass[i]:
                    if j != k:
                        model.addConstr(lam[j] - lam[k] + delta[j, k] >= 0, name=f"Contr1_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(lam[j] - lam[k] - 1 + delta[j, k] + eps * delta[j, k] <= 0,
                                        name=f"Contr2_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] - delta[j, k] + 1 >= 0,
                                        name=f"Contr3_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] - 1 + delta[k, j] <= 0,
                                        name=f"Contr4_pro_{i}_aspair_{i}_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] + delta[j, k] + delta[k, j] >= 0,
                                        name=f"Contr5_pro_{i}_aspair_{j}_{k}")
                        model.addConstr(y[i, j] - y[i, k] - delta[j, k] - delta[k, j] <= 0,
                                        name=f"Contr6_pro_{i}_aspair_{j}_{k}")
    
    
    # Out-of-sample MDM consistency
    for k in range(len(unseen_assortment)):
        for i in prod_in_ass[unseen_assortment[k]]:
            model.addConstr(eta - lam[i] + delta_plus[i] >= 0, name=f"Cons1_{k}_{i}")
            model.addConstr(eta - lam[i] - 1 + delta_plus[i] + eps * delta_plus[i] <= 0, name=f"Cons2_{k}_{i}")
            
            model.addConstr(lam[i] - eta + delta_minu[i] >= 0, name=f"Cons3_{k}_{i}")
            model.addConstr(lam[i] - eta - 1 + delta_minu[i] + eps * delta_minu[i] <= 0, name=f"Cons4_{k}_{i}")
            
            model.addConstr(x[k] - y[unseen_assortment[k],i] - delta_plus[i] + 1 >= 0, name=f"Cons5_{k}_{i}")
            model.addConstr(x[k] - y[unseen_assortment[k],i]  + delta_minu[i] - 1 <= 0, name=f"Cons6_{k}_{i}")
            
            model.addConstr(x[k] - y[unseen_assortment[k],i]  + delta_plus[i] + delta_minu[i] >= 0,
                            name=f"Cons7_{k}_{i}")
            model.addConstr(x[k] - y[unseen_assortment[k],i]  - delta_plus[i] - delta_minu[i] <= 0,
                            name=f"Cons8_{k}_{i}")

    # In-sample MDM choice probability lies in the confidence interval
    for i in range(len(collection)):
        # normalization constraint for each assortment. 
        #model.addConstr(gp.quicksum(x[k, i] for k in collection[i]) == 1)
        for j in collection[i]:
            # constraints for the lower bound and upper bound values
            model.addConstr(y[j,i]>= lb[j,i],name=f"lb_product{j}_assortment_{i}")
            model.addConstr(y[j,i]<= ub[j,i],name=f"ub_product{j}_assortment_{i}")

        for k in produts_notin_collection[i]:
            # constraints for products that are not in the assortments
            model.addConstr(y[k,i] == 0,name=f"prob_product{k}_assortment_{i}")


    # Objective
    obj = gp.quicksum(price[unseen_assortment[i]] * x[i] for i in range(l))
    model.setObjective(obj, GRB.MAXIMIZE)

    # Optimize the model
    model.optimize()

    # Access the optimal solution
    if model.status == gp.GRB.OPTIMAL:
        #model.write("robust_mdm_ci.sol")
        #model.write("robust_mdm_ci.lp")
        print(f"MDM best_case prediction revenue of assortment {unseen_assortment} is : {model.objVal}") 
        y_values = [[y[i, j].X for j in range(m)] for i in range(n)]
        y_values = np.array(y_values) 
        return [model.objVal, [x[i].x for i in range(l)], [lam[i].x for i in range(m)], eta.x,y_values, model.Runtime]
    else:
        print(f'error occurred for MDM best_case prediction revenue of assortment {unseen_assortment} .')
        print("Gurobi optimization status:",model.status)
        return [-1000]
    

In [25]:
def row_assortment_data(data,collection):
    pre_data = copy.deepcopy(data)
    
    for j in range(data.shape[0]):
        for i in range(len(collection)):
            if j not in collection[i]:
                pre_data[j][i] = -1
    
    return pre_data

def find_indexes_nonnegative_descending(arr):
    row_indexes_sorted_desc_nonnegative = []
    row_indexes_positive_same_value = []

    for row in arr:
        # Find indexes with non-negative values
        nonnegative_row = row[row >= 0]  # Consider only non-negative values
        unique_elements, unique_indexes = np.unique(nonnegative_row, return_index=True)
        sorted_indexes_desc = np.nonzero(row >= 0)[0][unique_indexes][np.argsort(-unique_elements)]

        row_indexes_sorted_desc_nonnegative.append(sorted_indexes_desc)

        # Keep all indexes of elements with positive and same values
        positive_values = unique_elements[unique_elements > 0]
        positive_same_value_indexes = [np.where(row == val)[0] for val in positive_values if np.count_nonzero(row == val) > 1]
        row_indexes_positive_same_value.append(positive_same_value_indexes)

    return row_indexes_sorted_desc_nonnegative, row_indexes_positive_same_value

In [26]:
def mdm_feasibility(data,ranking,equal):
      
    ub = 100

    model = gp.Model('mdm_feasibility')
    model.setParam('OutputFlag', 0)
    eps = model.addVar(name='eps')
    lam = model.addVars(data.shape[1],name = 'lam')
    
    model.addConstr(eps<=ub ) # just give an upper bound (+ve) for eps, o.w., the problem is unbounded
    
    #for i in range(ranking.shape[0]): # from each product 
    for i in range(len(ranking)): # from each product 
        for j in range(len(ranking[i])-1): # the ranking of assortment in product i 
            model.addConstr(lam [ranking[i][j+1]] - lam[ranking[i][j]]-  eps >=0)
    
    for i in range(len(equal)):
        if len(equal[i])>0:
            for j in range(len(equal[i][0])-1):
                model.addConstr(lam[equal[i][0][j]]-lam[equal[i][0][j+1]]==0)
    
    model.setObjective(eps,GRB.MAXIMIZE)
    model.optimize()
    
     # Access the optimal solution
    lam_values = [lam[i].x for i in range(data.shape[1])]
    lam_values = np.array(lam_values)

    if (model.objVal> 0):
        #model.write("mdmfeas_out.sol")
        #model.write("mdmfeas_model.lp") 
        return [1,model.Runtime,lam_values]
    else:
        #model.write("mdminfeas_model.lp")
        #return [0, used_time]
        return [0,model.Runtime,lam_values]
    

In [27]:
def robust_prediction_lp(lambda_S_lst,lambda_A,prod_in_ass,assortment_A,limit_prob,price):
    # robust_prediction_lp(curr_lam,lambda_A,prod_in_ass,assortment_A,curr_limit_prob,price)
    
    model = gp.Model('robust_rev_lp')
    model.setParam('OutputFlag', 0)
    x = model.addVars(len(assortment_A),vtype=GRB.CONTINUOUS, lb=0, name='x')
    
    model.addConstr(gp.quicksum(x[i] for i in range(len(assortment_A))) == 1, name="Normalization")
    
    ## MDM consistency
    for i in range(len(assortment_A)):
        for j in range(len(prod_in_ass[assortment_A[i]])):
            if lambda_A >= lambda_S_lst[prod_in_ass[assortment_A[i]][j]]:
                model.addConstr(x[i]<= limit_prob[ assortment_A[i]][prod_in_ass[assortment_A[i]][j]])
            if lambda_A <= lambda_S_lst[prod_in_ass[assortment_A[i]][j]]:
                model.addConstr(x[i]>= limit_prob[ assortment_A[i]][prod_in_ass[assortment_A[i]][j]])
    
    # Objective
    obj = gp.quicksum(price[assortment_A[i]] * x[i] for i in range(len(assortment_A)))
    model.setObjective(obj, GRB.MINIMIZE)

    # Optimize the model
    model.optimize()
    
    if model.Status == GRB.OPTIMAL :
        return [model.objVal, [x[i].x for i in range(len(assortment_A))], model.Runtime]
    else:
        return [100000,[-1 for i in range(len(assortment_A))], model.Runtime]
    
    

In [28]:
def best_case_prediction_lp(lambda_S_lst,lambda_A,prod_in_ass,assortment_A,limit_prob,price):
    
    model = gp.Model('best_case_rev_lp')
    model.setParam('OutputFlag', 0)
    x = model.addVars(len(assortment_A),vtype=GRB.CONTINUOUS, lb=0, name='x')
    
    model.addConstr(gp.quicksum(x[i] for i in range(len(assortment_A))) == 1, name="Normalization")
    
    ## MDM consistency
    for i in range(len(assortment_A)):
        for j in range(len(prod_in_ass[assortment_A[i]])):
            if lambda_A >= lambda_S_lst[prod_in_ass[assortment_A[i]][j]]:
                model.addConstr(x[i]<= limit_prob[ assortment_A[i]][prod_in_ass[assortment_A[i]][j]])
            if lambda_A <= lambda_S_lst[prod_in_ass[assortment_A[i]][j]]:
                model.addConstr(x[i]>= limit_prob[ assortment_A[i]][prod_in_ass[assortment_A[i]][j]])
    
    # Objective
    obj = gp.quicksum(price[assortment_A[i]] * x[i] for i in range(len(assortment_A)))
    model.setObjective(obj, GRB.MAXIMIZE)

    # Optimize the model
    model.optimize()
    
    if model.Status == GRB.OPTIMAL :
        return [model.objVal, [x[i].x for i in range(len(assortment_A))], model.Runtime]
    else:
        return [-100000,[-1 for i in range(len(assortment_A))], model.Runtime]
    
    

In [29]:
## mdm milp for the worst case revenue
def mdm_revenue(curr_limit_prob, curr_lam, prod_in_ass,lambda_A_lst, assortment_A, price):
    
    robust_revenue_lst = []
    best_case_revenue_lst = []
    robust_pred_runtime = 0
    best_case_pred_runtime = 0
    #robust_prob_lst = []
    #best_case_prob_lst = []

    for j in range(len(lambda_A_lst)):
        lambda_A = lambda_A_lst[j]
        curr_robust_revenue,curr_robust_prob,curr_robust_runtime = robust_prediction_lp(curr_lam,lambda_A,prod_in_ass,assortment_A,curr_limit_prob,price)
        
        robust_revenue_lst.append(curr_robust_revenue)
        robust_pred_runtime = robust_pred_runtime + curr_robust_runtime
        #robust_prob_lst.append(curr_robust_prob)
        
        curr_best_case_revenue,curr_bestcase_prob,curr_best_case_runtime = best_case_prediction_lp(curr_lam,lambda_A,prod_in_ass,assortment_A,curr_limit_prob,price)
        best_case_revenue_lst.append(curr_best_case_revenue)
        best_case_pred_runtime = best_case_pred_runtime + curr_best_case_runtime
        #best_case_prob_lst.append(curr_bestcase_prob)
    
    robust_revenue_A = np.min(robust_revenue_lst)
    best_case_revenue_A = np.max(best_case_revenue_lst)
    
    ''' robust_prob_A = robust_prob_lst[np.argmin(robust_revenue_lst)]
    bestcase_prob_A = best_case_prob_lst[np.argmax(best_case_revenue_lst)] '''
    
    print(f'the robust revenue for assortment {assortment_A} is {robust_revenue_A} with runtime {robust_pred_runtime}')
    if robust_revenue_A>100:
        print('robust MDM prediction error ')
    print(f'the best-case revenue for assortment {assortment_A} is {best_case_revenue_A} with runtime {best_case_pred_runtime}\n')
    if best_case_revenue_A<0:
        print('best-case MDM prediction error ')
    
    ''' lb_sales_A = np.zeros(len(price))
    ub_sales_A = np.zeros(len(price))
    
    for k in range(len(assortment_A)):
        unit_price = np.zeros(n)
        unit_price[assortment_A[k]] = 1
        
        robust_product_prob_lst = []
        bestcase_product_prob_lst = []
        
        for j in range(len(lambda_A_lst)):
            if best_case_revenue_lst[j]>0:
                lambda_A = lambda_A_lst[j]
                curr_robust_prob = robust_prediction_lp(curr_lam,lambda_A,prod_in_ass,assortment_A,curr_limit_prob,unit_price)[0]
                
                robust_product_prob_lst.append(curr_robust_prob)
                
                curr_bestcase_prob = best_case_prediction_lp(curr_lam,lambda_A,prod_in_ass,assortment_A,curr_limit_prob,unit_price)[0]
                bestcase_product_prob_lst.append(curr_bestcase_prob)
        
        lb_sales_A[assortment_A[k]] = np.min(robust_product_prob_lst)
        ub_sales_A[assortment_A[k]] = np.max(bestcase_product_prob_lst)
        print(f'the sales lb for product {assortment_A[k]} in assortment {assortment_A} is {lb_sales_A[assortment_A[k]]}')
        print(f'the sales ub for product {assortment_A[k]} in assortment {assortment_A} is {ub_sales_A[assortment_A[k]]}\n')
     '''
    #return [robust_revenue_A,robust_pred_runtime,robust_prob_A,lb_sales_A, best_case_revenue_A,best_case_pred_runtime, bestcase_prob_A,ub_sales_A]
    return [robust_revenue_A,robust_pred_runtime, best_case_revenue_A,best_case_pred_runtime]


In [30]:

def mdm_prediction(curr_test_collection,curr_test_instance,price,curr_limit_prob, curr_lam, curr_train_collection):
    ''' true assortment ranking '''
    #true_choice_prob = [] 
    true_revenue = []
    
    ''' lb_mdm_rev_prob = []
    ub_mdm_rev_prob = []
    
    lb_mdm_sales = []
    ub_mdm_sales = [] '''
    
    lb_mdm_revenue = [] 
    ub_mdm_revenue = []
    
    lb_mdm_runtime = []
    ub_mdm_runtime = []
    
    prod_in_ass = [[] for _ in range(len(price))]
    for i, assort in enumerate(curr_train_collection):
        for j in assort:
            prod_in_ass[j].append(i)
    
    dec_lam = np.abs(np.sort(-curr_lam))
    lambda_A_lst = [np.max(curr_lam)+1]
    for i in range(len(dec_lam)-1):
        lambda_A_lst.append((dec_lam[i] + dec_lam[i+1])/2)
    lambda_A_lst.append(np.min(curr_lam)-1)
    
    for k in range(len(curr_test_collection)):
            
            assortment_A = curr_test_collection[k]
            
            # true revenue and true choice probabilities for current testing assortment
            curr_revenue = np.dot(curr_test_instance[:,k],price)
            true_revenue.append(curr_revenue)
            #true_choice_prob.append(curr_test_instance[:,k])
            
            # MDM revenue prediction
            print(f'revenue prediction for unseen assortment {assortment_A}')
            mdm_prediction_result = mdm_revenue(curr_limit_prob, curr_lam, prod_in_ass,lambda_A_lst, assortment_A, price)
            
            # return [robust_revenue_A,robust_pred_runtime, best_case_revenue_A,best_case_pred_runtime]
            
            ## record the revenue result
            lb_mdm_revenue.append(mdm_prediction_result[0])
            ub_mdm_revenue.append(mdm_prediction_result[2])
            
            ## record the choice probability result
            ''' lb_prob = np.zeros(n)
            lb_prob_result = mdm_prediction_result[2]
            
            ub_prob = np.zeros(n)
            ub_prob_result = mdm_prediction_result[6]
            
            for l in range(len(assortment_A)):
                lb_prob[assortment_A[l]] = lb_prob_result[l]
                ub_prob[assortment_A[l]] = ub_prob_result[l]
            
            lb_mdm_rev_prob.append(lb_prob)
            ub_mdm_rev_prob.append(ub_prob) '''
            
            ## record the runtime result
            lb_mdm_runtime.append(mdm_prediction_result[1])
            ub_mdm_runtime.append(mdm_prediction_result[3])
            
            
            ## MDM choice probability interval prediction
            ''' lb_sales_frac = mdm_prediction_result[3]
            ub_sales_frac = mdm_prediction_result[-1] '''
            
            ''' lb_mdm_sales.append(lb_sales_frac)
            ub_mdm_sales.append(ub_sales_frac) '''
    return true_revenue,lb_mdm_revenue,ub_mdm_revenue,lb_mdm_runtime,ub_mdm_runtime       
    #return true_choice_prob,true_revenue,lb_mdm_rev_prob,ub_mdm_rev_prob,lb_mdm_sales,ub_mdm_sales,lb_mdm_revenue,ub_mdm_revenue,lb_mdm_runtime,ub_mdm_runtime

In [31]:
def kendall_tau_distance(values1, values2):
    """Compute the Kendall tau distance."""
    num = len(values1)
    assert len(values2) == num, "Both lists have to be of equal length"
    i, j = np.meshgrid(np.arange(num), np.arange(num))
    a = np.argsort(values1)
    b = np.argsort(values2)
    ndisordered = np.logical_or(np.logical_and(a[i] < a[j], b[i] > b[j]), np.logical_and(a[i] > a[j], b[i] < b[j])).sum()
    return ndisordered/2 

In [32]:
# since the underlying choice probabilities are from MEM, 
# we don't need to do representability check and limit computation

## record the kendall tau distance between the predicted ranking of MDM and the truth
all_mdm_lb_distance_list = []  
all_mdm_ub_distance_list = []

## record the true revenue of the predicted best assortment
all_best_true_revenue_list = []
all_best_revenue_mdm_lb = []  
all_best_revenue_mdm_ub =[]

## record the difference in best assortment
all_mdm_lb_best_rev_diff = []
all_mdm_ub_best_rev_diff = []

## record all the true and predicted revenues
all_true_revenue_list = []
all_mdm_lb_revenue_list = []
all_mdm_ub_revenue_list = []

## choice probability under the revenue prediction 
''' all_true_choice_prob_list = [] 
all_mdm_lb_rev_prob_list = []
all_mdm_ub_rev_prob_list = []

all_mdm_lb_sales_list = []
all_mdm_ub_sales_list = [] '''

all_true_ranking = []
all_mdm_lb_ranking = []
all_mdm_ub_ranking = []

## record the runtime of MDM prediction
all_mdm_rev_lb_runtime = []
all_mdm_rev_ub_runtime = []

all_infeasible_collection = []

for i in range(len(all_full_collections)):
    
    # define as a container for quantity of the same collection size 
    # The following lists are of the same size as the instance size
    
    true_best_rev_collection = [] # a container for groundtruth best reveue 
    mdm_lb_best_rev_collection = [] # a container for the best revenue predicted by robust MDM
    mdm_ub_best_rev_collection = [] # a container for the best revenue predicted by best-case MDM
    
    
    true_ranking_collection = [] # a container for groundtruth reveue ranking 
    mdm_lb_ranking_collection = [] # a container for ranking by robust mdm
    mdm_ub_ranking_collection = [] # a container for ranking by best-case mdm
    
    mdm_lb_distance_collection = [] # a container for kendall tau distance between true ranking and the ranking by robust mdm
    mdm_ub_distance_collection = [] # kendall tau distance between true ranking and the ranking by best-case mdm
    
    ''' true_choice_prob_collection = []
    mdm_lb_rev_prob_collection = []
    mdm_ub_rev_prob_collection = []
    mdm_lb_sales_frac_collection = []
    mdm_ub_sales_frac_collection = [] '''
    
    true_revenue_collection = []
    mdm_lb_revenue_collection = []
    mdm_ub_revenue_collection = []
    
    mdm_lb_runtime_collection = []
    mdm_ub_runtime_collection = []
    
    infeasible_collection = []
    
    for j in range(pred_instance_size[i]):

        # the following lists are of the same size as number of testing assortments
        print(f'testing for offertimes {pred_offer_times_list[i]}, {j}th instance')
        ''' true assortment ranking '''
        true_choice_prob = [] 
        true_revenue = []
        
        ''' lb_mdm_rev_prob = []
        ub_mdm_rev_prob = []
        
        lb_mdm_sales = []
        ub_mdm_sales = [] '''
        
        lb_mdm_revenue = [] 
        ub_mdm_revenue = []
        
        lb_mdm_runtime = []
        ub_mdm_runtime = []
        flag=0
        
        row_data = row_assortment_data(all_train_instances[i][j],all_train_collection[i][j])
        ## find the decreasing ranking and equal sequence of the assortment in the data
        rank, equal = find_indexes_nonnegative_descending(row_data)
        ## representability check of MDM
        curr_rep_result = mdm_feasibility(all_train_instances[i][j],rank,equal)
        # return [1/0,model.Runtime] # 1: representable 0: non-representable
        ''' mdm_rep_collection.append(curr_rep_result[0])
        mdm_rep_runtime_collection.append(curr_rep_result[1]) '''
        
                # check if representable
        if curr_rep_result[0] > 0:
            # representable and then do prediction 
            # mdm_prediction(curr_test_instance,curr_test_collection,price,curr_train_instance,curr_train_collection,curr_train_perm_collection,unseen_assortment_perm_collection) mdm_prediction(curr_test_collection,curr_test_instance,price,curr_limit_prob, curr_lam, curr_train_collection)
            true_revenue,lb_mdm_revenue,ub_mdm_revenue,lb_mdm_runtime,ub_mdm_runtime = mdm_prediction(all_test_collection[i][j],all_test_instance[i][j],price,all_train_instances[i][j],curr_rep_result[-1], all_train_collection[i][j])
        else:
            
            produts_notin_collection = find_product_notin_assortment(all_train_collection[i][j],n)
            for k in range(len(all_test_collection[i][j])):
                
                unseen_assortment = all_test_collection[i][j][k]
                
                
                
                # MDM revenue prediction
                # robust_mdm_prediction_w_ci(lb,ub,data, collection, unseen_assortment, price,produts_notin_collection)
                curr_mdm_lb_result = robust_mdm_prediction_w_ci(all_train_lb[i][j],all_train_ub[i][j],all_train_instances[i][j],all_train_collection[i][j],unseen_assortment,price,produts_notin_collection)
                curr_mdm_ub_result = best_case_mdm_prediction_w_ci(all_train_lb[i][j],all_train_ub[i][j],all_train_instances[i][j],all_train_collection[i][j],unseen_assortment,price,produts_notin_collection)
                
                if curr_mdm_lb_result[0]<0:
                    flag=1
                    break
                else:
                    ## record the revenue result
                    lb_mdm_revenue.append(curr_mdm_lb_result[0])
                    ub_mdm_revenue.append(curr_mdm_ub_result[0])
                    
                    ## record the choice probability result
                    ''' lb_prob = np.zeros(n)
                    lb_prob_result = curr_mdm_lb_result[1]
                    
                    ub_prob = np.zeros(n)
                    ub_prob_result = curr_mdm_ub_result[1]
                    
                    for l in range(len(unseen_assortment)):
                        lb_prob[unseen_assortment[l]] = lb_prob_result[l]
                        ub_prob[unseen_assortment[l]] = ub_prob_result[l]
                    
                    lb_mdm_rev_prob.append(lb_prob)
                    ub_mdm_rev_prob.append(ub_prob) '''
                    
                    ## record the runtime result
                    lb_mdm_runtime.append(curr_mdm_lb_result[-1])
                    ub_mdm_runtime.append(curr_mdm_ub_result[-1])
                    
                    
                    # true revenue and true choice probabilities for current testing assortment
                    curr_revenue = np.dot(all_test_instance[i][j][:,k],price)
                    true_revenue.append(curr_revenue)
                    true_choice_prob.append(all_test_instance[i][j][:,k])
                    
                    ## MDM choice probability interval prediction
                    ''' lb_sales_frac = np.zeros(n)
                    ub_sales_frac = np.zeros(n)

                    for l in range(len(unseen_assortment)):
                        unit_price = np.zeros(n)
                        unit_price[unseen_assortment[l]] = 1
                        ## unit price is a vector that takes value 1 only at unseen_assortment[l] and all zeros for other n-1 elements
                        lb_sales_frac[unseen_assortment[l]] = robust_mdm_prediction_w_ci(all_train_lb[i][j],all_train_ub[i][j],all_train_instances[i][j],all_train_collection[i][j],unseen_assortment,unit_price,produts_notin_collection)[0]
                        ub_sales_frac[unseen_assortment[l]] = best_case_mdm_prediction_w_ci(all_train_lb[i][j],all_train_ub[i][j],all_train_instances[i][j],all_train_collection[i][j],unseen_assortment,unit_price,produts_notin_collection)[0]
                    
                    lb_mdm_sales.append(lb_sales_frac)
                    ub_mdm_sales.append(ub_sales_frac) '''
        if flag==0:        
            true_revenue_collection.append(true_revenue)
            #true_choice_prob_collection.append(true_choice_prob)
            
            mdm_lb_revenue_collection.append(lb_mdm_revenue)   
            mdm_ub_revenue_collection.append(ub_mdm_revenue)
            #mdm_lb_rev_prob_collection.append(lb_mdm_rev_prob)
            #mdm_ub_rev_prob_collection.append(ub_mdm_rev_prob)
            
            mdm_lb_runtime_collection.append(lb_mdm_runtime)
            mdm_ub_runtime_collection.append(ub_mdm_runtime)
            
            ## sales fraction record
            #mdm_lb_sales_frac_collection.append(lb_mdm_sales)
            #mdm_ub_sales_frac_collection.append(ub_mdm_sales)
            
            # true assortment ranking for test instance [i][j]
            curr_true_rank = np.argsort(-np.array(true_revenue))  
            true_ranking_collection.append(curr_true_rank)
            # true best assortment revenue for test instance [i][j]
            true_best_rev_collection.append(true_revenue[curr_true_rank[0]])
            
            # lb mdm assortment ranking for test instance [i][j] 
            curr_mdm_lb_rank  = np.argsort(-np.array(lb_mdm_revenue)) 
            mdm_lb_ranking_collection.append(curr_mdm_lb_rank)
            mdm_lb_best_rev_collection.append(true_revenue[curr_mdm_lb_rank[0]])
            
            # ub mdm assortment ranking for test instance [i][j]
            curr_mdm_ub_rank = np.argsort(-np.array(ub_mdm_revenue)) 
            mdm_ub_ranking_collection.append(curr_mdm_ub_rank)
            mdm_ub_best_rev_collection.append(true_revenue[curr_mdm_ub_rank[0]])
            
            #  MDM kendallTau Distance for current instance [i][j]
            curr_mdm_lb_dist = kendall_tau_distance(curr_true_rank, curr_mdm_lb_rank)
            mdm_lb_distance_collection.append(curr_mdm_lb_dist)
            
            curr_mdm_ub_dist = kendall_tau_distance(curr_true_rank, curr_mdm_ub_rank)
            mdm_ub_distance_collection.append(curr_mdm_ub_dist)
        else:
            infeasible_collection.append(flag)
    
    # for each test instance [i][j] kendall tau distance is a number
    all_mdm_lb_distance_list.append(mdm_lb_distance_collection)
    all_mdm_ub_distance_list.append(mdm_ub_distance_collection)
    
    # for each test instance [i][j] best revenue is a number
    all_best_true_revenue_list.append(true_best_rev_collection) 
    all_best_revenue_mdm_lb.append(mdm_lb_best_rev_collection)
    all_best_revenue_mdm_ub.append(mdm_ub_best_rev_collection)
    
    # for each test instance [i][j] best revenue difference is a number
    all_mdm_lb_best_rev_diff.append((np.array(true_best_rev_collection) - np.array(mdm_lb_best_rev_collection))/ np.array(true_best_rev_collection))
    all_mdm_ub_best_rev_diff.append((np.array(true_best_rev_collection) - np.array(mdm_ub_best_rev_collection))/np.array(true_best_rev_collection))
    
    # for each test instance [i][j], true revenue is a list of the same size as the number of testing assortments
    all_true_revenue_list.append(true_revenue_collection)
    #all_true_choice_prob_list.append(true_choice_prob_collection)
    all_mdm_lb_revenue_list.append(mdm_lb_revenue_collection)
    all_mdm_ub_revenue_list.append(mdm_ub_revenue_collection)
    #all_mdm_lb_rev_prob_list.append(mdm_lb_rev_prob_collection)
    #all_mdm_ub_rev_prob_list.append(mdm_ub_rev_prob_collection)
    
    #all_mdm_lb_sales_list.append(mdm_lb_sales_frac_collection)
    #all_mdm_ub_sales_list.append(mdm_ub_sales_frac_collection)
    
    # for each test instance [i][j], true ranking is a list of the same size as the number of testing assortments
    all_true_ranking.append(true_ranking_collection)
    all_mdm_lb_ranking.append(mdm_lb_ranking_collection)
    all_mdm_ub_ranking.append(mdm_ub_ranking_collection)
    
    ## for each test instance [i][j], runtime is a list of the same size as the number of testing assortments
    all_mdm_rev_lb_runtime.append(mdm_lb_runtime_collection)
    all_mdm_rev_ub_runtime.append(mdm_ub_runtime_collection)
    
    all_infeasible_collection.append(infeasible_collection)
    

testing for offertimes 20, 0th instance
Academic license - for non-commercial use only - expires 2024-12-10
Using license file /Users/autumn/gurobi.lic
MDM robust prediction revenue of assortment [0, 5] is : 0.1268704018825796
MDM best_case prediction revenue of assortment [0, 5] is : 0.30305484794152904
MDM robust prediction revenue of assortment [0, 3, 7] is : 0.09547160774843749
MDM best_case prediction revenue of assortment [0, 3, 7] is : 0.17120654904399127
MDM robust prediction revenue of assortment [0, 5, 6] is : 0.10152115205317716
MDM best_case prediction revenue of assortment [0, 5, 6] is : 0.30305484794152904
MDM robust prediction revenue of assortment [0, 5, 8] is : 0.21534288215082387
MDM best_case prediction revenue of assortment [0, 5, 8] is : 0.45270940823718153
MDM robust prediction revenue of assortment [0, 6, 8] is : 0.10055459006615582
MDM best_case prediction revenue of assortment [0, 6, 8] is : 0.44337837901885485
testing for offertimes 20, 1th instance
MDM robust

In [33]:
## adding details 1
for i in range(len(pred_offer_times_list)):
    df_mdm_details1 = pd.DataFrame({'ins_idx':list(range(pred_instance_size[i]-len(all_infeasible_collection[i]))),'mdm_lb_dist':all_mdm_lb_distance_list[i],'mdm_ub_dist':all_mdm_ub_distance_list[i],
                                    'true_best_rev':all_best_true_revenue_list[i],'mdm_lb_best_rev':all_best_revenue_mdm_lb[i],'mdm_ub_best_rev':all_best_revenue_mdm_ub[i],
                                    'mdm_lb_best_rev_diff':all_mdm_lb_best_rev_diff[i],'mdm_ub_best_rev_diff':all_mdm_ub_best_rev_diff[i]})
    df_mdm_details1.to_csv('prediction/mdm/revenue_prediction/details1/'+str(pred_offer_times_list[i])+'.csv')

In [34]:
## adding details 2 about ranking, revenue, choice probability
for i in range(len(pred_offer_times_list)):
    for j in range(pred_instance_size[i]-sum(all_infeasible_collection[i])):
        
        true_rank = all_true_ranking[i][j]
        lb_rank = all_mdm_lb_ranking[i][j]
        ub_rank = all_mdm_ub_ranking[i][j]
        df_rank = pd.DataFrame({'true_rank':true_rank,'lb_rank':lb_rank,'ub_rank':ub_rank})
        df_rank.to_csv('prediction/mdm/revenue_prediction/details2/ranking/offertimes'+str(pred_offer_times_list[i])+'/rank_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        true_rev = all_true_revenue_list[i][j]
        lb_rev = all_mdm_lb_revenue_list[i][j]
        ub_rev = all_mdm_ub_revenue_list[i][j]
        df_rev = pd.DataFrame({'true_rev':true_rev,'lb_rev':lb_rev,'ub_rev':ub_rev})
        df_rev.to_csv('prediction/mdm/revenue_prediction/details2/revenue/offertimes'+str(pred_offer_times_list[i])+'/rev_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        ''' true_prob = all_true_choice_prob_list[i][j]
        lb_prob = all_mdm_lb_rev_prob_list[i][j]
        ub_prob = all_mdm_ub_rev_prob_list[i][j]
        
        df_true_prob = pd.DataFrame(true_prob).T
        df_lb_prob = pd.DataFrame(lb_prob).T
        df_ub_prob = pd.DataFrame(ub_prob).T
        
        df_true_prob.to_csv('prediction/mdm/revenue_prediction/details2/probabilities/offertimes'+str(pred_offer_times_list[i])+'/true/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_lb_prob.to_csv('prediction/mdm/revenue_prediction/details2/probabilities/offertimes'+str(pred_offer_times_list[i])+'/mdm_lb/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_ub_prob.to_csv('prediction/mdm/revenue_prediction/details2/probabilities/offertimes'+str(pred_offer_times_list[i])+'/mdm_ub/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
         '''
        lb_runtime = all_mdm_rev_lb_runtime[i][j]
        ub_runtime = all_mdm_rev_ub_runtime[i][j]
        df_runtime = pd.DataFrame({'lb_runtime':lb_runtime,'ub_runtime':ub_runtime})
        df_runtime.to_csv('prediction/mdm/revenue_prediction/details2/runtime/offertimes'+str(pred_offer_times_list[i])+'/runtime_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        
        ## using sales fraction prediction
        ## record the choice probability prediction interval
        ''' lb_sales = all_mdm_lb_sales_list[i][j]
        ub_sales = all_mdm_ub_sales_list[i][j]
        df_lb_sales = pd.DataFrame(lb_sales).T
        df_ub_sales = pd.DataFrame(ub_sales).T
        
        df_true_prob.to_csv('prediction/mdm/prob_prediction/offertimes'+str(pred_offer_times_list[i])+'/true/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_lb_sales.to_csv('prediction/mdm/prob_prediction/offertimes'+str(pred_offer_times_list[i])+'/mdm_lb/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_ub_sales.to_csv('prediction/mdm/prob_prediction/offertimes'+str(pred_offer_times_list[i])+'/mdm_ub/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv') '''

In [35]:
## adding details 2 about runtime 
all_lb_avg_runtime = []
all_ub_avg_runtime =[]

for i in range(len(pred_offer_times_list)):
    
    avg_lb_runtime_collection = []
    avg_ub_runtime_collection = []
    
    for j in range(pred_instance_size[i]-len(all_infeasible_collection[i])):
        avg_lb_runtime_collection.append(np.mean(np.array(all_mdm_rev_lb_runtime[i][j])))
        avg_ub_runtime_collection.append(np.mean(np.array(all_mdm_rev_ub_runtime[i][j])))
    
    all_lb_avg_runtime.append(np.mean(np.array(avg_lb_runtime_collection)))
    all_ub_avg_runtime.append(np.mean(np.array(avg_ub_runtime_collection)))

avg_mdm_lb_dist = []
avg_mdm_ub_dist = []

avg_mdm_lb_best_rev_diff = []
avg_mdm_ub_best_rev_diff = []

avg_infeasible_prop = []

for i in range(len(pred_offer_times_list)):
    avg_mdm_lb_dist.append(np.mean(all_mdm_lb_distance_list[i]))
    avg_mdm_ub_dist.append(np.mean(all_mdm_ub_distance_list[i]))
    
    avg_mdm_lb_best_rev_diff.append(np.mean(all_mdm_lb_best_rev_diff[i]))
    avg_mdm_ub_best_rev_diff.append(np.mean(all_mdm_ub_best_rev_diff[i]))
    
    avg_infeasible_prop.append(sum(all_infeasible_collection[i])/pred_instance_size[i])

df_mdm_jd_summary = pd.DataFrame({'offertimes':pred_offer_times_list,'train_size':pred_train_collection_size,'test_size':pred_test_collection_size,
                                  'infeas_prop':avg_infeasible_prop,
                                   'avg_lb_dist':avg_mdm_lb_dist,'avg_ub_dist':avg_mdm_ub_dist,
                                   'avg_lb_best_rev_diff':avg_mdm_lb_best_rev_diff,'avg_ub_best_rev_diff':avg_mdm_ub_best_rev_diff,
                                   'avg_lb_runtime':all_lb_avg_runtime,'avg_ub_runtime':all_ub_avg_runtime})
df_mdm_jd_summary.to_csv('prediction/mdm/jd_mdm_summary.csv')

In [36]:
df_mdm_jd_summary

Unnamed: 0,offertimes,train_size,test_size,infeas_prop,avg_lb_dist,avg_ub_dist,avg_lb_best_rev_diff,avg_ub_best_rev_diff,avg_lb_runtime,avg_ub_runtime
0,20,24,5,0.06,2.93617,1.914894,0.093054,0.070904,0.259867,0.311523
1,30,20,4,0.0,1.42,0.92,0.105559,0.058304,0.140805,0.16198
2,40,16,3,0.0,0.78,0.46,0.095387,0.070954,0.066352,0.069073
3,50,12,3,0.0,0.84,0.22,0.089414,0.023435,0.030245,0.029565
4,60,11,2,0.0,0.18,0.06,0.070209,0.011262,0.020935,0.022002
