In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import math
import gurobipy as gp
from gurobipy import GRB
import itertools
from itertools import combinations
from itertools import permutations
from random import choice
import json
import cvxpy as cp
from tkinter import _flatten
import copy
import time
import scipy.stats as stats
from scipy.stats import gumbel_r

In [2]:
np.random.seed(1)
random.seed(1)

In [3]:
raw_jd_choice = pd.read_excel('data_processing/choices.xlsm')  
jd_offertimes = raw_jd_choice.groupby('clickset')['clicknum'].sum()[raw_jd_choice.clickset.unique()]
inc_prod_num = raw_jd_choice['clickset'].value_counts()[raw_jd_choice.clickset.unique()]
assortment_info_df = pd.DataFrame({'assortments':raw_jd_choice.clickset.unique(),'offer_times':jd_offertimes,'includ_prod_num':inc_prod_num})

# extended assortments with outside option 
# transfer to list
clickset = raw_jd_choice['clickset']
clickset_list = []
for cset in clickset:
    num_lst = json.loads(cset)
    #clickset_list.append(num_lst+[0])
    clickset_list.append([0]+num_lst)
raw_jd_choice['clickset'] = clickset_list

n = 9 # product size top 8 products and outside option
print('there are {} different products'.format(n))
jd_collection = []
for cset in clickset_list:
    if cset not in jd_collection:
        jd_collection.append(cset)
print('there are {} different assortments'.format(len(jd_collection)))
print('check offertimes',len(jd_offertimes))

there are 9 different products
there are 134 different assortments
check offertimes 134


In [4]:
def collection_distribution_hev(n,collection,price):
    # generate mean 
    ''' mu_0 = np.random.uniform(2,3)
    mu_1n = np.random.uniform(-3,-2,n-1) '''
    # generate deterministic utilities for products
    
    rho = -0.5 # prices and utilities are negatively correlated
    price_mean = np.mean(price[1:]) 
    price_std = np.std(price[1:])
    
    z = np.zeros(len(price[1:]))
    for i in range(len(z)):
        z[i] = (price[1:][i] - price_mean)/price_std
        
    mu_1n = np.zeros(len(z))
    normal_rvs = np.random.randn(len(mu_1n))
    for i in range(len(mu_1n)):
        mu_1n[i] = rho*z[i] + np.sqrt(1-rho**2)*normal_rvs[i] 
    ## deterministic utility of the outside option is strictly greater than the utilities of the products
    mu_0 = np.max(mu_1n) + 1
    # np.random.uniform(-2,2,n-1)
    mu = np.hstack((mu_0,mu_1n))
    
    scale_0 = 10
    scale_1n = np.random.uniform(0.04,1,n-1)
    scales = np.hstack((scale_0,scale_1n))
    
    sample_size = 10000
    utility_samples = np.array([gumbel_r.rvs(loc=mu, scale=scales) for _ in range(sample_size)])
    # generate covariance matrix with positive correlation
    # neg_cov = generate_negatively_correlated_covariance_matrix(n)
    # if is_positive_semidefinite(neg_cov): 
    #     sample_size = 10000
    #     utility_samples = np.random.multivariate_normal(mu,neg_cov,size=sample_size)
    # else:
    #     print('Negative correlation matrix error')
    
    collection_distribution = np.zeros((n,len(collection)))
    for i in range(len(collection)):
        curr_assortment = collection[i]
        curr_population = [[] for _ in range(sample_size)] 

        for j in range(sample_size):
            for k in curr_assortment:
                curr_population[j].append(utility_samples[j][k])
                # each sub list records only the utilities of products in the current assortment
                
        frequency = [0]*len(curr_assortment)
        for j in range(sample_size):
            max_index = np.argmax(np.array(curr_population[j]))
            # product is chosen iff the utility of the product is max in the assortment
            frequency[max_index] = frequency[max_index] +1 
            # update the frequency of product to be chosen
            
        prob = np.array(frequency)/np.sum(frequency)
        for j in range(len(curr_assortment)):
            collection_distribution[curr_assortment[j]][i] = prob[j]
            
    return collection_distribution

In [5]:
def whole_instance_generation(n,whole_collection,whole_offertimes,price):
    ## generate LCrum instances based on the assortment collection infomation
    # step 1: randomly generate LCrum parameters
    # 1.1 the number of classes in LCrum, the number of classes is between 10 and 15, both endpoints are included
    ''' num_classes = 20
    # 1.2 randomly generate weights of all classes 
    #weight_pre = np.random.exponential(1, num_classes)
    weights = np.array([1/ num_classes for _ in range(num_classes)])
    # 1.3 randomly generate parameters of each rum
    parameters_v = np.random.uniform(-30, 30, size=(num_classes,n)) '''
    ''' parameters_v = np.zeros((num_classes,n))
    for i in range(num_classes):
        parameters_v[i] = np.random.exponential(1, n) '''
    #np.random.uniform(-30, 30, size=(num_classes,n))
    # step 2: generate LCrum instance with the above parameters of LCrum
    true_hev_instance = collection_distribution_hev(n,whole_collection,price)
    # step 3: generate multinomial samples based on lcrum instance and the emprical assortment offertimes
    purchased_samples = []
    for i in range(len(whole_collection)):
        sample_i = np.random.multinomial(whole_offertimes[i], true_hev_instance[:,i])
        purchased_samples.append(sample_i)
    # step 4: compute the simulated collection probabilities
    whole_choice_collection = np.zeros((n,len(whole_collection)))
    for i in range(len(whole_collection)):
        whole_choice_collection[:,i] = np.array([k/np.sum(purchased_samples[i]) for k in purchased_samples[i]])
        
    return whole_choice_collection,purchased_samples

In [6]:
def filter_collection_offertimes(whole_collection,whole_offertimes,least_offetimes):
    
    collection = []
    offertimes = []
    assortment_index = []
    
    for i in range(len(whole_offertimes)):
        if whole_offertimes[i]>=least_offetimes:
            collection.append(whole_collection[i])
            offertimes.append(whole_offertimes[i])
            assortment_index.append(i)
            
    return collection,offertimes,assortment_index

In [7]:
pred_offer_times_list = [20,30,40,50,60]
pred_test_collection_size = [5,4,3,3,2]
pred_train_collection_size = [24,20,16,12,11]
pred_instance_size = [50,50,50,50,50]
price = np.array([0,1.041,0.456,0.391,1.657,1.174,0.474,0.67,1.522])

In [8]:

all_full_collections = []
all_full_offertimes = []
all_full_assortment_index = []
full_collection_size = []
for i in range(len(pred_offer_times_list)):
    collection, offertimes, assortment_index = filter_collection_offertimes(jd_collection,jd_offertimes,pred_offer_times_list[i])
    all_full_collections.append(collection)
    all_full_offertimes.append(offertimes)
    all_full_assortment_index.append(assortment_index)
    full_collection_size.append(len(assortment_index))
    print("number of assortments with offertimes {} is {}".format(pred_offer_times_list[i], len(assortment_index)))

number of assortments with offertimes 20 is 29
number of assortments with offertimes 30 is 24
number of assortments with offertimes 40 is 19
number of assortments with offertimes 50 is 15
number of assortments with offertimes 60 is 13


In [9]:
# check if the collection are nested
def check_subsets(lists):
    # Convert all lists to sets
    sets = [set(lst) for lst in lists]
    
    # Check subsets
    for i in range(len(sets) - 1):
        if not sets[i + 1].issubset(sets[i]):
            return False
    return True

# Check and print the result
result = check_subsets(all_full_assortment_index)
print("All subsequent lists are subsets of the previous one:", result)

All subsequent lists are subsets of the previous one: True


In [10]:
def filter_probability_frequency2(whole_choice_collection,purchase_samples,assortment_index):
    
    choice_collection = np.zeros((whole_choice_collection.shape[0],len(assortment_index)))
    frequency_collection = np.zeros((whole_choice_collection.shape[0],len(assortment_index)))
    for i in range(len(assortment_index)):
        choice_collection[:,i] = whole_choice_collection[:,assortment_index[i]]
        frequency_collection[:,i] = purchase_samples[assortment_index[i]]
        
    return choice_collection,frequency_collection

In [11]:
# generate instances that satisfy for assortment offertimes is >=20
## generate 50 random full instances 
# the observations with at least 20 times are >= 1e-3
pred_full_instance = []
pred_full_samples = []
#collection_20,offertimes_20,assortment_index = filter_collection_offertimes(jd_collection,jd_offertimes,pred_offer_times_list[0])

for j in range(pred_instance_size[0]):
    print(f'generating {j} th instance')
    # full instance generation 
    curr_whole_instance,curr_purchase_samples = whole_instance_generation(n,jd_collection,jd_offertimes,price)
    # filter choice probability and purchase frequency of each produt in each assortment
    curr_choice_collection,curr_frequency = filter_probability_frequency2(curr_whole_instance,curr_purchase_samples,all_full_assortment_index[0])
    
    pred_full_instance.append(curr_whole_instance)
    pred_full_samples.append(curr_purchase_samples)
    #print(pd.DataFrame(curr_whole_instance))
    
    ''' condition = False
    for x in range(len(all_full_collections[0])):
        for y in all_full_collections[0][x]:
            if curr_choice_collection[y][x]<1e-6 or curr_choice_collection[y][x] >0.999:
                condition = True
    while condition:
        curr_whole_instance,curr_purchase_samples = whole_instance_generation(n,jd_collection,jd_offertimes,price)
        # filter choice probability and purchase frequency of each produt in each assortment
        curr_choice_collection,curr_frequency = filter_probability_frequency2(curr_whole_instance,curr_purchase_samples,all_full_assortment_index[0])

        condition = False
        for x in range(len(all_full_collections[0])):
            for y in all_full_collections[0][x]:
                if curr_choice_collection[y][x] < 1e-6 or curr_choice_collection[y][x] >0.999:
                    condition = True
    if condition == True:
        print('instance_generation error')
    else:  
        pred_full_instance.append(curr_whole_instance)
        pred_full_samples.append(curr_purchase_samples)    '''

generating 0 th instance
generating 1 th instance
generating 2 th instance
generating 3 th instance
generating 4 th instance
generating 5 th instance
generating 6 th instance
generating 7 th instance
generating 8 th instance
generating 9 th instance
generating 10 th instance
generating 11 th instance
generating 12 th instance
generating 13 th instance
generating 14 th instance
generating 15 th instance
generating 16 th instance
generating 17 th instance
generating 18 th instance
generating 19 th instance
generating 20 th instance
generating 21 th instance
generating 22 th instance
generating 23 th instance
generating 24 th instance
generating 25 th instance
generating 26 th instance
generating 27 th instance
generating 28 th instance
generating 29 th instance
generating 30 th instance
generating 31 th instance
generating 32 th instance
generating 33 th instance
generating 34 th instance
generating 35 th instance
generating 36 th instance
generating 37 th instance
generating 38 th insta

In [12]:
def compute_lb_ub_w_ci(frequency_collection,choice_collection,target_z_score):
    
    stardard_error = np.zeros(frequency_collection.shape)
    for i in range(stardard_error.shape[0]):
        for j in range(stardard_error.shape[1]):
            if frequency_collection[i][j]!=0:
                stardard_error[i][j] = np.sqrt((1-choice_collection[i][j])/frequency_collection[i][j])
                
    lb = np.zeros(frequency_collection.shape)
    ub = np.zeros(frequency_collection.shape)
    for i in range(lb.shape[0]):
        for j in range(lb.shape[1]):
            if frequency_collection[i][j]!=0:
                lb[i][j] = choice_collection[i][j] * (1-target_z_score*stardard_error[i][j]) 
                ub[i][j] = choice_collection[i][j] * (1+target_z_score*stardard_error[i][j])
    
    return lb,ub

In [13]:
# full instance generation 
all_full_instances = []
all_full_lb = []
all_full_ub = []

# pre-determined confidence interval 
confidence_level = 0.995
alpha = 1 - confidence_level
# Find z-score for the given confidence level
target_z_score = stats.norm.ppf(1 - alpha / 2)  

for i in range(len(pred_offer_times_list)):
    
    full_instances = []
    full_lbs = []
    full_ubs = []
    print(f'check assortment index for offertimes {pred_offer_times_list[i]}')
    for j in range(pred_instance_size[i]):
        # filter choice probability and purchase frequency of each produt in each assortment
        curr_choice_collection,curr_frequency = filter_probability_frequency2(pred_full_instance[j],pred_full_samples[j],all_full_assortment_index[i])
        # re-check if all choice probability are >= 1e-3
        condition = False
        for x in range(len(all_full_collections[i])):
            for y in all_full_collections[i][x]:
                if curr_choice_collection[y][x] < 1e-3:
                    condition = True
        if condition == True:
            print('instance_generation error')
        
        # compute the collection of lower bound and upper bound l_ij and u_ij
        curr_lb,curr_ub = compute_lb_ub_w_ci(curr_frequency,curr_choice_collection,target_z_score)
        
        full_instances.append(curr_choice_collection)
        full_lbs.append(curr_lb)
        full_ubs.append(curr_ub)
    
    all_full_instances.append(full_instances)
    all_full_lb.append(full_lbs)
    all_full_ub.append(full_ubs)
    



check assortment index for offertimes 20
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation error
instance_generation err

In [14]:
# train-test split
all_train_instances = []
all_train_collection = []
all_test_instance = []
all_test_collection = []
all_train_lb = []
all_train_ub = []
all_train_offertimes = []

for i in range(len(pred_instance_size)):
    train_instances_collection = []
    train_collection = []
    train_lb_collection = []
    train_ub_collection = []
    train_offertimes_collection = []
    
    test_instance_collection = []
    test_collection = []
    
    test_index_collection = []
    
    
    for j in range(pred_instance_size[i]):
        
        curr_train_instance = np.zeros((n,pred_train_collection_size[i]))
        curr_train_lb = np.zeros((n,pred_train_collection_size[i]))
        curr_train_ub = np.zeros((n,pred_train_collection_size[i]))
        
        curr_test_instance = np.zeros((n,pred_test_collection_size[i]))
        
        curr_train_collection = []
        curr_test_collection = []
        curr_offertimes_lst = []
        
        while(len(curr_train_collection)==0):
            
            chosen_test_idx = random.sample(range(len(all_full_collections[i])),pred_test_collection_size[i])
            chosen_test_idx.sort()
            
            chosen_train_idx = [x for x in range(len(all_full_collections[i])) if x not in chosen_test_idx ]
    
            for k in range(len(chosen_train_idx)):
                curr_train_collection.append(all_full_collections[i][chosen_train_idx[k]])
                curr_train_instance[:,k] = all_full_instances[i][j][:,chosen_train_idx[k]]
                curr_train_lb[:,k] = all_full_lb[i][j][:,chosen_train_idx[k]]
                curr_train_ub[:,k] = all_full_ub[i][j][:,chosen_train_idx[k]]
                
                curr_offertimes_lst.append(all_full_offertimes[i][chosen_train_idx[k]])
                
            for k in range(len(chosen_test_idx)):
                curr_test_collection.append(all_full_collections[i][chosen_test_idx[k]])
                curr_test_instance[:,k] = all_full_instances[i][j][:,chosen_test_idx[k]]
                
            ## check if the training instance includes all the testing products in the test instance
            # ensure each test product has been offered at least  once
            new_lst_train = sum(curr_train_collection,[])
            unique_numbers_train = list(set(new_lst_train)) 
            
            new_lst_test = sum(curr_test_collection,[])
            unique_numbers_test = list(set(new_lst_test)) 
            
            for num in unique_numbers_test:
                if num not in unique_numbers_train:
                    #print('the unique train numbers',unique_numbers_train)
                    #print('the unique test numbers',unique_numbers_test)
                    curr_train_collection = []
                    curr_test_collection = []
                    curr_train_instance = np.zeros((n,pred_train_collection_size[i]))
                    curr_test_instance = np.zeros((n,pred_test_collection_size[i]))
                    curr_train_lb = np.zeros((n,pred_train_collection_size[i]))
                    curr_train_ub = np.zeros((n,pred_train_collection_size[i]))
                    curr_offertimes_lst = []
            if chosen_test_idx in test_index_collection:
                #print('check if exisits repeated indexes ')
                curr_train_collection = []
                curr_test_collection = []
                curr_train_instance = np.zeros((n,pred_train_collection_size[i]))
                curr_test_instance = np.zeros((n,pred_test_collection_size[i]))
                curr_train_lb = np.zeros((n,pred_train_collection_size[i]))
                curr_train_ub = np.zeros((n,pred_train_collection_size[i]))
                curr_offertimes_lst = []
            elif len(curr_test_collection)>0:
                test_index_collection.append(chosen_test_idx)
        #print('chosen training indexes',chosen_train_idx)
        #print('chosen testing indexes',chosen_test_idx)
        #print('curr train collection\n',curr_train_collection)
        #print('curr test collection\n',curr_test_collection)
        df_train = pd.DataFrame(curr_train_instance)
        df_test = pd.DataFrame(curr_test_instance)
        df_offertimes = pd.DataFrame(curr_offertimes_lst)
        #print('curr train instance\n',df_train)
        #print('curr test instance\n',df_test)
        
        ## output the training and testing instance
        ''' df_train.to_csv('instances/train_instances/train_'+str(pred_offer_times_list[i])+'/train_offertimes'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_test.to_csv('instances/test_instances/test_'+str(pred_offer_times_list[i])+'/test_offertimes'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_offertimes.to_csv('instances/train_offertimes/train_'+str(pred_offer_times_list[i])+'/train_offertimes'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv') '''
        
        train_instances_collection.append(curr_train_instance)
        train_collection.append(curr_train_collection)
        train_lb_collection.append(curr_train_lb)
        train_ub_collection.append(curr_train_ub)
        train_offertimes_collection.append(curr_offertimes_lst)
                
        test_instance_collection.append(curr_test_instance)
        test_collection.append(curr_test_collection)
        
    all_train_instances.append(train_instances_collection)
    all_train_collection.append(train_collection)
    all_train_lb.append(train_lb_collection)
    all_train_ub.append(train_ub_collection)
    all_train_offertimes.append(train_offertimes_collection)
    all_test_instance.append(test_instance_collection)
    all_test_collection.append(test_collection)    
        

In [15]:
## RUM limit formulation
def find_element_permutation_indexes(n, partial_lists):
    perm_collection = []
    
    elements = list(range(n))
    perms = list(permutations(elements))
    #index_permutations = {perm: idx for idx, perm in enumerate(perms)}
    result = {}

    for partial_list in partial_lists:
        partial_result = {}
        perm_element = []
        for element in partial_list:
            element_indexes = [idx for idx, perm in enumerate(perms) if perm.index(element) == min(perm.index(e) for e in partial_list)]
            partial_result[element] = element_indexes
            perm_element.append(element_indexes)
        result[tuple(partial_list)] = partial_result
        perm_collection.append(perm_element)
    return result,perm_collection

def find_product_notin_assortment(collection,n):
    notin_collection = []
    for i in range(len(collection)):
        notin_assortment = []
        for j in range(n):
            if j not in collection[i]:
                notin_assortment.append(j)
        notin_collection.append(notin_assortment)
    return notin_collection


In [16]:
def rum_feasiblity(data,collection,perm_collection):
    n = data.shape[0]
    
    fact = math.factorial(n)
    
    # the RUM feasibility LP
    model = gp.Model('rum_feasibility')
    model.setParam('OutputFlag', 0)
    lam = model.addVars(fact,lb=0.0, vtype=gp.GRB.CONTINUOUS, name='lam')
    
    #model.addConstrs(lam[i]>=0 for i in range(fact))
    model.addConstr(sum(lam[i] for i in range(fact))==1)
    
    # caculate RUM choice probability for each product in the assortment
    for i in range(len(collection)):
        for j in range(len(collection[i])):
            model.addConstr(sum(lam[k] for k in perm_collection[i][j])- data[collection[i][j]][i]==0)
    
    model.setObjective(0)
    #model.write("model.lp")
    model.optimize()
    # Access the optimal solution

    if model.Status == 2: #optimal
        return [1,model.Runtime]
        #return [1, used_time]
    
    if model.Status == 3: #infeasible
        return [0,model.Runtime]


In [17]:
def robust_rum_prediction(data,collection,perm_collection,unseen_assortment,unseen_assortment_perm,price):
    start_time = time.time()
    
    n = data.shape[0]
    
    fact = math.factorial(n)
    
    model = gp.Model('rum')
    #model.Params.FeasibilityTol = 1e-5
    model.setParam('OutputFlag', 0)
    lam = model.addVars(fact,lb=0.0, vtype=gp.GRB.CONTINUOUS, name='lam')
    x = model.addVars(len(unseen_assortment),lb=0.0,vtype=gp.GRB.CONTINUOUS,name='x')
    
    #model.addConstrs(lam[i]>=0 for i in range(fact))
    model.addConstr(sum(lam[i] for i in range(fact))==1)
    #model.addConstr(sum(x[i] for i in range(len(unseen_assortment))) == 1)
    
    # caculate RUM choice probability for each product in the assortment
    for i in range(len(collection)):
        for j in range(len(collection[i])):
            model.addConstr(sum(lam[k] for k in perm_collection[i][j])- data[collection[i][j]][i]==0)
    
    # calculate RUM choice probability for the unseen assortment
    for i in range(len(unseen_assortment)):
        model.addConstr(sum(lam[k] for k in unseen_assortment_perm[i]) - x[i] == 0)
    
    # minimize the revenue within all possible rums
    model.setObjective(gp.quicksum(price[unseen_assortment[i]]*x[i] for i in range(len(unseen_assortment))),sense=gp.GRB.MINIMIZE)
   
    model.optimize()
    used_time = time.time() - start_time
    #status = model.status
    #print("Gurobi optimization status:", status)
    
    # Check if the optimization was successful
    if model.status == gp.GRB.OPTIMAL:
        # Access the objective value
        #obj_value = model.objVal
        #print(f"Optimal Objective Value: {obj_value}")  
        #print(f'current testing assortment',unseen_assortment)
        return [model.objVal,[x[i].x for i in range(len(unseen_assortment))], model.Runtime,used_time]
    if model.status == gp.GRB.INFEASIBLE:
        print("Optimization was not successful.")
        #print(f'current testing assortment',unseen_assortment)
        # Compute the Infeasibility Proof (IIS)
        ''' model.computeIIS()
        infeas_constraints = [c.constrName for c in model.getConstrs() if c.IISConstr]
        print("The following constraints contribute to infeasibility:")
        print(infeas_constraints) '''
        model.write("rumPre_infeas_model_1e-6.lp")
        return [-1, [ 0 for _ in range(len(unseen_assortment))],model.Runtime,used_time]
    
    

In [18]:
def robust_rum_prediction_tol(data,collection,perm_collection,unseen_assortment,unseen_assortment_perm,price,tol):
    start_time = time.time()
    n = data.shape[0]
    
    fact = math.factorial(n)
    
    model = gp.Model('rum')
    model.Params.FeasibilityTol = tol
    model.setParam('OutputFlag', 0)
    lam = model.addVars(fact,lb=0.0, vtype=gp.GRB.CONTINUOUS, name='lam')
    x = model.addVars(len(unseen_assortment),lb=0.0,vtype=gp.GRB.CONTINUOUS,name='x')
    
    #model.addConstrs(lam[i]>=0 for i in range(fact))
    model.addConstr(sum(lam[i] for i in range(fact))==1)
    #model.addConstr(sum(x[i] for i in range(len(unseen_assortment))) == 1)
    
    # caculate RUM choice probability for each product in the assortment
    for i in range(len(collection)):
        for j in range(len(collection[i])):
            model.addConstr(sum(lam[k] for k in perm_collection[i][j])- data[collection[i][j]][i]==0)
    
    # calculate RUM choice probability for the unseen assortment
    for i in range(len(unseen_assortment)):
        model.addConstr(sum(lam[k] for k in unseen_assortment_perm[i]) - x[i] == 0)
    
    # minimize the revenue within all possible rums
    model.setObjective(gp.quicksum(price[unseen_assortment[i]]*x[i] for i in range(len(unseen_assortment))),sense=gp.GRB.MINIMIZE)
   
    model.optimize()
    #status = model.status
    #print("Gurobi optimization status:", status)
    used_time= time.time() - start_time
    # Check if the optimization was successful
    if model.status == gp.GRB.OPTIMAL:
        # Access the objective value
        #obj_value = model.objVal
        #print(f"Optimal Objective Value: {obj_value}")  
        #print(f'current testing assortment',unseen_assortment)
        return [model.objVal,[x[i].x for i in range(len(unseen_assortment))], model.Runtime,used_time]
    if model.status == gp.GRB.INFEASIBLE:
        print("Optimization was not successful.")
        #print(f'current testing assortment',unseen_assortment)
        # Compute the Infeasibility Proof (IIS)
        ''' model.computeIIS()
        infeas_constraints = [c.constrName for c in model.getConstrs() if c.IISConstr]
        print("The following constraints contribute to infeasibility:")
        print(infeas_constraints) '''
        #model.write("rumPre_infeas_model_tol.lp")
        return [-1, [ 0 for _ in range(len(unseen_assortment))],model.Runtime,used_time]
    
    #return [-2, [ 0 for _ in range(len(unseen_assortment))],model.Runtime]
    
    

In [19]:
## prediction for the current testing collection of assortments 
def rum_prediction(curr_test_instance,curr_test_collection,price,curr_train_instance,curr_train_collection,curr_train_perm_collection,curr_test_perm_collection):
    ''' true assortment ranking '''

    true_revenue = []

    lb_rum_revenue = [] 

    lb_rum_runtime = []
    actual_run_runtime = []
    
    for k in range(len(curr_test_collection)):
        
        unseen_assortment = curr_test_collection[k]
        unseen_perm_collection = curr_test_perm_collection[k]
        # true revenue and true choice probabilities for current testing assortment
        curr_revenue = np.dot(curr_test_instance[:,k],price)
        true_revenue.append(curr_revenue)
        #true_choice_prob.append(curr_test_instance[:,k])
        
        # be careful for the output sequence
        
        curr_rum_lb_result = robust_rum_prediction(curr_train_instance,curr_train_collection,curr_train_perm_collection,unseen_assortment,unseen_perm_collection,price)
        if curr_rum_lb_result[0] == -1:
            tol = 1e-5
            curr_rum_lb_result = robust_rum_prediction_tol(curr_train_instance,curr_train_collection,curr_train_perm_collection,unseen_assortment,unseen_perm_collection,price,tol)
            while(curr_rum_lb_result[0] == -1 and tol <1e-2):
                print('current prediction needs a significant tolerence')
                tol = tol*10
                curr_rum_lb_result = robust_rum_prediction_tol(curr_train_instance,curr_train_collection,curr_train_perm_collection,unseen_assortment,unseen_perm_collection,price,tol)
            
        lb_rum_revenue.append(curr_rum_lb_result[0])
        
        ## record the runtime result
        lb_rum_runtime.append(curr_rum_lb_result[2])
        actual_run_runtime.append(curr_rum_lb_result[-1])
        print(f'robust RUM revenue prediction of {unseen_assortment} is {curr_rum_lb_result[0]} with actual runtime {curr_rum_lb_result[3]}')
       
    return true_revenue,lb_rum_revenue,lb_rum_runtime,actual_run_runtime

In [20]:
def robust_rum_prediction_w_ci(lb,ub,data,collection,perm_collection,produts_notin_collection,unseen_assortment,unseen_assortment_perm,price):
    start_time = time.time()
    n,m = data.shape # number of products
    fact = math.factorial(n) # number of permutations
    
    model = gp.Model('rum_ci') #define a model
    model.setParam('OutputFlag', 0)
    
    # define nonnegative continuous variables for lambda, the probability of each permutation
    lam = model.addVars(fact,lb=0.0, vtype=gp.GRB.CONTINUOUS, name='lam') 
    # define nonnegative continuous variables for choice probability of each product in each train assortment
    x = model.addVars(n, m, vtype=gp.GRB.CONTINUOUS, lb=0.0, name="x") 
    # define nonnegative continuous variables for choice probability of each product in the test assortment 
    y = model.addVars(len(unseen_assortment),lb=0.0,vtype=gp.GRB.CONTINUOUS,name='y')
    
    #normalization constraint of lambda
    model.addConstr(sum(lam[i] for i in range(fact))==1,name="Normalization")
    
    # caculate RUM choice probability for each product in the train assortment
    for i in range(len(collection)):
        # normalization constraint for each assortment. 
        #model.addConstr(gp.quicksum(x[k, i] for k in collection[i]) == 1)
        for j in range(len(collection[i])):
            # constraints for RUM choice probabilities 
            model.addConstr(sum(lam[k] for k in perm_collection[i][j])- x[collection[i][j],i]==0,name=f"prob_product{collection[i][j]}_assortment_{i}")
            
            # constraints for the lower bound and upper bound values
            model.addConstr(x[collection[i][j],i]>= lb[collection[i][j],i],name=f"lb_product{collection[i][j]}_assortment_{i}")
            model.addConstr(x[collection[i][j],i]<= ub[collection[i][j],i],name=f"ub_product{collection[i][j]}_assortment_{i}")
        
        for k in produts_notin_collection[i]:
            # constraints for products that are not in the assortments
            model.addConstr(x[k,i] == 0,name=f"prob_product{k}_assortment_{i}")
    
    # calculate RUM choice probability for the unseen assortment
    for i in range(len(unseen_assortment)):
        model.addConstr(sum(lam[k] for k in unseen_assortment_perm[i]) - y[i] == 0,name=f'unseen_prob_product{unseen_assortment[i]}')
    
    # Objective
    obj = gp.quicksum(price[unseen_assortment[i]] * y[i] for i in range(len(unseen_assortment)))
    model.setObjective(obj, GRB.MINIMIZE)
    #model.setObjective(0)
    model.optimize()
    used_time = time.time() - start_time
    #status = model.status
    #print("Gurobi optimization status:", status)
    # Access the optimal solution
    if model.status == gp.GRB.OPTIMAL:
        print(f"RUM robust prediction revenue of assortment {unseen_assortment} is : {model.objVal} with runtime {used_time}")  
        #model.write("rum_ci.sol")
        #model.write("rum_ci.lp")
        x_values = [[x[i, j].X for j in range(m)] for i in range(n)]
        x_values = np.array(x_values)
        #print(f'current testing assortment',unseen_assortment)
        return [model.objVal,[y[i].x for i in range(len(unseen_assortment))], model.Runtime,used_time]
    else:
        print('RUM CI: No solution found or an error occurred.')
        return [-1]
    

In [21]:
def kendall_tau_distance(values1, values2):
    """Compute the Kendall tau distance."""
    num = len(values1)
    assert len(values2) == num, "Both lists have to be of equal length"
    i, j = np.meshgrid(np.arange(num), np.arange(num))
    a = np.argsort(values1)
    b = np.argsort(values2)
    ndisordered = np.logical_or(np.logical_and(a[i] < a[j], b[i] > b[j]), np.logical_and(a[i] > a[j], b[i] < b[j])).sum()
    return ndisordered/2 

In [22]:
## record the kendall tau distance between the predicted ranking of rum and the truth
all_rum_lb_distance_list = []  

## record the true revenue of the predicted best assortment
all_best_true_revenue_list = []
all_best_revenue_rum_lb = []  

## record the difference in best assortment
all_rum_lb_best_rev_diff = []

## record all the true and predicted revenues
all_true_revenue_list = []
all_rum_lb_revenue_list = []

## choice probability under the revenue prediction 
''' all_true_choice_prob_list = [] 
all_rum_lb_rev_prob_list = []

all_rum_lb_sales_list = [] '''

all_true_ranking = []
all_rum_lb_ranking = []

## record the runtime of rum prediction
all_rum_rev_lb_runtime = []
all_rum_actual_runtime = []

all_infeasible_collection = []

for i in range(len(all_full_collections)):
    
    
    # define as a container for quantity of the same collection size 
    # The following lists are of the same size as the instance size
    
    true_best_rev_collection = [] # a container for groundtruth best reveue 
    rum_lb_best_rev_collection = [] # a container for the best revenue predicted by robust rum
    
    true_ranking_collection = [] # a container for groundtruth reveue ranking 
    rum_lb_ranking_collection = [] # a container for ranking by robust rum
    
    rum_lb_distance_collection = [] # a container for kendall tau distance between true ranking and the ranking by robust rum
   
    ''' true_choice_prob_collection = []
    rum_lb_rev_prob_collection = []
   
    rum_lb_sales_frac_collection = [] '''

    true_revenue_collection = []
    rum_lb_revenue_collection = []
   
    rum_lb_runtime_collection = []
    rum_actual_runtime_collection = []
    
    infeasible_collection = []
    
    for j in range(pred_instance_size[i]):
        print(f'testing for offertimes {pred_offer_times_list[i]}, {j}th instance')
        # the following lists are of the same size as number of testing assortments
        ''' true assortment ranking '''
        #true_choice_prob = [] 
        true_revenue = []
        
        ''' lb_rum_rev_prob = []
        lb_rum_sales = [] '''
        lb_rum_revenue = [] 
        lb_rum_runtime = []
        actual_runtime = [] 
        
        produts_notin_collection = find_product_notin_assortment(all_train_collection[i][j],n)
        train_element_indexes,train_perm_collection = find_element_permutation_indexes(n, all_train_collection[i][j])
        test_element_indexes, test_perm_collection = find_element_permutation_indexes(n, all_test_collection[i][j])
        
        flag = 0
        for k in range(len(all_test_collection[i][j])):
            
            unseen_assortment = all_test_collection[i][j][k]
            
            
            
            # rum revenue prediction
            # robust_rum_prediction_w_ci(lb,ub,data,collection,perm_collection,produts_notin_collection,unseen_assortment,unseen_assortment_perm,price)
            curr_rum_lb_result = robust_rum_prediction_w_ci(all_train_lb[i][j],all_train_ub[i][j],all_train_instances[i][j],all_train_collection[i][j],train_perm_collection,produts_notin_collection,unseen_assortment,test_perm_collection[k],price)

            if curr_rum_lb_result[0]<0:
                flag=1
                break
            else:
                ## record the revenue result
                lb_rum_revenue.append(curr_rum_lb_result[0])
            
                ''' ## record the choice probability result
                lb_prob = np.zeros(n)
                lb_prob_result = curr_rum_lb_result[1]
                
                for l in range(len(unseen_assortment)):
                    lb_prob[unseen_assortment[l]] = lb_prob_result[l]
            
                lb_rum_rev_prob.append(lb_prob) '''
        
                ## record the runtime result
                lb_rum_runtime.append(curr_rum_lb_result[2])
                actual_runtime.append(curr_rum_lb_result[-1])
                
                
                ''' ## rum choice probability interval prediction
                lb_sales_frac = np.zeros(n)

                for l in range(len(unseen_assortment)):
                    unit_price = np.zeros(n)
                    unit_price[unseen_assortment[l]] = 1
                    ## unit price is a vector that takes value 1 only at unseen_assortment[l] and all zeros for other n-1 elements
                    lb_sales_frac[unseen_assortment[l]] = robust_rum_prediction_w_ci(all_train_lb[i][j],all_train_ub[i][j],all_train_instances[i][j],all_train_collection[i][j],train_perm_collection,produts_notin_collection,unseen_assortment,test_perm_collection[k],unit_price)[0]
                
                lb_rum_sales.append(lb_sales_frac) '''
            # true revenue and true choice probabilities for current testing assortment
            curr_revenue = np.dot(all_test_instance[i][j][:,k],price)
            true_revenue.append(curr_revenue)
            #true_choice_prob.append(all_test_instance[i][j][:,k])
        if flag ==0:          
            true_revenue_collection.append(true_revenue)
            #true_choice_prob_collection.append(true_choice_prob)
            
            rum_lb_revenue_collection.append(lb_rum_revenue)   
            #rum_lb_rev_prob_collection.append(lb_rum_rev_prob)
            
            rum_lb_runtime_collection.append(lb_rum_runtime)
            rum_actual_runtime_collection.append(actual_runtime)
            ## sales fraction record
            #rum_lb_sales_frac_collection.append(lb_rum_sales)
            
            # true assortment ranking for test instance [i][j]
            curr_true_rank = np.argsort(-np.array(true_revenue))  
            true_ranking_collection.append(curr_true_rank)
            # true best assortment revenue for test instance [i][j]
            true_best_rev_collection.append(true_revenue[curr_true_rank[0]])
            
            # lb rum assortment ranking for test instance [i][j] 
            curr_rum_lb_rank  = np.argsort(-np.array(lb_rum_revenue)) 
            rum_lb_ranking_collection.append(curr_rum_lb_rank)
            rum_lb_best_rev_collection.append(true_revenue[curr_rum_lb_rank[0]])
            
            #  rum kendallTau Distance for current instance [i][j]
            curr_rum_lb_dist = kendall_tau_distance(curr_true_rank, curr_rum_lb_rank)
            rum_lb_distance_collection.append(curr_rum_lb_dist)
        else:
            infeasible_collection.append(flag)
    # for each test instance [i][j] kendall tau distance is a number
    all_rum_lb_distance_list.append(rum_lb_distance_collection)
    
    # for each test instance [i][j] best revenue is a number
    all_best_true_revenue_list.append(true_best_rev_collection) 
    all_best_revenue_rum_lb.append(rum_lb_best_rev_collection)
    
    # for each test instance [i][j] best revenue difference is a number
    all_rum_lb_best_rev_diff.append((np.array(true_best_rev_collection) - np.array(rum_lb_best_rev_collection))/ np.array(true_best_rev_collection))
    
    # for each test instance [i][j], true revenue is a list of the same size as the number of testing assortments
    all_true_revenue_list.append(true_revenue_collection)
    #all_true_choice_prob_list.append(true_choice_prob_collection)
    all_rum_lb_revenue_list.append(rum_lb_revenue_collection)
    #all_rum_lb_rev_prob_list.append(rum_lb_rev_prob_collection)
    
    #all_rum_lb_sales_list.append(rum_lb_sales_frac_collection)
    
    # for each test instance [i][j], true ranking is a list of the same size as the number of testing assortments
    all_true_ranking.append(true_ranking_collection)
    all_rum_lb_ranking.append(rum_lb_ranking_collection)

    ## for each test instance [i][j], runtime is a list of the same size as the number of testing assortments
    all_rum_rev_lb_runtime.append(rum_lb_runtime_collection)
    all_rum_actual_runtime.append(rum_actual_runtime_collection)

    all_infeasible_collection.append(infeasible_collection)  

testing for offertimes 20, 0th instance
Academic license - for non-commercial use only - expires 2024-12-10
Using license file /Users/autumn/gurobi.lic
RUM robust prediction revenue of assortment [0, 5] is : 0.21301541755419617 with runtime 108.67249917984009
RUM robust prediction revenue of assortment [0, 3, 7] is : 0.18475453630370198 with runtime 100.53817796707153
RUM robust prediction revenue of assortment [0, 5, 6] is : 0.11052545238995223 with runtime 98.72902703285217
RUM robust prediction revenue of assortment [0, 5, 8] is : 0.21301541755419615 with runtime 98.79533004760742
RUM robust prediction revenue of assortment [0, 6, 8] is : 0.08815161249490386 with runtime 97.67918300628662
testing for offertimes 20, 1th instance
RUM robust prediction revenue of assortment [0, 3] is : 0.08645124779767498 with runtime 97.11691403388977
RUM robust prediction revenue of assortment [0, 4] is : 0.2724113161001463 with runtime 98.30032181739807
RUM robust prediction revenue of assortment [0

In [23]:
## adding details 1
for i in range(len(pred_offer_times_list)):
    df_rum_details1 = pd.DataFrame({'ins_idx':list(range(pred_instance_size[i]-len(all_infeasible_collection[i]))),'rum_lb_dist':all_rum_lb_distance_list[i],
                                    'true_best_rev':all_best_true_revenue_list[i],'rum_lb_best_rev':all_best_revenue_rum_lb[i],
                                    'rum_lb_best_rev_diff':all_rum_lb_best_rev_diff[i]})
    df_rum_details1.to_csv('prediction/rum/revenue_prediction/details1/'+str(pred_offer_times_list[i])+'.csv')

In [24]:
## adding details 2 about ranking, revenue, choice probability
for i in range(len(pred_offer_times_list)):
    for j in range(pred_instance_size[i]-len(all_infeasible_collection[i])):
        
        true_rank = all_true_ranking[i][j]
        lb_rank = all_rum_lb_ranking[i][j]
        
        df_rank = pd.DataFrame({'true_rank':true_rank,'lb_rank':lb_rank})
        df_rank.to_csv('prediction/rum/revenue_prediction/details2/ranking/offertimes'+str(pred_offer_times_list[i])+'/rank_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        true_rev = all_true_revenue_list[i][j]
        lb_rev = all_rum_lb_revenue_list[i][j]
        
        df_rev = pd.DataFrame({'true_rev':true_rev,'lb_rev':lb_rev})
        df_rev.to_csv('prediction/rum/revenue_prediction/details2/revenue/offertimes'+str(pred_offer_times_list[i])+'/rev_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        ''' true_prob = all_true_choice_prob_list[i][j]
        lb_prob = all_rum_lb_rev_prob_list[i][j]
        
        
        df_true_prob = pd.DataFrame(true_prob).T
        df_lb_prob = pd.DataFrame(lb_prob).T
       
        
        df_true_prob.to_csv('prediction/rum/revenue_prediction/details2/probabilities/offertimes'+str(pred_offer_times_list[i])+'/true/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_lb_prob.to_csv('prediction/rum/revenue_prediction/details2/probabilities/offertimes'+str(pred_offer_times_list[i])+'/rum_lb/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        '''
        lb_runtime = all_rum_rev_lb_runtime[i][j]
       
        df_runtime = pd.DataFrame({'lb_runtime':lb_runtime})
        df_runtime.to_csv('prediction/rum/revenue_prediction/details2/runtime/offertimes'+str(pred_offer_times_list[i])+'/runtime_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        actual_runtime = all_rum_actual_runtime[i][j]
        df_actual_runtime = pd.DataFrame({'rum_runtime':actual_runtime})
        df_actual_runtime.to_csv('prediction/rum/revenue_prediction/details2/actual_runtime/offertimes'+str(pred_offer_times_list[i])+'/runtime_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        
        ''' ## using sales fraction prediction
        ## record the choice probability prediction interval
        lb_sales = all_rum_lb_sales_list[i][j]
     
        df_lb_sales = pd.DataFrame(lb_sales).T
      
        
        df_true_prob.to_csv('prediction/rum/prob_prediction/offertimes'+str(pred_offer_times_list[i])+'/true/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
        df_lb_sales.to_csv('prediction/rum/prob_prediction/offertimes'+str(pred_offer_times_list[i])+'/rum_lb/prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')
         '''

In [25]:
## adding details 2 about runtime 
all_lb_avg_runtime = []

for i in range(len(pred_offer_times_list)):
    
    avg_lb_runtime_collection = []

    for j in range(pred_instance_size[i]-len(all_infeasible_collection[i])):
        avg_lb_runtime_collection.append(np.mean(np.array(all_rum_rev_lb_runtime[i][j])))
   
    all_lb_avg_runtime.append(np.mean(np.array(avg_lb_runtime_collection)))


In [26]:
## adding details 2 about runtime 
all_actual_avg_runtime = []

for i in range(len(pred_offer_times_list)):
    
    avg_actual_runtime_collection = []

    for j in range(pred_instance_size[i]-len(all_infeasible_collection[i])):
        avg_actual_runtime_collection.append(np.mean(np.array(all_rum_actual_runtime[i][j])))
   
    all_actual_avg_runtime.append(np.mean(np.array(avg_actual_runtime_collection)))

In [27]:
avg_rum_lb_dist = []

avg_rum_lb_best_rev_diff = []

avg_infeasible_prop = []

for i in range(len(pred_offer_times_list)):
    avg_rum_lb_dist.append(np.mean(all_rum_lb_distance_list[i]))
 
    avg_rum_lb_best_rev_diff.append(np.mean(all_rum_lb_best_rev_diff[i]))
    
    avg_infeasible_prop.append(len(all_infeasible_collection[i])/pred_instance_size[i])

df_rum_jd_summary = pd.DataFrame({'offertimes':pred_offer_times_list,'train_size':pred_train_collection_size,'test_size':pred_test_collection_size,
                                  'infeas_prop':avg_infeasible_prop,
                                   'avg_lb_dist':avg_rum_lb_dist,
                                   'avg_lb_best_rev_diff':avg_rum_lb_best_rev_diff,
                                   'avg_lb_runtime':all_lb_avg_runtime,
                                   'rum_actual_runtime':all_actual_avg_runtime})
df_rum_jd_summary.to_csv('prediction/rum/jd_rum_summary.csv')

In [28]:
df_rum_jd_summary

Unnamed: 0,offertimes,train_size,test_size,infeas_prop,avg_lb_dist,avg_lb_best_rev_diff,avg_lb_runtime,rum_actual_runtime
0,20,24,5,0.08,3.173913,0.095302,1.554812,88.028483
1,30,20,4,0.0,1.84,0.155433,1.366669,76.525321
2,40,16,3,0.0,0.86,0.112663,1.289146,70.492162
3,50,12,3,0.0,0.92,0.123555,1.105594,56.37482
4,60,11,2,0.0,0.28,0.100679,1.084863,54.67755


In [29]:
def rum_limit(data,collection,perm_collection,produts_notin_collection,offer_times):
    
    start_time = time.time()
    n,m = data.shape # number of products
    fact = math.factorial(n) # number of permutations
    
    model = gp.Model('rum') #define a model
    model.setParam('OutputFlag', 0)
    
    # define nonnegative continuous variables for lambda, the probability of each permutation
    lam = model.addVars(fact,lb=0.0, vtype=gp.GRB.CONTINUOUS, name='lam') 
    # define nonnegative continuous variables for choice probability of each product in each assortment
    x = model.addVars(n, m, vtype=gp.GRB.CONTINUOUS, lb=0, name="x") 
    # define the variables for absolute value
    y = model.addVars(n, m, vtype=gp.GRB.CONTINUOUS, lb=0, name="y")
    #normalization constraint of lambda
    model.addConstr(sum(lam[i] for i in range(fact))==1)
    
    # caculate RUM choice probability for each product in the assortment
    for i in range(len(collection)):
        # normalization constraint for each assortment. 
        model.addConstr(gp.quicksum(x[k, i] for k in collection[i]) == 1)
        for j in range(len(collection[i])):
            # constraints for RUM choice probabilities 
            model.addConstr(sum(lam[k] for k in perm_collection[i][j])- x[collection[i][j],i]==0)
            # constraints for the absolute value
            model.addConstr(x[collection[i][j],i] - data[collection[i][j]][i] - y[collection[i][j],i] <=0)
            model.addConstr(data[collection[i][j]][i] - x[collection[i][j],i] -  y[collection[i][j],i] <=0)
        for k in produts_notin_collection[i]:
            # constraints for products that are not in the assortments
            model.addConstr(x[k,i] == 0)
            model.addConstr(y[k,i] == 0)
    
    #model.setObjective(gp.quicksum(offer_times[j]*data[i][j] * y[i, j] for i in range(n) for j in range(m)/sum(offer_times)), sense=gp.GRB.MINIMIZE)
    #model.setObjective(0)
    # Define the modified objective function using auxiliary variables
    obj = (sum(offer_times[j]*data[i, j] *  y[i, j]  for i in range(n) for j in range(m)))
    #obj = (sum(offer_times[j]*data[i][j] * abs_vars[i, j] for i in range(n) for j in range(m))/sum(offer_times))

    model.setObjective(obj, GRB.MINIMIZE)
    
    model.optimize()
    status = model.status
    
    # Access the optimal solution
    used_time = time.time() - start_time
    if model.status == gp.GRB.OPTIMAL:
        print('Optimal solution found!')
        print(f'RUM L1 Norm Loss = {model.objVal}')
        x_values = [[x[i, j].X for j in range(m)] for i in range(n)]
        x_values = np.array(x_values)
            
        return [model.objVal,x_values,model.Runtime,used_time]
    else:
        print("Gurobi optimization status:", status)
        print('No solution found or an error occurred.')
       
    
    
    

In [30]:


## record the limit loss of all instances
all_rum_limit_loss = []
all_rum_limit_prob = []
all_rum_limit_runtime = []


for i in range(len(pred_offer_times_list)):


    ## define container for rum limit 
    rum_limit_loss_collection =[]
    rum_limit_runtime_collection =[]
    rum_limit_probability_collection =[]
       
    for j in range(pred_instance_size[i]):
        print(f'testing instance with offertimes {pred_offer_times_list[i]} : {j}th ')
        ####### rum representability check ##########
        

        element_indexes,perm_collection = find_element_permutation_indexes(n, all_full_collections[i])
        products_notin_collection = find_product_notin_assortment(all_full_collections[i] ,n)
        curr_limit_result = rum_limit(all_full_instances[i][j],all_full_collections[i],perm_collection,products_notin_collection,all_full_offertimes[i])
        # return [model.objVal,x_values,model.Runtime,used_time]
        
        curr_limit_prob = curr_limit_result[1]
        rum_limit_probability_collection.append(curr_limit_prob)
        rum_limit_loss_collection.append(curr_limit_result[0])
        rum_limit_runtime_collection.append(curr_limit_result[-1])
    
    ## for each train instance [i][j] limit loss result is a number
    all_rum_limit_loss.append(rum_limit_loss_collection)
    all_rum_limit_runtime.append(rum_limit_runtime_collection)
    
    ## for each train instance [i][j] limit probability is a matrix
    all_rum_limit_prob.append(rum_limit_probability_collection)

testing instance with offertimes 20 : 0th 
Optimal solution found!
RUM L1 Norm Loss = 43.55688946128886
testing instance with offertimes 20 : 1th 
Optimal solution found!
RUM L1 Norm Loss = 51.0668287300729
testing instance with offertimes 20 : 2th 
Optimal solution found!
RUM L1 Norm Loss = 40.794478251347215
testing instance with offertimes 20 : 3th 
Optimal solution found!
RUM L1 Norm Loss = 41.38337163051647
testing instance with offertimes 20 : 4th 
Optimal solution found!
RUM L1 Norm Loss = 30.12423043963387
testing instance with offertimes 20 : 5th 
Optimal solution found!
RUM L1 Norm Loss = 26.7237286497574
testing instance with offertimes 20 : 6th 
Optimal solution found!
RUM L1 Norm Loss = 26.876106046812133
testing instance with offertimes 20 : 7th 
Optimal solution found!
RUM L1 Norm Loss = 32.36743131543703
testing instance with offertimes 20 : 8th 
Optimal solution found!
RUM L1 Norm Loss = 40.1775679505382
testing instance with offertimes 20 : 9th 
Optimal solution found

In [31]:
## adding limit results
for i in range(len(pred_offer_times_list)):
    df_rum_loss = pd.DataFrame({'ins_idx':list(range(pred_instance_size[i])),'rum_loss':all_rum_limit_loss[i], 'rum_limit_time':all_rum_limit_runtime[i]})
    df_rum_loss.to_csv('limit/rum/limit/'+str(pred_offer_times_list[i])+'.csv')
    
    for j in range(pred_instance_size[i]):
            df_limit_prob = pd.DataFrame(all_rum_limit_prob[i][j])
            df_limit_prob.to_csv('limit/rum/limit/limit_prob/offertimes'+str(pred_offer_times_list[i])+'/limit_prob_'+str(pred_offer_times_list[i])+'_'+str(j)+'.csv')

In [32]:
all_avg_loss = []
for i in range(len(pred_offer_times_list)):
    all_avg_loss.append(all_rum_limit_loss[i]/sum(all_full_offertimes[i]))

In [33]:


avg_total_limit_loss = []
avg_toal_limit_loss_se = []
avg_rum_limit_runtime = []

#avg_loss = []
avg_loss_se = []

for i in range(len(pred_offer_times_list)):

    
    avg_total_limit_loss.append(np.mean(all_rum_limit_loss[i]))
    avg_rum_limit_runtime.append(np.sum(all_rum_limit_runtime[i]))
    avg_toal_limit_loss_se.append(np.std(all_rum_limit_loss[i])/np.sqrt(len(all_rum_limit_loss[i])))
    
    #avg_loss = np.mean(all_avg_loss[i])
    avg_loss_se.append(np.std(all_avg_loss[i])/np.sqrt(len(all_avg_loss[i])))

avg_loss = []
for i in range(len(avg_total_limit_loss)):
    avg_loss.append(avg_total_limit_loss[i]/sum(all_full_offertimes[i]))

df_rum_limit = pd.DataFrame({'offertimes':pred_offer_times_list,
                                   'total_rum_loss':avg_total_limit_loss,'total_rum_loss_se':avg_toal_limit_loss_se,
                                   'avg_rum_loss':avg_loss,'avg_rum_loss_se':avg_loss_se,
                                   'avg_rum_limit_runtime':avg_rum_limit_runtime
                                   })
df_rum_limit.to_csv('limit/rum/rum_limit_summary.csv')
df_rum_limit

Unnamed: 0,offertimes,total_rum_loss,total_rum_loss_se,avg_rum_loss,avg_rum_loss_se,avg_rum_limit_runtime
0,20,36.507508,1.310636,0.004769,0.000171,5395.049991
1,30,28.101991,1.229636,0.003732,0.000163,5301.423036
2,40,20.287403,1.12727,0.002756,0.000153,3703.925786
3,50,13.606598,0.909836,0.001892,0.000127,3395.35266
4,60,10.907307,0.870728,0.00154,0.000123,3191.300611


In [34]:
all_rum_rep = []
avg_rep = []
for i in range(len(pred_offer_times_list)):
    rum_rep_collection = []

    for j in range(pred_instance_size[i]):
        if all_rum_limit_loss[i][j] <1e-6:
            rum_rep_collection.append(1)
        else:
            rum_rep_collection.append(0)
    all_rum_rep.append(rum_rep_collection)
    avg_rep.append(np.mean(rum_rep_collection))
    df_rum_rep = pd.DataFrame({'ins_idx':list(range(pred_instance_size[i])),'rum_rep':rum_rep_collection})
    df_rum_rep.to_csv('limit/rum/representability/'+str(pred_offer_times_list[i])+'.csv')


In [35]:
avg_rep

[0.0, 0.0, 0.0, 0.0, 0.0]

In [36]:
df_rum_limit['rum_rep'] = avg_rep
df_rum_limit

Unnamed: 0,offertimes,total_rum_loss,total_rum_loss_se,avg_rum_loss,avg_rum_loss_se,avg_rum_limit_runtime,rum_rep
0,20,36.507508,1.310636,0.004769,0.000171,5395.049991,0.0
1,30,28.101991,1.229636,0.003732,0.000163,5301.423036,0.0
2,40,20.287403,1.12727,0.002756,0.000153,3703.925786,0.0
3,50,13.606598,0.909836,0.001892,0.000127,3395.35266,0.0
4,60,10.907307,0.870728,0.00154,0.000123,3191.300611,0.0


In [37]:
df_rum_limit.to_csv('limit/rum/rum_limit_summary.csv')

In [38]:
for i in range(len(avg_total_limit_loss)):
    print(avg_total_limit_loss[i]/sum(all_full_offertimes[i]))

0.004769106237065451
0.0037324997609695753
0.002756066197675386
0.0018924336786164508
0.0015401450497384522


In [45]:
all_rum_kl_loss = []
all_rum_avg_kl_loss = []
for i in range(len(pred_offer_times_list)): 
    rum_kl_loss_collection = []
    rum_avg_kl_loss_collection = []
    for j in range(pred_instance_size[i]):
        limit_prob = all_rum_limit_prob[i][j]
        data = all_full_instances[i][j]
        curr_kl_loss = 0
        for x in range(data.shape[0]):
            for y in range(data.shape[1]):
                if data[x][y]>0 and limit_prob[x][y]>0:
                    curr_kl_loss = curr_kl_loss + all_full_offertimes[i][y]*data[x][y]*np.log(limit_prob[x][y])
        rum_kl_loss_collection.append(-curr_kl_loss)
        rum_avg_kl_loss_collection.append(-curr_kl_loss/sum(all_full_offertimes[i]))
    df_rum_loss = pd.DataFrame({'ins_idx':list(range(pred_instance_size[i])),'rum_loss':rum_kl_loss_collection,'avg_loss':rum_avg_kl_loss_collection})
    df_rum_loss.to_csv('limit/rum/limit_kl/'+str(pred_offer_times_list[i])+'.csv')
    
    all_rum_kl_loss.append(rum_kl_loss_collection) 
    all_rum_avg_kl_loss.append(rum_avg_kl_loss_collection)

In [46]:
avg_total_kl_loss = []
avg_total_kl_loss_se = []
avg_avg_kl_loss = []
avg_avg_kl_loss_se = []

for i in range(len(pred_offer_times_list)):
    avg_total_kl_loss.append(np.mean(all_rum_kl_loss[i]))
    avg_total_kl_loss_se.append(np.std(all_rum_kl_loss[i])/np.sqrt(len(all_rum_kl_loss[i])))
    
    avg_avg_kl_loss.append(np.mean(all_rum_avg_kl_loss[i]))
    avg_avg_kl_loss_se.append(np.std(all_rum_avg_kl_loss[i])/np.sqrt(len(all_rum_avg_kl_loss[i])))
    
df_rum_kl = pd.DataFrame({'offertimes':pred_offer_times_list,
                                   'total_rum_kl_loss':avg_total_kl_loss,'total_rum_kl_loss_se':avg_total_kl_loss_se,
                                   'avg_rum_kl_loss':avg_avg_kl_loss,'avg_rum_kl_loss_se':avg_avg_kl_loss_se
                                   })
df_rum_kl.to_csv('limit/rum/rum_kl_summary.csv')
df_rum_kl 

Unnamed: 0,offertimes,total_rum_kl_loss,total_rum_kl_loss_se,avg_rum_kl_loss,avg_rum_kl_loss_se
0,20,4781.325275,26.629818,0.624602,0.003479
1,30,4686.513279,25.945882,0.622462,0.003446
2,40,4563.434784,25.024783,0.619948,0.0034
3,50,4436.457662,24.388246,0.617032,0.003392
4,60,4356.648783,23.906923,0.615172,0.003376
