In [1]:
import matplotlib.pyplot as plt
import numpy as np
from order_fulfillment_environment_notidentical_arrival_probs import OrderFulfillment
from cluster_fulfillment_policy_notidentical_arrival_probs import FulfillmentPolicy
from LP_fulfillment_notidentical_arrival_probs import SolvingLP
from tqdm import tqdm

# Small Network $J=10, K=5, n=20, T=10^5$

Additional instance parameters:
- $n = 20$ (number of items)
- $n_{max} \in \{2,5,10\}$
- $n_0 = 5$
- $p_{stock} = 0.75$

### $n_{max} = 2$

In [2]:
# Build the n_max = 2 problem instance used by the inspection cells below.
# All three seed arguments are fixed to 1 so the instance is reproducible.
# NOTE(review): the section heading states T = 10^5, but this instance is
# built with T=100 -- confirm which horizon is intended.
fulfillment_instance = OrderFulfillment(
    num_items=20,
    n_max=2,
    n_0=5,
    p_stock=0.75,
    T=100,
    CSL=0.5,
    facilities_data="Data/fulfillment_centers_warmup_test.csv",
    cities_data="Data/cities_warmup_test.csv",
    prob_seed_value=1,
    order_seed_value=1,
    inv_seed_value=1,
    alpha=0.5,
)

In [3]:
# solving_LP_instance_base = SolvingLP(fulfillment_instance)
# LP_solution_base, methods_base, sizes_base, num_vars_base, num_constrs_base, optimal_value_base, our_optimization_duration = solving_LP_instance_base.optimize_LP_relaxation()
# solving_LP_instance_base.calculate_probabilities_of_consumption(LP_solution_base)

In [7]:
# Display the instance's demand distribution as a nested list of probabilities
# (nesting presumably follows the attribute name, by type then by location -- confirm).
fulfillment_instance.demand_distribution_by_type_by_location

[[[0.05169900184600667, 0.034681830436464944]],
 [[0.05772727377779248, 0.03872584477905995],
  [0.09970990662099806, 0.06688953269476557],
  [1.9603696553507875e-05, 1.3150971112011687e-05],
  [0.04185215076897491, 0.02807615513921322],
  [0.020317481736964887, 0.013629807759079933]],
 [[0.01937185728281025, 0.012995443732608908],
  [0.03907191637731537, 0.026211058825896055],
  [0.07248519955643588, 0.04862607227229888],
  [0.08322581478492458, 0.05583132155829706],
  [0.11302064703857077, 0.07581892834385613]]]

In [11]:
# Display the aggregated, adjusted demand structure. In the saved output each
# entry is 100x the matching `demand_distribution_by_type_by_location` entry
# (this instance uses T=100) -- presumably the values are summed over the
# horizon; confirm against the environment class.
fulfillment_instance.agg_adjusted_demand_distribution_by_type_by_location

[[[5.169900184600667, 3.468183043646495]],
 [[5.7727273777792485, 3.872584477905996],
  [9.970990662099807, 6.688953269476558],
  [0.001960369655350788, 0.0013150971112011688],
  [4.185215076897491, 2.807615513921322],
  [2.031748173696489, 1.3629807759079935]],
 [[1.9371857282810254, 1.299544373260891],
  [3.907191637731538, 2.621105882589606],
  [7.24851995564359, 4.862607227229889],
  [8.32258147849246, 5.5831321558297065],
  [11.302064703857079, 7.581892834385615]]]

In [9]:
# Display the flattened, adjusted arrival probabilities as a list of rows.
# The saved output is truncated; the rows visible in it are identical
# (presumably one row per time period -- confirm).
fulfillment_instance.reshape_adjusted_arrival_prob

[[0.051699001846006676,
  0.03468183043646495,
  0.057727273777792486,
  0.03872584477905996,
  0.09970990662099807,
  0.06688953269476558,
  1.9603696553507878e-05,
  1.3150971112011688e-05,
  0.041852150768974916,
  0.028076155139213223,
  0.02031748173696489,
  0.013629807759079935,
  0.019371857282810254,
  0.01299544373260891,
  0.03907191637731538,
  0.02621105882589606,
  0.0724851995564359,
  0.048626072272298884,
  0.08322581478492459,
  0.055831321558297065,
  0.11302064703857079,
  0.07581892834385615],
 [0.051699001846006676,
  0.03468183043646495,
  0.057727273777792486,
  0.03872584477905996,
  0.09970990662099807,
  0.06688953269476558,
  1.9603696553507878e-05,
  1.3150971112011688e-05,
  0.041852150768974916,
  0.028076155139213223,
  0.02031748173696489,
  0.013629807759079935,
  0.019371857282810254,
  0.01299544373260891,
  0.03907191637731538,
  0.02621105882589606,
  0.0724851995564359,
  0.048626072272298884,
  0.08322581478492459,
  0.055831321558297065,
  0.113

In [10]:
# Display the flattened aggregated values: a single flat list (22 entries in
# the saved output) holding the same numbers as the nested aggregated structure.
fulfillment_instance.reshape_agg_adjusted_arrival_prob

[5.169900184600667,
 3.468183043646495,
 5.7727273777792485,
 3.872584477905996,
 9.970990662099807,
 6.688953269476558,
 0.001960369655350788,
 0.0013150971112011688,
 4.185215076897491,
 2.807615513921322,
 2.031748173696489,
 1.3629807759079935,
 1.9371857282810254,
 1.299544373260891,
 3.907191637731538,
 2.621105882589606,
 7.24851995564359,
 4.862607227229889,
 8.32258147849246,
 5.5831321558297065,
 11.302064703857079,
 7.581892834385615]

In [None]:
# Re-display the aggregated, adjusted demand structure (same attribute as an
# earlier cell; this cell has no saved output).
fulfillment_instance.agg_adjusted_demand_distribution_by_type_by_location

In [None]:
def sum_nested_numbers(nested_list):
    """Return the sum of all numbers in an arbitrarily nested list.

    Each inner ``list`` is summed recursively and its subtotal is added to the
    running total; any element that is not a ``list`` is added directly.
    An empty list sums to 0.
    """
    running = 0
    for item in nested_list:
        # Explicit accumulation (not builtin ``sum``) keeps the exact
        # left-to-right floating-point grouping.
        running += sum_nested_numbers(item) if isinstance(item, list) else item
    return running

In [None]:
# Total of every entry in the aggregated demand structure displayed in the
# preceding cell. The previous argument, `lista`, is not defined anywhere in
# this notebook, so the cell raised NameError after Restart & Run All.
# NOTE(review): confirm this is the list that was meant to be summed.
sum_nested_numbers(fulfillment_instance.agg_adjusted_demand_distribution_by_type_by_location)

In [None]:
# fulfillment_instance.order_types

In [None]:
# fulfillment_instance.demand_distribution_by_type_by_location

In [None]:
# sum(fulfillment_instance.demand_distribution_by_type_by_location[0:2][0][0])

In [None]:
# sum(fulfillment_instance.demand_distribution_by_type_by_location[0:2][0][0]) + sum([sum(fulfillment_instance.demand_distribution_by_type_by_location[0:2][1][i]) for i in range(4)])

In [None]:
# fulfillment_instance.all_methods_location[-25]['methods']

In [None]:
# fulfillment_instance.all_costs[-25]

In [None]:

def evaluate_policies(conservative_prob=0.01, T=10**2, num_instances=50, num_order_sequences=50, plot=False, modified=True, verbose=True):
    """Compare the magician-based fulfillment policy with the always-accept policy.

    For each instance seed ``1..num_instances`` a ``FulfillmentPolicy`` is built
    and both policies are simulated on the same ``num_order_sequences`` order
    sequences (sequence seeds ``1..num_order_sequences``). Costs are averaged
    over sequences per instance (rounded to 2 decimals) and then over instances.

    Parameters
    ----------
    conservative_prob : float
        Forwarded to ``FulfillmentPolicy.generate_magician_problems``.
    T : int
        Horizon length forwarded to the environment/policy constructors.
    num_instances : int
        Number of instances; instance ``i`` uses ``i`` for all three seeds.
    num_order_sequences : int
        Number of order sequences simulated per instance.
    plot : bool
        If true, plot the per-instance cost difference (ours - always-accept).
    modified : bool
        If true use ``modified_fulfillment_policy``, otherwise
        ``fulfillment_policy``.
    verbose : bool
        If true (default, previous behaviour) print the sampled orders of
        every simulated sequence for both policies.

    Returns
    -------
    dict
        Average costs over instances, per-instance expected costs and cost
        differences, and the percentage of instances on which our policy is
        better / equal / worse than always-accept.
    """
    instances = np.arange(1, num_instances + 1)

    # How our policy's expected cost compares with always-accept, counted per instance.
    times_our_policy_is_better = 0
    times_our_policy_equal = 0
    times_our_policy_worse = 0

    # Per-instance expected costs and their difference (ours - always-accept).
    expected_cost_our_policy = []
    expected_cost_aa = []
    cost_difference = []

    # Accumulators for total cost
    total_cost_our_policy = 0
    total_cost_aa = 0

    # Per-instance win/loss/tie counts over order sequences. They are only
    # referenced by the commented-out results writer at the end of this function.
    times_our_policy_better = []
    times_aa_better = []
    times_same_cost = []

    for instance in instances:
        print('Instance', instance)

        # Both objects are built from the same settings; the instance index
        # seeds the probabilities, the orders and the inventory.
        instance_kwargs = dict(
            num_items=20, n_max=5, n_0=5,
            p_stock=1, T=T, CSL=0.5,
            facilities_data="Data/fulfillment_centers_warmup_test.csv",
            cities_data="Data/cities_warmup_test.csv",
            prob_seed_value=instance, order_seed_value=instance, inv_seed_value=instance,
        )
        # NOTE(review): this object is not used below. It is still constructed
        # because the constructor's side effects are not visible from here.
        fulfillment_instance = OrderFulfillment(**instance_kwargs)
        fulfillment_policy = FulfillmentPolicy(**instance_kwargs)
        # Generate magician problems (dictionary where the keys are (i,k))
        magician_problems = fulfillment_policy.generate_magician_problems(conservative_prob=conservative_prob)

        # Pick which variant of our policy to simulate for this instance.
        if modified:
            run_our_policy = fulfillment_policy.modified_fulfillment_policy
        else:
            run_our_policy = fulfillment_policy.fulfillment_policy

        # FOR EACH INSTANCE, GENERATE DIFFERENT ORDER SEQUENCES (i.e. order arrivals through time)
        order_sequences = np.arange(1, num_order_sequences + 1)  # seed_value for each order sequence

        count_fulfillment_policy = 0
        count_always_accept_policy = 0
        tie = 0

        expected_policy_cost = 0
        expected_policy_cost_aa = 0

        for order_sequence in tqdm(order_sequences):

            # Our policy: fresh inventory state, simulate, then validate that state.
            inventory_consumption = fulfillment_policy.initialize_inventory_consumption()
            sampled_orders_index, sampled_orders, sampled_methods, accepts_decisions, fulfillment_costs = run_our_policy(
                inventory_consumption, magician_problems, seed_value=order_sequence)
            fulfillment_policy.check_consistency(inventory_consumption)
            total_fulfillment_cost = sum(fulfillment_costs)
            if verbose:
                print(sampled_orders)

            # Always-accept baseline on the same order sequence, with its own
            # fresh inventory state.
            inventory_consumption_aa = fulfillment_policy.initialize_inventory_consumption()
            sampled_orders_index_aa, sampled_orders_aa, sampled_methods_aa, accepts_decisions_aa, fulfillment_costs_aa = fulfillment_policy.always_accept_policy(
                inventory_consumption_aa, seed_value=order_sequence)
            # Validate the baseline's own inventory state (this previously
            # re-checked our policy's state, leaving the baseline unchecked).
            fulfillment_policy.check_consistency(inventory_consumption_aa)
            total_always_accept_cost = sum(fulfillment_costs_aa)
            if verbose:
                print(sampled_orders_aa)

            expected_policy_cost += total_fulfillment_cost
            expected_policy_cost_aa += total_always_accept_cost

            # Check which policy has lower costs and update counters accordingly
            if total_fulfillment_cost < total_always_accept_cost:
                count_fulfillment_policy += 1
            elif total_always_accept_cost < total_fulfillment_cost:
                count_always_accept_policy += 1
            elif total_fulfillment_cost == total_always_accept_cost:
                tie += 1

        # Expected cost over the number of order sequences. The comparison
        # below is made on the values rounded to 2 decimals.
        expected_policy_cost = round(expected_policy_cost / num_order_sequences, 2)
        expected_policy_cost_aa = round(expected_policy_cost_aa / num_order_sequences, 2)

        total_cost_our_policy += expected_policy_cost
        total_cost_aa += expected_policy_cost_aa

        if expected_policy_cost < expected_policy_cost_aa:
            times_our_policy_is_better += 1
        elif expected_policy_cost > expected_policy_cost_aa:
            times_our_policy_worse += 1
        elif expected_policy_cost == expected_policy_cost_aa:
            times_our_policy_equal += 1

        expected_cost_our_policy.append(expected_policy_cost)
        expected_cost_aa.append(expected_policy_cost_aa)
        cost_difference.append(expected_policy_cost - expected_policy_cost_aa)

        times_our_policy_better.append(count_fulfillment_policy)
        times_aa_better.append(count_always_accept_policy)
        times_same_cost.append(tie)

    # Calculate average expected costs across instances
    average_cost_our_policy = total_cost_our_policy / num_instances
    average_cost_aa = total_cost_aa / num_instances

    if plot:
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(instances, cost_difference, label='Difference our_policy-aa_policy', marker='o')
        ax.set_xlabel('Instance')
        ax.set_ylabel('Cost Difference')
        ax.set_title('Difference in Expected Total Cost vs. Instance')
        ax.legend()
        ax.grid(True)
        plt.show()

    results = {
        "avg_cost_our_policy_over_instances": average_cost_our_policy,
        "avg_cost_aa_over_instances": average_cost_aa,
        "expected_cost_our_policy_per_instance": expected_cost_our_policy,
        "expected_cost_aa_per_instance": expected_cost_aa,
        "expected_cost_difference_per_instance": cost_difference,
        "percent_better": times_our_policy_is_better / num_instances * 100,
        "percent_equal": times_our_policy_equal / num_instances * 100,
        "percent_worse": times_our_policy_worse / num_instances * 100
    }

    # Optional results dump (disabled):
    # with open(f'results_{T}.txt', 'w') as f:
    #     f.write("Expected cost of our policy: " + str(average_cost_our_policy) + "\n")
    #     f.write("Expected cost of always_accept_policy: " + str(average_cost_aa) + "\n")
    #     f.write("Number of times magician-based fulfillment policy is better: " + str(times_our_policy_better) + "\n")
    #     f.write("Number of times always_accept_policy is better: " + str(times_aa_better) + "\n")
    #     f.write("Policies have the same cost: " + str(times_same_cost) + "\n")

    return results


In [None]:
# Small smoke run: 5 instances, one order sequence each, horizon T=8, modified policy.
evaluate_policies(conservative_prob=0.01, T=8, num_instances=5, num_order_sequences=1, plot=True, modified=True)

In [None]:
# Large run: 50 instances x 50 order sequences at T=3000 with the unmodified
# policy (modified=False). This simulates 2,500 sequences per policy.
evaluate_policies(conservative_prob=0.0001, T=3000, num_instances=50, num_order_sequences=50, plot=True, modified=False)

In [None]:
def find_best_conservative_prob(start=0.01, stop=1.0, step=0.01, **kwargs):
    """Scan ``conservative_prob`` over a grid and return the selected value.

    ``evaluate_policies`` is run once for ``start`` and once for every
    candidate ``start + i * step`` (``i = 1, 2, ...``) that does not exceed
    ``stop``. A candidate replaces the current selection when

    * its share of instances where our policy is better than or equal to
      always-accept is at least that of the current selection, and
    * its average cost difference (ours - always-accept) is strictly larger
      than that of the current selection.

    NOTE(review): a larger (ours - always-accept) difference means our policy
    is more expensive relative to always-accept; confirm that preferring a
    larger difference is the intended criterion.

    Parameters
    ----------
    start, stop, step : float
        Grid of candidate probabilities; ``stop`` is included when it lies on
        the grid. ``step`` must be positive.
    **kwargs
        Forwarded unchanged to ``evaluate_policies``.

    Returns
    -------
    float
        The selected ``conservative_prob``.

    Raises
    ------
    ValueError
        If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be positive")

    def _score(prob):
        # Evaluate one candidate and return (percent better-or-equal, average cost difference).
        results = evaluate_policies(conservative_prob=prob, **kwargs)
        better_or_equal = results['percent_better'] + results['percent_equal']
        cost_diff = results["avg_cost_our_policy_over_instances"] - results["avg_cost_aa_over_instances"]
        return better_or_equal, cost_diff

    best_prob = start
    best_times_our_policy_better_or_equal, best_average_cost_diff = _score(start)

    # Count grid steps with integers instead of np.arange with a float step:
    # rounding in a float arange can add an extra candidate beyond `stop`.
    # The small epsilon absorbs round-off when (stop - start) is a multiple of step.
    num_steps = int((stop - start) / step + 1e-9)

    for i in range(1, num_steps + 1):
        # Clamp so that accumulated round-off never pushes a candidate past `stop`.
        prob = min(start + i * step, stop)
        current_times_our_policy_better_or_equal, current_average_cost_diff = _score(prob)

        # Candidates are strictly increasing, so each one is larger than the
        # current selection; only the two criteria from the docstring are tested.
        if (current_times_our_policy_better_or_equal >= best_times_our_policy_better_or_equal
                and current_average_cost_diff > best_average_cost_diff):
            best_prob = prob
            best_times_our_policy_better_or_equal = current_times_our_policy_better_or_equal
            best_average_cost_diff = current_average_cost_diff

    return best_prob


# Run the search with its default grid and with evaluate_policies' default
# settings (no keyword arguments are forwarded). Every grid point triggers a
# full evaluate_policies run, so this cell is expensive.
best_prob = find_best_conservative_prob()
print(f"The 'best' conservative_prob based on the new criteria is: {best_prob}")


In [None]:
# Re-evaluate at the conservative_prob selected by the search cell (needs
# `best_prob` from that cell), here with horizon T=10 and the modified policy.
evaluate_policies(conservative_prob=best_prob, T=10**1, num_instances=50, num_order_sequences=50, plot=True, modified=True)