In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from order_fulfillment_environment_notidentical_arrival_probs import OrderFulfillment
from fulfillment_policy_notidentical_arrival_probs import FulfillmentPolicy
from tqdm import tqdm

# Small Network $J=10, K=5, n=20, T=10^5$

Moreover:
- $n = 20$ (number of items)
- $n_{max} \in \{2,5,10\}$
- $n_0 = 5$
- $p_{stock} = 0.75$

### $n_{max} = 2$

In [2]:
# fulfillment_instance = OrderFulfillment(num_items=20, n_max=5, n_0=5,
#                                         p_stock=1, T=100, CSL=0.5,
#                                         facilities_data="Data/fulfillment_centers_warmup_test.csv", 
#                                         cities_data="Data/cities_warmup_test.csv",
#                                         prob_seed_value=11,
#                                         order_seed_value=11,
#                                         inv_seed_value=11)

# print(fulfillment_instance.fixed_costs)
# print(fulfillment_instance.unit_costs)

In [3]:
# fulfillment_instance.order_types

In [4]:
# fulfillment_instance.demand_distribution_by_type_by_location

In [5]:
# sum(fulfillment_instance.demand_distribution_by_type_by_location[0:2][0][0])

In [6]:
# sum(fulfillment_instance.demand_distribution_by_type_by_location[0:2][0][0]) + sum([sum(fulfillment_instance.demand_distribution_by_type_by_location[0:2][1][i]) for i in range(4)])

In [7]:
# fulfillment_instance.all_methods_location[-25]['methods']

In [8]:
# fulfillment_instance.all_costs[-25]

In [9]:
# def compare_lists(list1, list2):
#     """
#     Compare two lists and return all indices where they differ.
#     If they are the same, return an empty list.
#     """
#     differences = []

#     # Iterate through the lists simultaneously
#     for i, (item1, item2) in enumerate(zip(list1, list2)):
#         if item1 != item2:
#             differences.append(i)  # Add the index to the differences list

#     # Check if one list is longer than the other
#     longer_length = max(len(list1), len(list2))
#     for i in range(min(len(list1), len(list2)), longer_length):
#         differences.append(i)  # Add the indices where the longer list continues

#     return differences

In [10]:
# print(compare_lists([1,3,2,4], [1,2,3,4]))

In [18]:
def evaluate_policies(conservative_prob=0.01, T=10**2, num_instances=1, num_order_sequences=50, plot=False, modified=False, alpha=0.5):

    times_our_policy_is_better = 0
    times_our_policy_equal = 0
    times_our_policy_worse = 0
    
    instances = np.arange(1, num_instances + 1)
    
    expected_cost_our_policy = []
    expected_cost_aa = []
    cost_difference = []
    
    # Accumulators for total cost
    total_cost_our_policy = 0
    total_cost_aa = 0

    times_our_policy_better = []
    times_aa_better = []
    times_same_cost = []
    
    for instance in instances:
        
        # if instance == 1:
        #     continue
        
        print('Instance', instance)
        
        fulfillment_instance = OrderFulfillment(num_items=5, n_max=5, n_0=5,
                                                p_stock=0.75, T=T, CSL=0.5,
                                                facilities_data="Data/fulfillment_centers_warmup_test.csv", cities_data="Data/cities_warmup_test.csv", prob_seed_value=instance, order_seed_value=instance, inv_seed_value=instance, alpha=alpha)
        # Create an instance of FulfillmentPolicy (same as fulfillment_instance in this case)
        fulfillment_policy = FulfillmentPolicy(num_items=5, n_max=5, n_0=5,
                                                p_stock=0.75, T=T, CSL=0.5,
                                                facilities_data="Data/fulfillment_centers_warmup_test.csv", cities_data="Data/cities_warmup_test.csv", prob_seed_value=instance, order_seed_value=instance, inv_seed_value=instance, alpha=alpha)
        #print(fulfillment_instance.safety_stock)
        # Generate magician problems (dictionary where the keys are (i,k))
        magician_problems = fulfillment_policy.generate_magician_problems(conservative_prob=conservative_prob)
        # print(magician_problems[(4,0)]['breaking_wand_probabilities'])
        
        
        # FOR EACH INSTANCE, GENERATE DIFFERENT ORDER SEQUENCES (i.e. order arrivals through time)
        order_sequences = np.arange(1, num_order_sequences + 1) # seed_value for each order sequence

        count_fulfillment_policy = 0
        count_always_accept_policy = 0
        tie = 0

        expected_policy_cost = 0
        expected_policy_cost_aa = 0

        # Lists to store costs
        fulfillment_costs_list = []
        always_accept_costs_list = []
        
        # for order_sequence in tqdm(order_sequences):
        for order_sequence in tqdm(order_sequences):
            
            # Initialize inventory consumption for our fulfillment policy
            if modified:
                inventory_consumption = fulfillment_policy.initialize_inventory_consumption()
                sampled_orders_index, sampled_orders, sampled_methods, accepts_decisions, fulfillment_costs = fulfillment_policy.modified_fulfillment_policy(inventory_consumption, magician_problems, seed_value=order_sequence)
                fulfillment_policy.check_consistency(inventory_consumption)
                total_fulfillment_cost = sum(fulfillment_costs)
            else:
                inventory_consumption = fulfillment_policy.initialize_inventory_consumption()
                sampled_orders_index, sampled_orders, sampled_methods, accepts_decisions, fulfillment_costs = fulfillment_policy.fulfillment_policy(inventory_consumption, magician_problems, seed_value=order_sequence)
                fulfillment_policy.check_consistency(inventory_consumption)
                total_fulfillment_cost = sum(fulfillment_costs)
                
            
            # Initialize inventory consumption for always_accept_policy
            inventory_consumption_aa = fulfillment_policy.initialize_inventory_consumption()
            sampled_orders_index_aa, sampled_orders_aa, sampled_methods_aa, accepts_decisions_aa, fulfillment_costs_aa = fulfillment_policy.always_accept_policy(inventory_consumption_aa, seed_value=order_sequence)
            fulfillment_policy.check_consistency(inventory_consumption)
            total_always_accept_cost = sum(fulfillment_costs_aa)
            
            # if accepts_decisions != accepts_decisions_aa:
            #     print(compare_lists(accepts_decisions, accepts_decisions_aa))
            #     print([sampled_methods[i] for i in compare_lists(accepts_decisions, accepts_decisions_aa)])

            # Append cost of order sequence to lists
            fulfillment_costs_list.append(total_fulfillment_cost)
            always_accept_costs_list.append(total_always_accept_cost)
            
            expected_policy_cost += total_fulfillment_cost
            expected_policy_cost_aa += total_always_accept_cost

            # Check which policy has lower costs and update counters accordingly
            if total_fulfillment_cost < total_always_accept_cost:
                count_fulfillment_policy += 1
            elif total_always_accept_cost < total_fulfillment_cost:
                count_always_accept_policy += 1
            elif total_fulfillment_cost == total_always_accept_cost:
                tie += 1

        # 95% CONFIDENCE INTERVAL for the difference
        cost_differences = np.array(fulfillment_costs_list) - np.array(always_accept_costs_list)
        mean_difference = np.mean(cost_differences)
        std_deviation = np.std(cost_differences, ddof=1)
        n = len(cost_differences)
        # Calculate the standard error and the t-score for 95% confidence
        standard_error = std_deviation / np.sqrt(n)
        t_score = stats.t.ppf(0.975, df=n-1)  # two-tailed 95% confidence, so 0.975
        # Confidence interval
        confidence_interval = (mean_difference - t_score * standard_error, mean_difference + t_score * standard_error)
        
        expected_policy_cost = round(expected_policy_cost/num_order_sequences,2) # expected cost over the number of order sequences
        expected_policy_cost_aa = round(expected_policy_cost_aa/num_order_sequences,2) # expected cost over the number of order sequences

        total_cost_our_policy += expected_policy_cost
        total_cost_aa += expected_policy_cost_aa

        if expected_policy_cost < expected_policy_cost_aa:
            times_our_policy_is_better += 1
        elif expected_policy_cost > expected_policy_cost_aa:
            times_our_policy_worse += 1
        elif expected_policy_cost == expected_policy_cost_aa:
            times_our_policy_equal += 1
            
        expected_cost_our_policy.append(expected_policy_cost)
        expected_cost_aa.append(expected_policy_cost_aa)
        cost_difference.append(expected_policy_cost-expected_policy_cost_aa)
        
        times_our_policy_better.append(count_fulfillment_policy)
        times_aa_better.append(count_always_accept_policy)
        times_same_cost.append(tie)
    
    # Calculate average expected costs across instances
    average_cost_our_policy = total_cost_our_policy / num_instances
    average_cost_aa = total_cost_aa / num_instances
            
    if plot:
        plt.figure(figsize=(10, 6))
        plt.plot(instances, cost_difference, label='Difference our_policy-aa_policy', marker='o')
        plt.xlabel('Instance')
        plt.ylabel('Cost Difference')
        plt.title('Difference in Expected Total Cost vs. Instance')
        plt.legend()
        plt.grid(True)
        plt.show()

    results = {
        "avg_cost_our_policy_over_instances": average_cost_our_policy,
        "avg_cost_aa_over_instances": average_cost_aa,
        "expected_cost_our_policy_per_instance": expected_cost_our_policy,
        "expected_cost_aa_per_instance": expected_cost_aa,
        "expected_cost_difference_per_instance": cost_difference,
        "percent_better_over_instances": times_our_policy_is_better / num_instances * 100,
        "percent_equal_over_instances": times_our_policy_equal / num_instances * 100,
        "percent_worse_over_instances": times_our_policy_worse / num_instances * 100,
        "confidence_interval": confidence_interval
    }
    
    # with open(f'results_{T}.txt', 'w') as f:
    #     f.write("Expected cost of our policy: " + str(avg_cost_our_policy) + "\n")
    #     f.write("Expected cost of always_accept_policy: " + str(avg_cost_aa) + "\n")
    #     f.write("Number of times magician-based fulfillment policy is better: " + str(times_our_policy_better) + "\n")
    #     f.write("Number of times always_accept_policy is better: " + str(times_aa_better) + "\n")
    #     f.write("Policies have the same cost: " + str(times_same_cost) + "\n")
    
    return results


In [19]:
evaluate_policies(conservative_prob=0, T=1000, num_instances=1, num_order_sequences=5, plot=False, modified=False, alpha=1)

Instance 1
[[211.0, 202.0, 167.0, 273.0, 160.0], [315.0, 301.0, 249.0, 407.0, 239.0]]
expected number of broken wands larger than the available ones
expected number of broken wands larger than the available ones
expected number of broken wands larger than the available ones


 40%|████      | 2/5 [00:00<00:00,  5.81it/s]

[474, 476, 482, 488, 490, 493, 989, 990, 991, 994, 999]
[[(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1)], [(0, 1), (1, 1), (4, 1)], [(0, 1), (1, 1), (3, 1), (4, 1)], [(3, 1), (4, 1)], [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1)]]
[352, 357, 363, 369, 370, 382, 399, 402, 409, 410, 418, 420, 421, 423, 440, 441, 455, 459, 464, 468, 482, 484, 948, 951]
[[(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 0)], [(4, 1)], [(4, 0)], [(4, 1)], [(4, 1)], [(4, 1)], [(0, 0), (1, 0), (4, 0)], [(3, 0), (4, 0)]]


 60%|██████    | 3/5 [00:00<00:00,  5.57it/s]

[342, 344, 360, 379, 454, 949, 952, 956, 958, 960]
[[(4, 0)], [(4, 0)], [(4, 0)], [(4, 0)], [(4, 0)], [(0, 0), (1, 0), (3, 0), (4, 0)], [(0, 0), (1, 0), (2, 0), (3, 0), (4, 0)], [(0, 0), (1, 0), (3, 0), (4, 0)], [(1, 0), (2, 0), (3, 0), (4, 0)], [(0, 0), (1, 0), (4, 0)]]


100%|██████████| 5/5 [00:00<00:00,  5.33it/s]


[365, 379, 383, 397, 435, 440, 442, 465, 471, 480, 482, 484, 972, 978, 979, 981, 982, 983, 986, 987]
[[(4, 0)], [(4, 0)], [(4, 0)], [(4, 0)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(4, 1)], [(0, 1), (1, 1), (3, 1), (4, 1)], [(0, 1), (1, 1), (3, 1), (4, 1)], [(1, 1), (2, 1), (3, 1), (4, 1)], [(1, 1), (2, 1), (3, 1), (4, 1)], [(0, 1), (1, 1), (2, 1), (4, 1)], [(0, 1), (2, 1), (3, 1), (4, 1)], [(0, 1), (1, 1), (4, 1)], [(0, 1), (1, 1), (3, 1), (4, 1)]]


{'avg_cost_our_policy_over_instances': 9918.31,
 'avg_cost_aa_over_instances': 10082.33,
 'expected_cost_our_policy_per_instance': [9918.31],
 'expected_cost_aa_per_instance': [10082.33],
 'expected_cost_difference_per_instance': [-164.02000000000044],
 'percent_better_over_instances': 100.0,
 'percent_equal_over_instances': 0.0,
 'percent_worse_over_instances': 0.0,
 'confidence_interval': (-465.7154571103449, 137.66628018935083)}

In [20]:
evaluate_policies(conservative_prob=0, T=10, num_instances=1, num_order_sequences=5, plot=False, modified=False, alpha=1)

Instance 1
[[1058.0, 1010.0, 837.0, 1367.0, 803.0], [1577.0, 1505.0, 1248.0, 2039.0, 1197.0]]
expected number of broken wands larger than the available ones
expected number of broken wands larger than the available ones
expected number of broken wands larger than the available ones
expected number of broken wands larger than the available ones
expected number of broken wands larger than the available ones


 20%|██        | 1/5 [00:00<00:03,  1.18it/s]

[474, 476, 482, 488, 490, 493, 500, 507, 548, 550, 555, 574, 591, 597, 608, 624, 642, 643, 645, 646, 651, 655, 656, 657, 668, 697, 700, 704, 712, 729, 735, 737, 763, 765, 802, 831, 838, 861, 869, 870, 887, 891, 896, 903, 906, 922, 923, 938, 939, 944, 946, 950, 954, 955, 958, 962, 964, 981, 985, 993, 997, 1000, 1017, 1022, 1024, 1027, 1028, 1030, 1034, 1046, 1048, 1055, 1063, 1071, 1072, 1080, 1081, 1084, 1085, 1087, 1088, 1092, 1096, 1099, 1101, 1102, 1111, 1113, 1116, 1118, 1125, 1127, 1133, 1136, 1138, 1149, 1155, 1156, 1158, 1159, 1166, 1180, 1191, 1192, 1193, 1194, 1198, 1201, 1207, 1214, 1215, 1218, 1229, 1235, 1237, 1245, 1247, 1258, 1262, 1272, 1276, 1280, 1282, 1285, 1292, 1296, 1321, 1332, 1333, 1342, 1352, 1356, 1359, 1364, 1384, 1395, 1402, 1421, 1422, 1427, 1429, 1430, 1436, 1437, 1438, 1442, 1444, 1447, 1448, 1452, 1460, 1462, 1465, 1472, 1475, 1478, 1479, 1496, 1503, 1504, 1505, 1513, 1516, 1526, 1527, 1528, 1530, 1535, 1541, 1543, 1547, 1554, 1559, 1560, 1563, 1565, 1576

 40%|████      | 2/5 [00:01<00:02,  1.15it/s]

[352, 357, 363, 369, 370, 382, 399, 402, 409, 410, 418, 420, 421, 423, 440, 441, 455, 459, 464, 468, 482, 484, 524, 535, 536, 538, 547, 550, 560, 565, 573, 577, 629, 643, 645, 657, 672, 688, 713, 722, 723, 733, 736, 744, 750, 818, 826, 831, 835, 839, 848, 852, 853, 855, 858, 859, 860, 862, 868, 871, 874, 876, 877, 879, 885, 893, 900, 902, 906, 909, 912, 916, 920, 922, 928, 940, 945, 948, 955, 956, 958, 962, 967, 968, 970, 975, 980, 987, 994, 998, 1012, 1013, 1016, 1024, 1047, 1048, 1051, 1052, 1054, 1056, 1060, 1063, 1072, 1083, 1084, 1085, 1102, 1104, 1111, 1115, 1124, 1126, 1127, 1130, 1141, 1149, 1150, 1151, 1158, 1160, 1172, 1178, 1189, 1190, 1203, 1205, 1208, 1213, 1214, 1215, 1216, 1221, 1228, 1231, 1244, 1251, 1256, 1258, 1264, 1280, 1281, 1288, 1292, 1294, 1300, 1301, 1303, 1312, 1314, 1318, 1320, 1321, 1323, 1329, 1336, 1346, 1354, 1356, 1360, 1366, 1367, 1368, 1380, 1387, 1389, 1391, 1392, 1394, 1395, 1397, 1400, 1401, 1438, 1454, 1455, 1462, 1465, 1478, 1479, 1480, 1489, 150

 60%|██████    | 3/5 [00:02<00:01,  1.13it/s]

[342, 344, 360, 379, 454, 519, 520, 526, 528, 529, 532, 549, 554, 562, 568, 571, 592, 598, 604, 610, 612, 628, 634, 640, 655, 658, 661, 693, 698, 700, 726, 736, 742, 750, 761, 770, 773, 779, 785, 791, 798, 799, 800, 801, 802, 833, 855, 859, 862, 863, 864, 868, 880, 889, 890, 891, 897, 906, 913, 914, 915, 917, 919, 920, 933, 934, 940, 942, 949, 956, 964, 966, 976, 980, 985, 992, 994, 996, 1009, 1013, 1015, 1016, 1017, 1024, 1029, 1031, 1036, 1037, 1038, 1041, 1048, 1054, 1060, 1065, 1067, 1071, 1080, 1083, 1086, 1090, 1102, 1110, 1114, 1115, 1118, 1128, 1130, 1135, 1139, 1141, 1144, 1150, 1162, 1167, 1171, 1172, 1173, 1192, 1196, 1197, 1205, 1206, 1208, 1236, 1238, 1239, 1240, 1241, 1245, 1246, 1253, 1258, 1259, 1275, 1290, 1295, 1298, 1304, 1317, 1321, 1322, 1325, 1330, 1348, 1350, 1351, 1352, 1353, 1358, 1360, 1365, 1370, 1377, 1378, 1384, 1388, 1402, 1404, 1407, 1410, 1420, 1422, 1423, 1426, 1431, 1432, 1433, 1436, 1445, 1447, 1454, 1455, 1459, 1470, 1473, 1478, 1485, 1486, 1490, 149

 80%|████████  | 4/5 [00:03<00:00,  1.16it/s]

[556, 573, 574, 578, 584, 616, 618, 620, 626, 644, 649, 650, 655, 656, 669, 706, 715, 717, 728, 734, 735, 736, 740, 742, 745, 748, 772, 775, 776, 778, 783, 784, 792, 794, 795, 796, 802, 807, 810, 812, 844, 852, 854, 855, 868, 877, 888, 896, 899, 902, 904, 905, 921, 922, 923, 930, 931, 933, 937, 939, 944, 952, 958, 964, 965, 968, 987, 990, 996, 997, 1000, 1005, 1008, 1010, 1012, 1020, 1023, 1025, 1027, 1030, 1032, 1039, 1040, 1046, 1060, 1063, 1077, 1086, 1089, 1094, 1101, 1104, 1111, 1114, 1117, 1124, 1125, 1126, 1128, 1152, 1156, 1158, 1163, 1165, 1173, 1175, 1181, 1184, 1187, 1190, 1197, 1200, 1204, 1208, 1210, 1215, 1220, 1221, 1231, 1233, 1234, 1237, 1244, 1245, 1251, 1264, 1273, 1274, 1284, 1288, 1297, 1305, 1310, 1318, 1322, 1330, 1333, 1335, 1337, 1342, 1345, 1347, 1351, 1354, 1359, 1360, 1363, 1368, 1369, 1373, 1384, 1388, 1392, 1411, 1413, 1420, 1423, 1424, 1427, 1431, 1439, 1446, 1448, 1468, 1469, 1470, 1472, 1474, 1475, 1476, 1482, 1488, 1490, 1491, 1496, 1498, 1502, 1504, 1

100%|██████████| 5/5 [00:04<00:00,  1.17it/s]

[365, 379, 383, 397, 435, 440, 442, 465, 471, 480, 482, 484, 503, 542, 556, 558, 559, 560, 566, 595, 605, 607, 608, 611, 622, 638, 645, 662, 674, 676, 694, 697, 701, 710, 724, 728, 772, 782, 799, 800, 802, 805, 807, 812, 814, 816, 827, 829, 837, 844, 861, 871, 874, 936, 940, 943, 952, 954, 956, 970, 972, 975, 980, 984, 991, 992, 1007, 1017, 1024, 1029, 1047, 1056, 1058, 1064, 1078, 1080, 1082, 1086, 1087, 1092, 1095, 1097, 1099, 1116, 1119, 1124, 1130, 1132, 1138, 1143, 1144, 1147, 1148, 1155, 1160, 1164, 1173, 1174, 1222, 1227, 1228, 1231, 1240, 1254, 1255, 1257, 1258, 1260, 1261, 1262, 1264, 1268, 1282, 1285, 1287, 1293, 1297, 1299, 1303, 1306, 1307, 1311, 1318, 1331, 1337, 1360, 1364, 1365, 1375, 1376, 1380, 1388, 1390, 1393, 1394, 1402, 1408, 1412, 1418, 1425, 1426, 1428, 1430, 1435, 1446, 1464, 1467, 1473, 1474, 1475, 1482, 1487, 1488, 1490, 1533, 1544, 1551, 1558, 1564, 1577, 1578, 1587, 1597, 1599, 1603, 1613, 1620, 1626, 1628, 1629, 1635, 1645, 1647, 1651, 1665, 1669, 1676, 168




{'avg_cost_our_policy_over_instances': 52095.56,
 'avg_cost_aa_over_instances': 49069.15,
 'expected_cost_our_policy_per_instance': [52095.56],
 'expected_cost_aa_per_instance': [49069.15],
 'expected_cost_difference_per_instance': [3026.409999999996],
 'percent_better_over_instances': 0.0,
 'percent_equal_over_instances': 0.0,
 'percent_worse_over_instances': 100.0,
 'confidence_interval': (464.42706464016055, 5588.402100539459)}

In [None]:
evaluate_policies(conservative_prob=0.01, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.2, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.3, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.4, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.5, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.6, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.7, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.8, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.9, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=1)

In [None]:
evaluate_policies(conservative_prob=0.1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.2, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.3, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.4, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.5, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.6, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.7, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.8, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.9, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.9)

In [None]:
evaluate_policies(conservative_prob=0.1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.2, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.3, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.4, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.5, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.6, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.7, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.8, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.9, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0)

In [None]:
evaluate_policies(conservative_prob=0.1, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.2, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.3, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.4, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.5, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.6, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.7, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.8, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
evaluate_policies(conservative_prob=0.9, T=1000, num_instances=50, num_order_sequences=50, plot=True, modified=False, alpha=0.1)

In [None]:
def find_best_conservative_prob(start=0.01, stop=1.0, step=0.01, **kwargs):
    best_prob = start
    results = evaluate_policies(conservative_prob=start, **kwargs)
    best_times_our_policy_better_or_equal = results['percent_better'] + results['percent_equal']
    best_average_cost_diff = results["avg_cost_our_policy_over_instances"] - results["avg_cost_aa_over_instances"]

    for prob in np.arange(start + step, stop + step, step):
        results = evaluate_policies(conservative_prob=prob, **kwargs)
        current_times_our_policy_better_or_equal = results['percent_better'] + results['percent_equal']
        current_average_cost_diff = results["avg_cost_our_policy_over_instances"] - results["avg_cost_aa_over_instances"]
        
        # Update the best_prob if the current probability results in our policy being at least as often better or equal than before
        # and if the average cost of our policy is greater than the average cost of the always_accept_policy
        if current_times_our_policy_better_or_equal >= best_times_our_policy_better_or_equal and current_average_cost_diff > best_average_cost_diff and prob > best_prob:
            best_prob = prob
            best_times_our_policy_better_or_equal = current_times_our_policy_better_or_equal
            best_average_cost_diff = current_average_cost_diff
            
    return best_prob


best_prob = find_best_conservative_prob()
print(f"The 'best' conservative_prob based on the new criteria is: {best_prob}")


In [None]:
evaluate_policies(conservative_prob=best_prob, T=10**1, num_instances=50, num_order_sequences=50, plot=True, modified=True)