In [2]:
import numpy as np
from scipy.optimize import dual_annealing
import pulp
import pandas as pd
import os
import random
from wmape import WMAPE
from order_generator import OrderGenerator
from order_merger import OrderMerger
import math
import matplotlib.pyplot as plt

In [3]:
random.seed(42)

In [4]:
# import allocations for lead day 5 and lead day 0 and calculate wmape values

allocation_dir = 'allocations'
merged_dir = 'merged'
lead_day = 5
soft_col = f'lead_{lead_day}'
hard_col = 'lead_0'

allocation_0 = pd.read_csv(f'{allocation_dir}/allocation_lead_day_0.csv')
allocation_5 = pd.read_csv(f'{allocation_dir}/allocation_lead_day_5.csv')

merged_5_0 = pd.read_csv(f'{merged_dir}/merged_lead_day_{lead_day}_0.csv')

_, wmape_site = WMAPE.calculate_wmape_site(merged_5_0, soft_col, hard_col)
_, wmape_global = WMAPE.calculate_wmape_global(merged_5_0, soft_col, hard_col)

print(f'wmape_global: {wmape_global}')
print(f'wmape_site: {wmape_site}')

wmape_global: 0.23284907183212267
wmape_site: 0.4854721549636804


In [5]:
# Objective Function
def objective_function(current_allocation, orders_df, target_orders_df, initial_day, target_day):
    # Apply the current allocation to the orders dataframe
    orders_df['site'] = current_allocation

    # Aggregate the orders for the initial and target lead days
    aggregated_initial_df = OrderMerger.aggregate_items(orders_df, initial_day)
    aggregated_target_df = OrderMerger.aggregate_items(target_orders_df, target_day)

    # Merge the aggregated dataframes
    merged_df = OrderMerger.merge_allocation(aggregated_initial_df, aggregated_target_df, initial_day, target_day)

    # Calculate WMAPE
    _, wmape_site = WMAPE.calculate_wmape_site(merged_df, f'lead_{initial_day}', f'lead_{target_day}')
    return wmape_site

In [6]:
# Generate Neighbor Function
def generate_neighbor(current_allocation, orders_df, eligibility_dict):
    new_allocation = current_allocation.copy()
    max_attempts = 100  # Limit the number of attempts to find a valid swap

    unique_order_ids = orders_df['order_id'].unique()

    for _ in range(max_attempts):
        order_indices = random.sample(range(len(unique_order_ids)), 2)

        order_id1 = unique_order_ids[order_indices[0]]
        order_id2 = unique_order_ids[order_indices[1]]

        items1 = orders_df[orders_df['order_id'] == order_id1]['item_id'].tolist()
        items2 = orders_df[orders_df['order_id'] == order_id2]['item_id'].tolist()

        site1 = new_allocation[orders_df[orders_df['order_id'] == order_id1].index[0]]
        site2 = new_allocation[orders_df[orders_df['order_id'] == order_id2].index[0]]

        if all(item in eligibility_dict[site2] for item in items1) and all(item in eligibility_dict[site1] for item in items2):
            # Perform the swap if both are eligible
            new_allocation[orders_df[orders_df['order_id'] == order_id1].index] = site2
            new_allocation[orders_df[orders_df['order_id'] == order_id2].index] = site1
            break

    return new_allocation

In [7]:
def acceptance_probability(current_energy, new_energy, temperature):
    if new_energy < current_energy:
        return 1.0
    else:
        return math.exp((current_energy - new_energy) / temperature)

def cooling_schedule(temp, alpha, improvement_rate):
    if improvement_rate > 0.01:
        alpha = min(alpha * 1.01, 0.99)  # Slightly increase alpha but cap it to 0.99
    elif improvement_rate < 0.001:
        alpha = max(alpha * 0.99, 0.90)  # Slightly decrease alpha but ensure it doesn't drop too low
    return temp * alpha

In [11]:
# Combined k-opt and Simulated Annealing Algorithm
def k_opt_simulated_annealing(orders_df, target_orders_df, eligibility_dict, initial_day, target_day, k, initial_temp, final_temp, alpha, max_iter):
    current_allocation = orders_df['site'].values
    best_allocation = current_allocation.copy()
    current_energy = objective_function(current_allocation, orders_df, target_orders_df, initial_day, target_day)
    best_energy = current_energy

    temp = initial_temp
    step = 0

    while temp > final_temp and step < max_iter:
        neighbors = []
        for _ in range(k):
            new_allocation = generate_neighbor(current_allocation, orders_df, eligibility_dict)
            new_energy = objective_function(new_allocation, orders_df, target_orders_df, initial_day, target_day)
            neighbors.append((new_allocation, new_energy))

        # Select the best neighbor
        best_neighbor_allocation, best_neighbor_energy = min(neighbors, key=lambda x: x[1])

        # Acceptance criterion (simulated annealing)
        if acceptance_probability(current_energy, best_neighbor_energy, temp) > random.random():
            current_allocation = best_neighbor_allocation
            current_energy = best_neighbor_energy

            if current_energy < best_energy:
                best_allocation = current_allocation
                best_energy = current_energy

        improvement_rate = (current_energy - best_energy) / current_energy
        temp = cooling_schedule(temp, alpha, improvement_rate)
        step += 1

        # Debug statement to trace the progress
        if step % 100 == 0:
            print(f'Step {step}: Best Energy {best_energy}, Temperature: {temp}')

    # Debug statement for final values
    print(f'Final Step: {step}, Final Temperature: {temp}')
    return best_allocation, best_energy


# Perform k-opt and Simulated Annealing Between Consecutive Days
def perform_k_opt_simulated_annealing(lead_days, k, initial_temp, final_temp, alpha, max_iter, eligibility_dict):
    # Load the initial allocation dataframe for the first lead day
    previous_df = pd.read_csv(f'{allocation_dir}/allocation_lead_day_{lead_days[0]}.csv')
    
    # Ensure the first lead day stays unmodified
    previous_df.to_csv(f'hybrid_sa_{k}opt_allocations/hybrid_sa_{k}_opt_allocation_lead_day_{lead_days[0]}.csv', index=False)
    
    for i in range(1, len(lead_days)):
        current_day = lead_days[i]
        previous_day = lead_days[i-1]
        
        current_df = pd.read_csv(f'{allocation_dir}/allocation_lead_day_{current_day}.csv')
        
        best_allocation, best_energy = k_opt_simulated_annealing(current_df.copy(), previous_df.copy(), eligibility_dict, current_day, previous_day, k, initial_temp, final_temp, alpha, max_iter)
        
        # Update the allocation for the current day with the best found allocation
        current_df['site'] = best_allocation
        
        # Save the updated dataframe for the current lead day
        current_df.to_csv(f'hybrid_sa_{k}opt_allocations/hybrid_sa_{k}_opt_allocation_lead_day_{current_day}.csv', index=False)
        
        # Set the current dataframe as the previous dataframe for the next iteration
        previous_df = current_df.copy()

In [12]:
# Aggregate, Merge DataFrames and Calculate WMAPE
def calculate_wmape_for_all_days(lead_days, k):
    wmape_site_values = []
    wmape_global_values = []

    final_day = lead_days[-1]
    final_day_df = pd.read_csv(f'hybrid_sa_{k}opt_merged/hybrid_sa_{k}_opt_sa_allocation_lead_day_{final_day}.csv')
    aggregated_final_day = OrderMerger.aggregate_items(final_day_df, final_day)
    
    for day in lead_days[:-1]:
        current_df = pd.read_csv(f'hybrid_sa_{k}opt_merged/hybrid_sa_{k}_opt_sa_allocation_lead_day_{day}.csv')
        aggregated_current_day = OrderMerger.aggregate_items(current_df, day)
        
        merged_df = OrderMerger.merge_allocation(aggregated_current_day, aggregated_final_day, day, final_day)
        
        site_df, wmape_site = WMAPE.calculate_wmape_site(merged_df, f'lead_{day}', f'lead_{final_day}')
        global_df, wmape_global = WMAPE.calculate_wmape_global(merged_df, f'lead_{day}', f'lead_{final_day}')
        
        wmape_site_values.append(wmape_site)
        wmape_global_values.append(wmape_global)
        
        # Save the merged dataframe
        merged_df.to_csv(f'hybrid_sa_{k}opt_merged/hybrid_sa_{k}_opt_merged_allocation_lead_day_{day}_to_{final_day}.csv', index=False)
    
    # Add 0 for lead day 0
    wmape_site_values.append(0)
    wmape_global_values.append(0)
    
    return wmape_site_values, wmape_global_values

# Plot the WMAPE Values
def plot_wmape(lead_days, wmape_site_values, wmape_global_values):
    # Plot the data
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(lead_days, wmape_site_values, linestyle='-', color='b', label='WMAPE Site')
    ax.plot(lead_days, wmape_global_values, linestyle='-', color='r', label='WMAPE Global')

    # Add labels and title
    ax.set_xlabel('Lead Day')
    ax.set_ylabel('Error')
    ax.set_title('WMAPE Values for Different Lead Days')
    ax.set_xticks(lead_days)
    ax.legend()

    # Invert x-axis to have 0 at the end
    ax.invert_xaxis()

    plt.show()

In [14]:


lead_days = list(range(18, -1, -1))  # Define the lead days from 18 to 0
k = 2  # Number of neighbors to generate in each iteration
initial_temp = 5000
final_temp = 0.1
alpha = 0.99
max_iter = 1000

eligibility_dict = OrderGenerator.load_eligibility_dict('eligibility_dict.json')
# Perform k-opt and simulated annealing for consecutive lead days
perform_k_opt_simulated_annealing(lead_days, k, initial_temp, final_temp, alpha, max_iter, eligibility_dict)

Step 100: Best Energy 0.34383460497169577, Temperature: 676.6650245351597
Step 200: Best Energy 0.3366970219049963, Temperature: 92.50011220791274
Step 300: Best Energy 0.32906719173024856, Temperature: 12.393134957941097
Step 400: Best Energy 0.32439084420379033, Temperature: 1.7994448542077084
Step 500: Best Energy 0.32143736155550084, Temperature: 0.2586610874795004
Final Step: 548, Final Temperature: 0.09856206194985236
Step 100: Best Energy 0.3914964108227499, Temperature: 669.8983742898077
Step 200: Best Energy 0.383489784649365, Temperature: 89.7527663752255
Step 300: Best Energy 0.3785201546107123, Temperature: 13.031824001990653
Step 400: Best Energy 0.3735505245720596, Temperature: 1.7459995425928865
Step 500: Best Energy 0.37023743787962454, Temperature: 0.2947634081694301
Final Step: 572, Final Temperature: 0.09955763833318426
Step 100: Best Energy 0.3197533849129594, Temperature: 669.8983742898077
Step 200: Best Energy 0.3088733075435203, Temperature: 89.7527663752255
Step

In [15]:
# Calculate WMAPE for all days
wmape_site_values, wmape_global_values = calculate_wmape_for_all_days(lead_days, k)

# Plot the WMAPE values
plot_wmape(lead_days, wmape_site_values, wmape_global_values)

FileNotFoundError: [Errno 2] No such file or directory: 'k_opt_sa_allocation_lead_day_0.csv'