In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans, BisectingKMeans
import importlib
import math
import os
from collections import defaultdict
import torch
from torch.utils.data import Dataset
from k_means_constrained import KMeansConstrained
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
%matplotlib inline
import GnnScheduleDataset as GnnScheduleDataset_Module
import MultiCriteriaGNNModel as MultiCriteriaGNNModel_Module

importlib.reload(GnnScheduleDataset_Module) # in case of updates
importlib.reload(MultiCriteriaGNNModel_Module) # in case of updates

from GnnScheduleDataset import GnnScheduleDataset
from MultiCriteriaGNNModel import MultiCriteriaGNNModel
import subprocess
import sys

#manual installing
def install_package(package_name, use_index_url=True):
    """
    Reinstall `package_name` with pip, using the interpreter that runs this kernel.

    Any existing copy is uninstalled first, then the package is installed again.

    Args:
        package_name: name handed to pip.
        use_index_url: when True, install from the PyTorch CUDA 12.6 wheel index
            (https://download.pytorch.org/whl/cu126); when False, use pip's
            default index.

    Raises:
        subprocess.CalledProcessError: if either pip command exits with a
            non-zero status (both run with check=True).
    """
    pip_prefix = [sys.executable, "-m", "pip"]
    print(f"Installing {package_name}...")

    #step 1: python -m pip uninstall -y [package_name]
    subprocess.run(pip_prefix + ["uninstall", "-y", package_name], check=True, text=True)

    #step 2: python -m pip install [package_name] [--index-url ...]
    install_cmd = pip_prefix + ["install", package_name]
    if use_index_url:
        install_cmd += ["--index-url", "https://download.pytorch.org/whl/cu126"]
    subprocess.run(install_cmd, check=True, text=True)

    print(f"Successfully installed {package_name}!")

# Try to import, if it fails, install it
# try:
#     import torch
#     print("Torch is already available.")
# except: 
#     #install_package('torch')
#     # After installing, you must use importlib to refresh or restart the script
#     import torch
#     print("Torch imported successfully after installation.")

#torch-scatter torch-sparse torch-cluster torch-spline-conv pyg-lib
#torch-scatter torch-sparse torch-cluster torch-spline-conv
# install_package('torch-scatter')
# install_package('torch-sparse')
# install_package('torch-cluster')
# install_package('torch-spline-conv')

# try:
#     import torch_geometric
#     print("torch_geometric is already available.")
# except: 
#     install_package('torch_geometric', False)
#     # After installing, you must use importlib to refresh or restart the script
#     import torch_geometric
#     print("torch_geometric imported successfully after installation.")

# Try to import, if it fails, install it
# try:
#     import k_means_constrained
#     print("k_means_constrained is already available.")
# except: 
#     install_package('k_means_constrained', False)
#     # After installing, you must use importlib to refresh or restart the script
#     import k_means_constrained
#     print("k_means_constrained imported successfully after installation.")

#file paths and hyper-parameters shared by the cells below
#NOTE: despite the *_DIR suffix, every constant except SCHEDULE_DIR holds a file path, not a directory
TARGET_BATCH_SIZE = 10 #number of missions per mini-batch (default target size for the spatial split)
#CSV with the full large-scale mission batch; default input of create_spatial_batches_and_save
LARGE_SCALE_MISSION_BATCH_DIR = "./datasets/Batch1000M.csv"
#output file-name pattern for the split mini-batches
PREPROCESSED_BATCH_DIR = f"./preprocessed/batch1000M/Batch{TARGET_BATCH_SIZE}M_idx.xlsx" #"idx" is replaced by the 1-based cluster number when a mini-batch is saved
#passed to GnnScheduleDataset as mission_base_path
MISSION_BATCH_DIR = "./datasets/mini-batch/Batch10M_distanced.csv"
#passed to GnnScheduleDataset as pallet_types_file_path
UDC_TYPES_DIR = "./datasets/WM_UDC_TYPE.csv"
#passed to GnnScheduleDataset as edge_base_path
MISSION_BATCH_TRAVEL_DIR = "./datasets/mini-batch/Batch10M_travel_distanced.csv"
#passed to GnnScheduleDataset as fork_path
FORK_LIFTS_DIR = "./datasets/ForkLifts10W.csv"
#MISSION_TYPES_DIR = "./datasets/MissionTypes.csv"
#passed to GnnScheduleDataset as schedule_dir
SCHEDULE_DIR = "./schedules/mini-batch/"
#number of passes over the dataset in the training loop
NUM_EPOCHS = 10
#number of graphs per DataLoader batch
BATCH_SIZE = 92 #nice to be equal to the number of mini-batch instances
#learning rate handed to the Adam optimizer
LEARNING_RATE = 0.001


  from .autonotebook import tqdm as notebook_tqdm


### Preprocessing
Split the large-scale batch into mini-batches (e.g. 10 missions each) using size-constrained spatial k-means, considering only the pickup and drop-off coordinates `FROM_X, FROM_Y, FROM_Z, TO_X, TO_Y, TO_Z`.

In [None]:
def create_spatial_batches_and_save(input_csv=LARGE_SCALE_MISSION_BATCH_DIR, target_batch_size=TARGET_BATCH_SIZE):
    """
    splits a mission table into spatially compact, similarly sized mini-batches
    and saves each mini-batch to its own file.

    missions are de-duplicated on CD_MISSION, clustered on the standardized 6D
    pickup/drop-off coordinates with size-constrained k-means, and every cluster
    is written without the temporary cluster_id column.

    Args:
        input_csv: path of the CSV that holds all missions.
        target_batch_size: desired number of missions per mini-batch. clusters
            are kept within +/-1 of this size; the lower bound is relaxed when
            the number of missions makes it infeasible.

    Returns:
        list of written file paths, ordered by cluster id.

    Raises:
        ValueError: if target_batch_size is smaller than 1.

    NOTE: output names come from the module-level PREPROCESSED_BATCH_DIR pattern
    ("idx" is replaced by the 1-based cluster number). that pattern is not derived
    from the target_batch_size argument passed to this function.
    """
    if target_batch_size < 1:
        raise ValueError(f"target_batch_size must be >= 1, got {target_batch_size}")

    #os.makedirs('') raises, so only create the folder when the pattern has one
    out_dir = os.path.dirname(PREPROCESSED_BATCH_DIR)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    #load the main dataset
    df = pd.read_csv(input_csv).drop_duplicates(subset=['CD_MISSION'], keep='first')
    df = df.reset_index(drop=True)
    n_samples = len(df)

    #determine number of clusters
    n_clusters = max(1, math.ceil(n_samples / target_batch_size))
    print(f"total orders: {n_samples} | target clusters: {n_clusters}")

    #feature selection: 6D spatial coordinates
    #use both pickup (from) and drop-off (to) locations
    coord_cols = ['FROM_X', 'FROM_Y', 'FROM_Z', 'TO_X', 'TO_Y', 'TO_Z']

    #strip thousands separators so the text columns can be parsed as floats
    for col in coord_cols:
        if df[col].dtype == 'object' or df[col].dtype == 'string':  #only apply to string/object columns
            df[col] = df[col].str.replace(',', '', regex=False)

    if n_clusters == 1:
        #everything fits in a single mini-batch: nothing to cluster
        df['cluster_id'] = 0
    else:
        coords = df[coord_cols].astype(float).fillna(0).values

        #standardize (crucial for K-Means distance calculations)
        coords_scaled = StandardScaler().fit_transform(coords)

        #KMeansConstrained needs n_clusters * size_min <= n_samples, which
        #target_batch_size - 1 violates for e.g. 11 missions with a target of 10
        #(2 clusters * 9 > 11); cap the lower bound at the largest feasible value.
        #the upper bound is always feasible because n_clusters = ceil(n / target).
        size_min = min(target_batch_size - 1, n_samples // n_clusters)
        size_max = target_batch_size + 1

        #plain KMeans produced some very unbalanced clusters, hence the size constraints
        clf = KMeansConstrained(
            n_clusters=n_clusters,
            size_min=size_min,
            size_max=size_max,
            random_state=42
        )
        df['cluster_id'] = clf.fit_predict(coords_scaled)

    #save each cluster to its own file
    saved_files = []
    for cid in sorted(df['cluster_id'].unique()):
        #drop the temporary cluster_id before saving
        cluster_df = df[df['cluster_id'] == cid].drop(columns=['cluster_id'])

        file_name = PREPROCESSED_BATCH_DIR.replace("idx", str(cid+1))
        if file_name.endswith('.csv'):
            cluster_df.to_csv(file_name, index=False)
        else:
            cluster_df.to_excel(file_name, index=False)

        saved_files.append(file_name)

        print(f"saved {file_name} with {len(cluster_df)} missions.")

    return saved_files

#create_spatial_batches_and_save()

total orders: 920 | target clusters: 92
saved ./preprocessed/batch1000M/Batch10M_1.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_2.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_3.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_4.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch10M_5.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch10M_6.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_7.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch10M_8.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_9.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch10M_10.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch10M_11.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_12.xlsx with 11 missions.
saved ./preprocessed/batch1000M/Batch10M_13.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch10M_14.xlsx with 9 missions.
saved ./preprocessed/batch1000M/Batch1

['./preprocessed/batch1000M/Batch10M_1.xlsx',
 './preprocessed/batch1000M/Batch10M_2.xlsx',
 './preprocessed/batch1000M/Batch10M_3.xlsx',
 './preprocessed/batch1000M/Batch10M_4.xlsx',
 './preprocessed/batch1000M/Batch10M_5.xlsx',
 './preprocessed/batch1000M/Batch10M_6.xlsx',
 './preprocessed/batch1000M/Batch10M_7.xlsx',
 './preprocessed/batch1000M/Batch10M_8.xlsx',
 './preprocessed/batch1000M/Batch10M_9.xlsx',
 './preprocessed/batch1000M/Batch10M_10.xlsx',
 './preprocessed/batch1000M/Batch10M_11.xlsx',
 './preprocessed/batch1000M/Batch10M_12.xlsx',
 './preprocessed/batch1000M/Batch10M_13.xlsx',
 './preprocessed/batch1000M/Batch10M_14.xlsx',
 './preprocessed/batch1000M/Batch10M_15.xlsx',
 './preprocessed/batch1000M/Batch10M_16.xlsx',
 './preprocessed/batch1000M/Batch10M_17.xlsx',
 './preprocessed/batch1000M/Batch10M_18.xlsx',
 './preprocessed/batch1000M/Batch10M_19.xlsx',
 './preprocessed/batch1000M/Batch10M_20.xlsx',
 './preprocessed/batch1000M/Batch10M_21.xlsx',
 './preprocessed/batch

### Loss Definition 

In [2]:
def weighted_loss(predictions, ground_truth, u_batch):
    """
    computes weighted BCE loss for activation, assignment, and sequence heads.
    total loss = beta * act_loss + alpha * (assign_loss + seq_loss)

    Args:
        predictions: mapping with the keys 'activation', 'assignment' and
            'sequence'. values are fed to binary_cross_entropy directly (no
            sigmoid is applied here), so they are expected to be probabilities.
        ground_truth: object indexable by 'operator',
            ('operator', 'assign', 'order') and ('order', 'to', 'order'); each
            entry exposes its labels as `.y`.
        u_batch: 2D tensor of global parameters; column 0 is read as alpha and
            column 1 as beta, each averaged over the batch.

    Returns:
        tuple (total_loss, act_loss, assign_loss, seq_loss): total_loss is a
        tensor that can be back-propagated, the other three are Python floats.
    """
    def _bce(pred, labels):
        #binary_cross_entropy needs a floating-point target with exactly the
        #shape of the prediction; stored labels may be integer-typed or flat
        target = labels.reshape(pred.shape).to(dtype=pred.dtype)
        return F.binary_cross_entropy(pred, target)

    #BCE losses, one per head
    loss_act = _bce(predictions['activation'], ground_truth['operator'].y)
    loss_assign = _bce(predictions['assignment'], ground_truth['operator', 'assign', 'order'].y)
    loss_seq = _bce(predictions['sequence'], ground_truth['order', 'to', 'order'].y)

    #extract alpha/beta (mean over batch)
    alpha = u_batch[:, 0].mean()
    beta = u_batch[:, 1].mean()

    #weighted sum
    #note that alpha/beta need to be scaled down if they are large (e.g. 100) to prevent explosion
    #or rely on the optimizer (Adam) to handle scaling.
    total_loss = (beta * loss_act) + (alpha * (loss_assign + loss_seq))

    return total_loss, loss_act.item(), loss_assign.item(), loss_seq.item()

In [3]:
#train on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

#init dataset
dataset = GnnScheduleDataset(
    schedule_dir=SCHEDULE_DIR,
    mission_base_path=MISSION_BATCH_DIR,
    edge_base_path=MISSION_BATCH_TRAVEL_DIR,
    pallet_types_file_path=UDC_TYPES_DIR,
    fork_path=FORK_LIFTS_DIR
)

print(f"found {len(dataset)} valid schedule instances.")

#create DataLoader using the dataset
#batch_size can be > 1 to train on multiple graphs at once
loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

if len(dataset) > 0:
    #init model; the first sample is only used to read the graph metadata
    sample_data = dataset[0]
    model = MultiCriteriaGNNModel(
        metadata=sample_data.metadata(),
        hidden_dim=64,
        num_layers=3,
        heads=4
    ).to(device)

    #adam optimizer is a standard for GNNs
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(1, NUM_EPOCHS + 1):
        model.train()
        total_epoch_loss = 0.0
        for batch_idx, batch in tqdm(enumerate(loader), total=len(loader), desc=f"epoch {epoch}/{NUM_EPOCHS}"):
            batch = batch.to(device)
            optimizer.zero_grad()

            #construct batch_dict (node -> graph index vectors per node type)
            batch_dict_arg = {
                'operator': batch['operator'].batch,
                'order': batch['order'].batch
            }

            #forward pass
            preds = model(
                batch.x_dict,
                batch.edge_index_dict,
                batch.edge_attr_dict,
                batch.u,
                batch_dict=batch_dict_arg
            )

            #backward step and optimization
            loss, l_act, l_assign, l_seq = weighted_loss(preds, batch, batch.u)
            loss.backward()
            optimizer.step()

            total_epoch_loss += loss.item()

            #print mini-batch progress (every 2 batches)
            #tqdm.write keeps the message from breaking the progress bar
            if batch_idx % 2 == 0:
                tqdm.write(f"[Batch {batch_idx}] loss: {loss.item():.4f} (act_loss: {l_act:.3f}, assign_loss: {l_assign:.3f}, seq_loss: {l_seq:.3f})")

        avg_loss = total_epoch_loss / len(loader)
        print(f"Epoch {epoch} complete. average loss: {avg_loss:.4f}")
else:
    #make the no-op visible instead of finishing silently
    print("no valid schedule instances found; skipping training.")

Using device: cuda
found 92 valid schedule instances.
Global params extracted from[schedule10M_10_A1.0_B100.0_H60.csv]: Alpha=1.0, Beta=100.0, H_fixed=60.0


epoch 1/10:   0%|          | 0/1 [00:00<?, ?it/s]

Global params extracted from[schedule10M_27_A1.0_B100.0_H90.csv]: Alpha=1.0, Beta=100.0, H_fixed=90.0
Global params extracted from[schedule10M_8_A1.0_B100.0_H90.csv]: Alpha=1.0, Beta=100.0, H_fixed=90.0
Global params extracted from[schedule10M_56_A1.0_B100.0_H480.csv]: Alpha=1.0, Beta=100.0, H_fixed=480.0
Global params extracted from[schedule10M_49_A1.0_B100.0_H480.csv]: Alpha=1.0, Beta=100.0, H_fixed=480.0
Global params extracted from[schedule10M_34_A1.0_B100.0_H90.csv]: Alpha=1.0, Beta=100.0, H_fixed=90.0
Global params extracted from[schedule10M_6_A1.0_B100.0_H90.csv]: Alpha=1.0, Beta=100.0, H_fixed=90.0
Global params extracted from[schedule10M_86_A1.0_B100.0_H480.csv]: Alpha=1.0, Beta=100.0, H_fixed=480.0
Global params extracted from[schedule10M_81_A1.0_B100.0_H90.csv]: Alpha=1.0, Beta=100.0, H_fixed=90.0
Global params extracted from[schedule10M_41_A1.0_B100.0_H120.csv]: Alpha=1.0, Beta=100.0, H_fixed=120.0
Global params extracted from[schedule10M_16_A1.0_B100.0_H480.csv]: Alpha=1.0

epoch 1/10:   0%|          | 0/1 [00:01<?, ?it/s]


ValueError: cannot convert float NaN to integer