# First part: a simple walkthrough of how to apply the feasibility predictor. The main steps are:<br>
* Load the feasibility predictor
* Parse a packing instance
* Pass the features of the instance to the predictor and obtain the prediction

In [134]:
import torch as T
import pickle as pkl
import numpy as np
import random
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
import torch.nn as nn
from sklearn.metrics import confusion_matrix
from collections import defaultdict

## Step 1. Load the feasibility predictor

In [2]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = T.device("cuda" if T.cuda.is_available() else "cpu")
# map_location remaps the stored tensors onto `device` while loading, so the file can
# still be opened on a CPU-only machine if the model was saved with its parameters on a GPU.
predictor = T.jit.load('Feasibility_Predictor.pt', map_location=device)
# Kept so that `predictor` is guaranteed to live on `device` from here on.
predictor = predictor.to(device)

## Step 2. Parse a packing instance

In [86]:
def extract_manual_feature(items, bin_width, bin_height):
    """
    Extract the hand-crafted feature vector of a packing instance.

    Parameters:
    ----------
    items (a list of np.arrays or [width, height] pairs): only the first two
        entries of every item are read, as width and height
    bin_width (int): the width of a bin
    bin_height (int): the height of a bin

    Returns (a flat list of 17 metrics)
    -------
    notes:
    we extract five types of features, in this order:
    1) the ratio between width and height (divided by MAX_W_H_RATIO): mean, min, max, std
    2) the ratio between width and the bin width: mean, min, max, std
    3) the ratio between height and the bin height: mean, min, max, std
    4) the ratio between the area of an item and the bin capacity: mean, min, max, std
    5) the ratio between the total area of the items and the bin capacity, a single metric

    Raises
    ------
    ValueError: if `items` is empty, if an item has height 0, or if the bin has
        zero width or height. Each of these would otherwise produce either an
        opaque numpy error or inf/nan features.
    """
    # Scaling constant for the width/height ratios.
    # NOTE(review): presumably the largest ratio expected in the data -- confirm.
    MAX_W_H_RATIO = 18

    # One (n_items, 2) float array instead of five separate map/lambda passes.
    dims = np.asarray([(item[0], item[1]) for item in items], dtype=float)
    if dims.size == 0:
        raise ValueError("items must contain at least one [width, height] pair")
    widths, heights = dims[:, 0], dims[:, 1]
    if np.any(heights == 0):
        raise ValueError("every item must have a non-zero height")
    if bin_width == 0 or bin_height == 0:
        raise ValueError("bin_width and bin_height must be non-zero")

    capacity = bin_width * bin_height
    areas = widths * heights

    def _stats(values):
        # The four summary statistics used for every per-item ratio.
        return [values.mean(), values.min(), values.max(), values.std()]

    result = []
    result.extend(_stats(widths / heights / MAX_W_H_RATIO))
    result.extend(_stats(widths / bin_width))
    result.extend(_stats(heights / bin_height))
    result.extend(_stats(areas / capacity))
    result.append(areas.sum() / capacity)
    return result

In [122]:
# Two demo instances; every entry is one item written as [width, height].
#
# Demo instance A, result: infeasible
#   [6, 4], [17, 1], [8, 1], [15, 2], [2, 6],
#   [13, 1], [4, 7], [7, 4], [6, 4], [2, 5]
#
# Demo instance B, result: feasible
#   (this is the instance that is actually evaluated below)
instance = [[2, 9], [3, 4], [2, 6], [13, 1], [4, 7], [7, 4], [6, 4], [2, 5]]

# Features of instance B for a bin of width 20 and height 10.
features = extract_manual_feature(instance, bin_width=20, bin_height=10)

## Step 3. Pass the features of the instance to the predictor and obtain the prediction

In [123]:
# Switch the predictor to evaluation mode; the cell output shows the loaded module's layers.
predictor.eval()

RecursiveScriptModule(
  original_name=FFNNFeasibilityChecker
  (linear1): RecursiveScriptModule(original_name=Linear)
  (linear2): RecursiveScriptModule(original_name=Linear)
  (linear3): RecursiveScriptModule(original_name=Linear)
)

In [124]:
# as_tensor accepts both the plain feature list and an already converted tensor, so
# re-running this cell does not trigger the copy-construct warning that
# T.tensor(<tensor>) emits once `features` has been rebound to a tensor.
features = T.as_tensor(features, dtype=T.float, device=device)
features

tensor([0.1287, 0.0123, 0.7222, 0.2262, 0.2438, 0.1000, 0.6500, 0.1775, 0.5000,
        0.1000, 0.9000, 0.2236, 0.0906, 0.0500, 0.1400, 0.0352, 0.7250],
       device='cuda:0')

In [125]:
# Inference only, so no gradients need to be tracked.
with T.no_grad():
    y = predictor(features)
# Same decision rule as eval(): a score of 0.5 or more is read as label 1 (infeasible).
# .item() requires a single-element output, as did the original `if y > 0.5`.
if y.item() >= 0.5:
    print("infeasible")
else:
    print("feasible")

feasible


# Second part: an instruction on how to parse a .pkl file and run the feasibility predictor on testing samples.

The shared training and testing samples are in .pkl format. We provide the code to parse them and to apply the predictor to the testing samples, which reproduces the results shown in Figure 5 of the paper. With the same code, one can also use the shared training samples to train a new predictor with a different architecture.

## Unpickle the .pkl files
The testing samples and the training samples are both stored in .pkl format, a compact format for storing large datasets. The following snippet can be used to unpickle the files and access the data correctly.<br>

### What is inside a .pkl file?

In [55]:
# read the demo samples
# Load the demo samples into memory.
# NOTE: unpickling can execute arbitrary code, so only open .pkl files from a trusted source.
with open("demo_samples.pkl", mode="rb") as demo_file:
    demo_dataset = pkl.load(demo_file)

Inside a .pkl file, the samples are organized as a defaultdict. Each key is the id of a sample, and the corresponding value is a sub-dict that contains the attributes of that sample.<br>
Take the sample with "id = 24900" in the dataset as an example.

In [60]:
# Look up the sample stored under id 24900; its value is a dict with the sample's attributes.
demo_dataset[24900]

{'key': '2L_VRPTW-25-R1-55-PC5_Batch4.csv21_0.tr',
 'label': 0,
 'bin': array([20, 10]),
 'items': array([[2, 4],
        [4, 2],
        [3, 3],
        [3, 3],
        [2, 5],
        [4, 2],
        [4, 1],
        [5, 2],
        [3, 1],
        [6, 2],
        [3, 2],
        [2, 2],
        [2, 2]]),
 'packing_class': '5'}

* key: the unique id of the sample
* label: 0 -> feasible, 1 -> infeasible
* bin: the size of the bin, [width, height]
* items: a list of items, [[width_1, height_1],..[width_m, height_m]]
* packing_class: specify which packing class it belongs to

## Customized dataset
PyTorch provides an abstract class (`Dataset`) to represent samples for training and testing. See the following link for more information.
https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

In [90]:
class ItemDataset(Dataset):
    """Map-style dataset over the packing samples stored in a .pkl file.

    Indexing returns a pair (manual_features, label), where manual_features is
    the numpy array built from extract_manual_feature for that sample.
    """

    def __init__(self, dataset_file_path):
        """Load the samples from `dataset_file_path`, keeping only labels 0 and 1.

        NOTE: pickle can execute arbitrary code while loading, so only pass
        files that come from a trusted source.
        """
        super(ItemDataset, self).__init__()
        # open the .pkl file, the dataset
        with open(dataset_file_path, "rb") as fp:
            dataset = pkl.load(fp)
        self.dataset = {k: v for k, v in dataset.items() if v['label'] in (0, 1)}
        # Materialize the sample ids once: rebuilding list(keys) inside every
        # __getitem__ call made a full pass over the dataset quadratic.
        self._sample_ids = list(self.dataset)

    def __len__(self):
        """Number of samples kept after the label filter."""
        return len(self.dataset)

    def __getitem__(self, i):
        """Return (feature array, label) for the i-th kept sample."""
        sample = self.dataset[self._sample_ids[i]]
        items, label = sample['items'], sample['label']
        # 'bin' holds [width, height] of the bin.
        max_width, max_height = sample['bin']
        manual_features = np.asarray(extract_manual_feature(items, max_width, max_height))
        return manual_features, label

### Prepare data loader for the model

In [91]:
def create_loaders(dataset, samplers):
    """Build the test DataLoader.

    Samples are drawn one at a time (batch_size=1) in the order produced by
    the sampler stored under the "test" key of `samplers`.
    """
    return DataLoader(dataset, sampler=samplers["test"], batch_size=1)
def create_samplers(dataset, val_size=3000, split=True):
    """Return the samplers used to draw from `dataset`.

    The result is a dict with a single "test" entry: a SubsetRandomSampler
    over every index of the dataset. `val_size` and `split` are accepted but
    not used.
    """
    every_index = list(range(len(dataset)))
    return dict(test=SubsetRandomSampler(every_index))
def inference_mode(model, test_loader, criterion, device):
    """Evaluate `model` on `test_loader` and print the metrics.

    Prints the average loss and the accuracy returned by eval(), followed by
    the confusion matrix (rows: true label, columns: predicted label, in the
    order 0 = feasible, 1 = infeasible).
    """
    test_loss, test_acc, all_y, all_pred = eval(model, test_loader, criterion, device)
    print(f"Test loss {test_loss:.3f} Test acc {test_acc:.3f}")
    # Flatten the per-batch tensors element by element so any batch size works;
    # calling .item() on each tensor only worked with batch_size=1.
    y_true = [int(v) for batch in all_y for v in batch.reshape(-1).tolist()]
    y_pred = [int(v) for batch in all_pred for v in batch.reshape(-1).tolist()]
    # Fixing the label set keeps the matrix 2x2 even when only one class occurs.
    print(confusion_matrix(y_true, y_pred, labels=[0, 1]))
def eval(model, dataloader, criterion, device):
    """Run `model` over `dataloader` without gradients and collect metrics.

    Returns a 4-tuple:
      * the loss averaged over all samples (each batch loss is weighted by its size),
      * the accuracy, using 0.5 as the threshold on the model output,
      * the list of label tensors, one per batch,
      * the list of thresholded prediction tensors (0.0 / 1.0), one per batch.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0
    labels_per_batch, preds_per_batch = [], []
    with T.no_grad():
        for x, y in dataloader:
            x = x.float().to(device)
            y = y.float().to(device)
            # Flatten the model output so it lines up with the 1-D label tensor.
            scores = model(x).reshape(-1)
            batch_loss = criterion(scores, y).item()
            hard_pred = (scores >= 0.5).float()
            batch_size = x.shape[0]

            labels_per_batch.append(y)
            preds_per_batch.append(hard_pred)
            loss_sum += batch_size * batch_loss
            n_correct += (hard_pred == y).sum().item()
            n_seen += batch_size
    return loss_sum / n_seen, n_correct / n_seen, labels_per_batch, preds_per_batch

### Perform inference on the testing samples

In [184]:
def main(dataset_file_path):
    """Evaluate the saved feasibility predictor on the samples of one .pkl file.

    Builds the dataset and its test loader, loads 'Feasibility_Predictor.pt',
    and prints the loss, the accuracy and the confusion matrix.
    """
    dataset = ItemDataset(dataset_file_path)
    samplers = create_samplers(dataset, split=False)
    test_loader = create_loaders(dataset, samplers)
    # Load model
    device = T.device("cuda" if T.cuda.is_available() else "cpu")
    print("Loading the model")
    # map_location remaps the stored tensors onto `device` while loading, so the file
    # can still be opened on a CPU-only machine if the model was saved on a GPU.
    model = T.jit.load('Feasibility_Predictor.pt', map_location=device)
    model = model.to(device)
    # The model output is thresholded at 0.5 and scored with binary cross-entropy.
    criterion = nn.BCELoss()
    inference_mode(model, test_loader, criterion, device)

### We first test the model on the hybrid testing samples, which include instances of all the packing classes.

In [186]:
# Evaluate the predictor on the hybrid test set (instances of all packing classes).
main(dataset_file_path = "./testing_samples/HybridClasses.pkl")

Loading the model
Test loss 0.121 Test acc 0.958
[[19773  1074]
 [  662 20185]]


#### Then the model is run over the testing samples of packing class 2

In [187]:
# Evaluate the predictor on the testing samples of packing class 2.
main(dataset_file_path = "./testing_samples/PackingClass2.pkl")

Loading the model
Test loss 0.268 Test acc 0.882
[[5219 1275]
 [ 176 5588]]


#### Then the model is run over the testing samples of packing class 3

In [188]:
# Evaluate the predictor on the testing samples of packing class 3.
main(dataset_file_path = "./testing_samples/PackingClass3.pkl")

Loading the model
Test loss 0.153 Test acc 0.945
[[4773  214]
 [ 337 4650]]


#### Then the model is run over the testing samples of packing class 4

In [189]:
# Evaluate the predictor on the testing samples of packing class 4.
main(dataset_file_path = "./testing_samples/PackingClass4.pkl")

Loading the model
Test loss 0.235 Test acc 0.907
[[207   3]
 [ 36 174]]


#### Then the model is run over the testing samples of packing class 5

In [190]:
# Evaluate the predictor on the testing samples of packing class 5.
main(dataset_file_path = "./testing_samples/PackingClass5.pkl")

Loading the model
Test loss 0.009 Test acc 1.000
[[62720    14]
 [    0     0]]
