# Predict bad LS moves for node operators

This notebook develops simple models for predicting bad local search moves. Particularly, given nodes $U$ and $V$ in routes $R_U$ and $R_V$, it predicts whether each LS operator we currently have is likely to produce an improving solution if the operator were applied to these node pairs $U$ and $V$.

<div class="alert alert-block alert-warning">
  <b>Issue:</b> testing the classifier developed below suggests there is little gain in performance. This is discussed further in issue 65. For something in actual use, have a look at the route-based notebook.
</div>

In [1]:
%cd ..

D:\Projects\Python\Euro-NeurIPS-2022


In [2]:
%matplotlib inline

In [3]:
from collections import defaultdict
from contextlib import suppress
from dataclasses import dataclass
from enum import IntEnum
from glob import glob
import itertools
from pathlib import Path
import re

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import KFold

import tools

In [4]:
DATA_PATH = Path("data/raw/")
INST_PATH = Path("instances/")

## Utilities

These are used to parse the raw results for a single instance into something that contains the same data, but in a more workable format.

In [5]:
@dataclass
class Route:
    clients: list[int]
    load: int
    tw: int

    def index(self, client: int) -> int:
        return self.clients.index(client)

    def __getitem__(self, idx: int) -> int:
        return self.clients[idx]

    def __len__(self) -> int:
        return len(self.clients)

@dataclass
class Record:
    op: int
    U: int
    V: int
    delta: int
    Ru: Route
    Rv: Route

def parse_file(file: str) -> list[Record]:
    def parse_record(record: list[str]) -> Record:
        op = int(record[0].strip())
        U, V, delta = map(int, record[1].strip().split(" "))
        _, *Ru = map(int, re.findall('[0-9]+', record[2].strip()))
        _, *Rv = map(int, re.findall('[0-9]+', record[3].strip()))
        Lu, Lv = map(int, record[4].split(" "))
        TWu, TWv = map(int, record[5].split(" "))

        return Record(op, U, V, delta, Route(Ru, Lu, TWu), Route(Rv, Lv, TWv))

    with open(file, 'r') as fh:
        args = [iter(fh)] * 6
        records = zip(*args)

        # This could have been a generator, but each file is only 100-ish MB
        # in size, so that comfortably fits in memory. Also, we for now ignore
        # the reverse exchange (2) and 2-OPT (6) operators.
        return [parsed for record in records 
                if (parsed := parse_record(record)).op not in [2, 6]]

## Data and feature generation

Operators (in the order of `main.cpp`):

0. $(1, 0)$-Exchange
1. $(2, 0)$-Exchange
2. $(2, 0)$-Reverse-Exchange
3. $(2, 2)$-Exchange
4. $(2, 1)$-Exchange
5. $(1, 1)$-Exchange
6. 2-OPT

Note that we currently ignore 2 (reverse exchange) and 6 (2-opt), and focus only on the $(N, M)$-Exchange operators.

In [6]:
op2nm = [
    (1, 0),
    (2, 0),
    None,
    (2, 2),
    (2, 1),
    (1, 1),
    None
]

In [7]:
class Features(IntEnum):
    DELTA_DIST_U = 0
    DELTA_DIST_V = 1
    DELTA_DIST_UN = 2
    DELTA_DIST_VM = 3
    TW_U_INFEAS = 4
    TW_V_INFEAS = 5
    LD_U_INFEAS = 6
    LD_V_INFEAS = 7

In [8]:
def make_features(instance: dict, records: list[Record]) -> np.array:
    dist = instance['duration_matrix']
    dist_max = dist.max()

    data = np.empty((len(records), len(Features)))

    for idx, record in enumerate(records):
        n, m = op2nm[record.op]

        idx_u = record.Ru.index(record.U) if record.U != 0 else -1
        idx_v = record.Rv.index(record.V) if record.V != 0 else -1

        pu = 0 if idx_u <= 0 else record.Ru[idx_u - 1]
        pv = 0 if idx_v <= 0 else record.Rv[idx_v - 1]

        dist_un_vm1 = 0
        dist_un_un1 = 0
        with suppress(IndexError):
            un = record.Ru[idx_u + n]
            un1 = record.Rv[idx_u + n + 1] if idx_u + n + 1 < len(record.Ru) else 0
            dist_un_un1 = dist[un, un1]

            vm1 = record.Rv[idx_v + m + 1]   
            dist_un_vm1 = dist[un, vm1]

        dist_vm_un1 = 0
        dist_vm_vm1 = 0
        with suppress(IndexError):
            vm = record.Rv[idx_v + m]
            vm1 = record.Rv[idx_v + m + 1] if idx_v + m + 1 < len(record.Rv) else 0
            dist_vm_vm1 = dist[vm, vm1]

            un1 = record.Ru[idx_u + n + 1]
            dist_vm_un1 = dist[vm, un1]

        # Some of these features are unused for pure relocate moves (m == 0)
        data[idx, Features.DELTA_DIST_U] = (dist[pv, record.U] if m > 0 else dist[record.V, record.U]) - dist[pu, record.U]
        data[idx, Features.DELTA_DIST_V] = dist[pu, record.V] - dist[pv, record.V] if m > 0 else 0
        data[idx, Features.DELTA_DIST_UN] = dist_un_vm1 - dist_un_un1
        data[idx, Features.DELTA_DIST_VM] = dist_vm_un1 - dist_vm_vm1 if m > 0 else 0

        data[idx] /= dist_max  # normalise all distances to [0, 1]

        data[idx, Features.TW_U_INFEAS] = record.Ru.tw > 0
        data[idx, Features.TW_V_INFEAS] = record.Rv.tw > 0

        data[idx, Features.LD_U_INFEAS] = record.Ru.load > instance['capacity']
        data[idx, Features.LD_V_INFEAS] = record.Rv.load > instance['capacity']

    return data

In [9]:
def make_or_retrieve_data(file_loc: str) -> tuple[np.array, np.array]:
    cache_loc = DATA_PATH / (Path(file_loc).stem + '.npz')

    if cache_loc.exists():
        file = np.load(cache_loc)
        return file['X'], file['y']                
    
    instance = tools.read_vrplib(INST_PATH / file_loc)
    records = parse_file(DATA_PATH / file_loc)

    y = np.array([int(record.delta < 0) for record in records])
    X = make_features(instance, records)

    np.savez(cache_loc, X=X, y=y)
    return X, y

## Training and evaluation

In [10]:
def do_kfold(n_splits: int, weights: dict, files: list[Path]) -> list:
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    fold_results = []

    for idx, (train, test) in enumerate(kf.split(files), 1):
        print(f"Fold {idx}")
        model = SGDClassifier(loss="log_loss", 
                              class_weight=weights, 
                              random_state=idx)

        for idx in train:
            X, y = make_or_retrieve_data(files[idx])
            model.partial_fit(X, y, [0, 1])

        scores = []
        for idx in test:
            X, y = make_or_retrieve_data(files[idx])
            precision, recall, f1score, _ = score(y,
                                                  model.predict(X),
                                                  average='weighted')

            # Precision: number of relevant documents retrieved by a search 
            #            divided by the total number of documents retrieved
            # Recall: number of relevant documents retrieved by a search 
            #         divided by the total number of existing relevant documents
            # F1 score: 2 * (precision * recall) / (precision + recall)
            scores.append([precision, recall, f1score])

        mean_scores = np.mean(scores, axis=0)
        fold_results.append([mean_scores, model.coef_[0], model.intercept_])

    return fold_results

The number of improvements appears to be roughly ~0.25% to ~0.3% of the total number of evaluated moves, so we give those a weight of $\frac{1}{0.003}$ to compensate.

In [11]:
weights = {0: 1, 1: 1 / 0.003}
files = sorted([Path(file.name) for file in DATA_PATH.glob("ORTEC-*.txt")])

In [12]:
vals = do_kfold(10, weights, files)

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10


Folding results: precision, recall, and F1 score. The best folding result (according to F1 score) is marked with a \*, but the coefficients and performance should all be roughly similar across folds.

In [18]:
idx_max = max(range(len(vals)), key=lambda idx: vals[idx][0][2])

for idx, ((p, r, f1), coefs, intercept) in enumerate(vals, 1):
    print(f"   FOLD: {idx}")
    print(f"   PREC: {p:.3f}")
    print(f" RECALL: {r:.3f}")
    print("*" if idx - 1 == idx_max else " ", f"   F1: {f1:.3f}")

    coefs = [intercept[0]] + coefs.tolist()
    fmt = "{:6.2f} " * len(coefs)
    print(f" COEFFS: {fmt.format(*coefs)}", end="\n\n")

   FOLD: 1
   PREC: 0.996
 RECALL: 0.816
     F1: 0.894
 COEFFS:  -0.84  -9.41 -11.28  -0.01  -0.92   1.48   0.81   1.57   0.26 

   FOLD: 2
   PREC: 0.996
 RECALL: 0.811
     F1: 0.891
 COEFFS:  -0.86  -9.34 -11.18  -0.02  -0.92   1.46   0.73   1.57   0.26 

   FOLD: 3
   PREC: 0.996
 RECALL: 0.812
     F1: 0.892
 COEFFS:  -0.85  -9.22 -11.03   0.01  -0.95   1.51   0.78   1.55   0.26 

   FOLD: 4
   PREC: 0.996
 RECALL: 0.829
     F1: 0.902
 COEFFS:  -0.94  -9.35 -11.22  -0.04  -0.83   1.49   0.78   1.64   0.27 

   FOLD: 5
   PREC: 0.997
 RECALL: 0.769
     F1: 0.865
 COEFFS:  -0.60  -9.11 -11.13  -0.09  -0.97   1.51   0.91   1.41   0.20 

   FOLD: 6
   PREC: 0.996
 RECALL: 0.839
*    F1: 0.908
 COEFFS:  -0.89  -9.34 -11.19  -0.01  -0.91   1.49   0.81   1.58   0.25 

   FOLD: 7
   PREC: 0.996
 RECALL: 0.810
     F1: 0.890
 COEFFS:  -0.79  -9.08 -10.82   0.04  -0.86   1.54   0.84   1.56   0.29 

   FOLD: 8
   PREC: 0.996
 RECALL: 0.831
     F1: 0.904
 COEFFS:  -1.00  -9.39 -11.25  -0.