# Predict bad LS moves

This notebook develops simple models for predicting bad local search (LS) moves. In particular, given two routes $R_U$ and $R_V$, it predicts whether the route-based LS operators we currently have are likely to produce an improving solution when applied to this route pair.

In [1]:
%cd ..

D:\Projects\Python\Euro-NeurIPS-2022


In [2]:
%matplotlib inline

In [3]:
from collections import defaultdict
from contextlib import suppress
from dataclasses import dataclass
from enum import IntEnum
from glob import glob
from functools import cache
import itertools
from pathlib import Path
import re
from typing import Optional

import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import SGDClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.model_selection import KFold

import tools

In [4]:
# Both paths are relative to the working directory (the first cell changes
# into the parent directory before anything else runs).
# DATA_PATH holds the raw move logs read by parse_file, and is also where the
# per-instance '.npz' feature caches are written.
DATA_PATH = Path("data/raw/")
# INST_PATH holds the instance files that are read with tools.read_vrplib.
INST_PATH = Path("instances/")

## Utilities

These are used to parse the raw results for a single instance into something that contains the same data, but in a more workable format.

In [5]:
@dataclass
class Route:
    """
    A single route as it appears in the raw move log.

    Besides holding the parsed data, a route behaves like a small read-only
    sequence over its clients: it supports ``len(route)``, ``route[idx]`` and
    ``route.index(client)``.
    """
    clients: list[int]  # client indices parsed from the route's log line
    load: int           # logged load value; compared against the capacity
    tw: int             # logged time-window value; > 0 is treated as infeasible

    def __len__(self) -> int:
        """Number of clients on this route."""
        return len(self.clients)

    def __getitem__(self, idx: int) -> int:
        """Client stored at position ``idx`` of this route."""
        visited = self.clients
        return visited[idx]

    def index(self, client: int) -> int:
        """
        Position of ``client`` on this route. Raises ``ValueError`` (from
        ``list.index``) when the client is not on the route.
        """
        position = self.clients.index(client)
        return position

@dataclass
class Record:
    """
    One evaluated operator move between two routes U and V, as parsed from
    the raw move log.

    Each route field is a ``(before, after)`` pair. The ``after`` route is
    only present when ``delta`` is negative; otherwise it is ``None``.
    """
    op: int  # first integer on the record's header line (operator index)
    delta: int  # second integer on the header line; negative is labelled improving
    Ru: tuple[Route, Optional[Route]]  # tuples of (before, after). After only
    Rv: tuple[Route, Optional[Route]]  # when delta is negative.

def parse_file(file: str) -> list[Record]:
    """
    Parses a raw move log into a list of ``Record`` objects.

    The file is a sequence of records. Every record starts with a header line
    ``<op> <delta>``, followed by four lines describing the two routes before
    the move:

    1. route U (the first number on the line is dropped, the rest are the
       clients);
    2. route V (same layout);
    3. the two loads, ``<load U> <load V>``;
    4. the two time-window values, ``<tw U> <tw V>``.

    When ``delta`` is negative, four more lines in the same layout describe
    both routes after the move.

    Blank lines at a record boundary (for example a trailing empty line at
    the end of the file) are skipped, and integer fields may be separated by
    any run of whitespace.

    Parameters
    ----------
    file
        Location of the log file; anything accepted by ``open``.

    Returns
    -------
    list[Record]
        The records, in file order.

    Raises
    ------
    IndexError
        When the file ends in the middle of a record.
    ValueError
        When a line does not contain the expected integers.
    """
    def parse_ints(line: str) -> list[int]:
        # split() without arguments tolerates repeated/trailing whitespace
        # and the line's newline.
        return [int(num) for num in line.split()]

    def parse_routes(block: list[str]) -> tuple[Route, Route]:
        # A block is four lines: route U, route V, loads, time-window values.
        # The first number on a route line is not a client and is dropped
        # (NOTE(review): presumably a route index or size - confirm against
        # the code that writes these logs).
        _, *Ru = map(int, re.findall('[0-9]+', block[0]))
        _, *Rv = map(int, re.findall('[0-9]+', block[1]))
        Lu, Lv = parse_ints(block[2])
        TWu, TWv = parse_ints(block[3])

        return Route(Ru, Lu, TWu), Route(Rv, Lv, TWv)

    with open(file, 'r') as fh:
        lines = fh.readlines()

    records = []
    idx = 0

    while idx < len(lines):
        if not lines[idx].strip():  # blank line between records
            idx += 1
            continue

        op, delta = parse_ints(lines[idx])
        uRouteBefore, vRouteBefore = parse_routes(lines[idx + 1 : idx + 5])
        idx += 5

        uRouteAfter = None
        vRouteAfter = None

        if delta < 0:  # only improving moves also log the resulting routes
            uRouteAfter, vRouteAfter = parse_routes(lines[idx : idx + 4])
            idx += 4

        records.append(Record(op,
                              delta,
                              (uRouteBefore, uRouteAfter),
                              (vRouteBefore, vRouteAfter)))

    return records

## Data and feature generation

Operators (in the order of `main.cpp`):

0. RELOCATE*
1. SWAP*

In [6]:
class Features(IntEnum):
    """
    Column indices of the feature matrix built by ``make_features``. All
    features are computed from the two routes *before* the move is applied.
    """
    TW_U_INFEAS = 0  # 1 if route U's tw value is positive, else 0
    TW_V_INFEAS = 1  # 1 if route V's tw value is positive, else 0
    LD_U_INFEAS = 2  # 1 if route U's load exceeds the instance capacity
    LD_V_INFEAS = 3  # 1 if route V's load exceeds the instance capacity
    U_SIZE = 4  # number of clients on U, divided by len(coords)
    V_SIZE = 5  # number of clients on V, divided by len(coords)
    UV_ANGLE_DIFF = 6  # pseudo-angle of V minus pseudo-angle of U, in [-4, 4]
    MIN_NODE_DIST = 7  # smallest U-V client distance, divided by the max distance

In [7]:
def pseudo_angle(coords, route):
    """
    Computes a pseudo-angle in [-2, 2] for the direction from ``coords[0]``
    to the centroid of the route's clients.

    The pseudo-angle is a cheap, monotone stand-in for the real angle: with
    ``p = dx / (|dx| + |dy|)`` it equals ``1 - p`` in the upper half-plane
    (``dy >= 0``) and ``p - 1`` in the lower half-plane (``dy < 0``).

    Parameters
    ----------
    coords
        2D array of coordinates, one row per node, with x in column 0 and y
        in column 1. Row 0 is the reference point (presumably the depot -
        confirm against the instance reader).
    route
        Object supporting ``len()`` and exposing a ``clients`` list of row
        indices into ``coords``.

    Returns
    -------
    The pseudo-angle, or 0 when no direction is defined: the route is empty,
    or its centroid coincides with ``coords[0]``.
    """
    if len(route) == 0:
        return 0

    clients = coords[route.clients]  # fancy-index once; this makes a copy
    dx = clients[:, 0].mean() - coords[0, 0]
    dy = clients[:, 1].mean() - coords[0, 1]
    norm = abs(dx) + abs(dy)

    # Centroid on top of the reference point: the division below would be
    # 0 / 0 and silently produce NaN. Treat it like the empty route.
    if norm == 0:
        return 0

    # dx / norm <= 1, so this is already non-negative; no abs() needed.
    angle = 1 - dx / norm

    return -angle if dy < 0 else angle

In [8]:
def make_features(instance: dict, records: list[Record]) -> np.ndarray:
    """
    Builds the feature matrix for the given records.

    Only the routes *before* the move are used, so that the features are
    available at the moment one has to decide whether to evaluate the move.

    Parameters
    ----------
    instance
        Instance data. The keys ``'coords'``, ``'duration_matrix'`` and
        ``'capacity'`` are read.
    records
        Parsed move records, all belonging to ``instance``.

    Returns
    -------
    np.ndarray
        Array of shape ``(len(records), len(Features))``; column ``j`` holds
        the feature ``Features(j)``.
    """
    data = np.zeros((len(records), len(Features)))
    coords = instance['coords']
    dist = instance['duration_matrix']
    max_dist = dist.max()

    for idx, record in enumerate(records):
        uRouteBefore, _ = record.Ru
        vRouteBefore, _ = record.Rv

        data[idx, Features.TW_U_INFEAS] = uRouteBefore.tw > 0
        data[idx, Features.TW_V_INFEAS] = vRouteBefore.tw > 0

        data[idx, Features.LD_U_INFEAS] = uRouteBefore.load > instance['capacity']
        data[idx, Features.LD_V_INFEAS] = vRouteBefore.load > instance['capacity']

        data[idx, Features.U_SIZE] = len(uRouteBefore) / len(coords)
        data[idx, Features.V_SIZE] = len(vRouteBefore) / len(coords)

        uAngle = pseudo_angle(coords, uRouteBefore)
        vAngle = pseudo_angle(coords, vRouteBefore)

        assert -2 <= uAngle <= 2, f"Got: {uAngle} for {uRouteBefore}"
        assert -2 <= vAngle <= 2, f"Got: {vAngle} for {vRouteBefore}"

        data[idx, Features.UV_ANGLE_DIFF] = vAngle - uAngle  # in [-4, 4]

        # Smallest distance between any client of U and any client of V,
        # taken over both directions since the matrix need not be symmetric.
        # If either route is empty there are no pairs, and the feature
        # defaults to the maximum distance (i.e. 1 after normalisation).
        min_record_dist = max_dist

        if len(uRouteBefore) > 0 and len(vRouteBefore) > 0:
            u = uRouteBefore.clients
            v = vRouteBefore.clients

            # np.ix_ selects the |U| x |V| (resp. |V| x |U|) sub-matrix, so
            # the minimum is found by numpy instead of a Python double loop.
            min_record_dist = min(dist[np.ix_(u, v)].min(),
                                  dist[np.ix_(v, u)].min(),
                                  max_dist)

        data[idx, Features.MIN_NODE_DIST] = min_record_dist / max_dist

    return data

In [13]:
@cache
def make_or_retrieve_data(file_loc: str) -> tuple[np.ndarray, np.ndarray]:
    """
    Returns the feature matrix ``X`` and label vector ``y`` for one raw log.

    Results are cached at two levels: in memory (via ``functools.cache``,
    keyed on ``file_loc``) and on disk, as ``<stem>.npz`` inside
    ``DATA_PATH``. When the on-disk cache exists it is returned as-is, so it
    must be deleted by hand after changing the feature definitions.

    Because of the in-memory cache every caller receives the *same* array
    objects; do not modify them in place.

    Parameters
    ----------
    file_loc
        File name of the raw log, relative to ``DATA_PATH``. The instance
        with the same name is read from ``INST_PATH``.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``X`` with one row per record, and ``y`` with 1 for records whose
        delta is negative (improving) and 0 otherwise.
    """
    cache_loc = DATA_PATH / (Path(file_loc).stem + '.npz')

    if cache_loc.exists():
        # np.load returns an NpzFile that keeps the archive open; the arrays
        # are read when indexed, so close the file once we have them.
        with np.load(cache_loc) as file:
            return file['X'], file['y']

    instance = tools.read_vrplib(INST_PATH / file_loc)
    records = parse_file(DATA_PATH / file_loc)

    y = np.array([int(record.delta < 0) for record in records])
    X = make_features(instance, records)

    np.savez(cache_loc, X=X, y=y)
    return X, y

## Training and evaluation

In [47]:
def do_kfold(n_splits: int, weights: dict, files: list[Path]) -> list:
    """
    Runs k-fold cross-validation at the level of whole files.

    The files are split into ``n_splits`` folds. For each fold a fresh
    logistic-regression model (``SGDClassifier`` with log loss) is trained
    incrementally, one training file at a time, and then scored on every
    held-out file separately.

    The reported scores are, per test file:

    - precision: of the moves predicted as a class, the share that truly
      belongs to it;
    - recall: of the moves truly in a class, the share that was predicted as
      such;
    - F1 score: 2 * (precision * recall) / (precision + recall).

    They are computed with ``average='weighted'``, i.e. averaged over both
    classes weighted by class support.

    Parameters
    ----------
    n_splits
        Number of folds.
    weights
        Class weights, passed to the classifier as ``class_weight``.
    files
        The files to split; each is passed to ``make_or_retrieve_data``.

    Returns
    -------
    list
        One entry per fold: ``[mean_scores, coefficients, intercept]``, with
        ``mean_scores`` the (precision, recall, F1) triple averaged over the
        fold's test files.
    """
    splitter = KFold(n_splits=n_splits, shuffle=True, random_state=0)
    results = []

    for fold_no, (train_idcs, test_idcs) in enumerate(splitter.split(files), 1):
        print(f"Fold {fold_no}")
        clf = SGDClassifier(loss="log_loss",
                            class_weight=weights,
                            random_state=fold_no)

        # One pass over the training files; the classes must be given
        # explicitly since the model only ever sees part of the data.
        for file_idx in train_idcs:
            X_train, y_train = make_or_retrieve_data(files[file_idx])
            assert y_train.sum() > 0

            clf.partial_fit(X_train, y_train, classes=[0, 1])

        per_file_scores = []
        for file_idx in test_idcs:
            X_test, y_test = make_or_retrieve_data(files[file_idx])
            assert y_test.sum() > 0

            prec, rec, f1, _ = score(y_test,
                                     clf.predict(X_test),
                                     average='weighted',
                                     zero_division='warn')
            per_file_scores.append([prec, rec, f1])

        fold_mean = np.mean(per_file_scores, axis=0)
        results.append([fold_mean, clf.coef_[0], clf.intercept_])

    return results

Improving moves appear to make up at least ~0.2% of all evaluated moves, so the two classes are heavily imbalanced. To compensate, we give the non-improving label a much smaller weight than the improving label (the cell below uses $0.005$ against $1$).

In [48]:
# All raw ORTEC logs in DATA_PATH, as bare file names in sorted order.
files = sorted(Path(entry.name) for entry in DATA_PATH.glob("ORTEC-*.txt"))

# Class weights: label 0 (non-improving) is down-weighted relative to
# label 1 (improving).
weights = {0: 0.005, 1: 1}

In [49]:
# 10-fold cross-validation over the files. Each entry of `vals` is
# [mean (precision, recall, F1), coefficients, intercept] for one fold.
vals = do_kfold(10, weights, files)

Fold 1
Fold 2
Fold 3
Fold 4
Fold 5
Fold 6
Fold 7
Fold 8
Fold 9
Fold 10


Per-fold results: precision, recall, and F1 score, each averaged over the test files of the fold. The best fold (according to F1 score) is marked with a \*, but the coefficients and performance should all be roughly similar across folds.

In [50]:
# 0-based index of the fold with the highest mean F1 score (the third entry
# of each fold's mean scores).
idx_max = max(range(len(vals)), key=lambda idx: vals[idx][0][2])

# Folds are printed 1-based, hence the `idx - 1` when marking the best one.
for idx, ((p, r, f1), coefs, intercept) in enumerate(vals, 1):
    print(f"   FOLD: {idx}")
    print(f"   PREC: {p:.3f}")
    print(f" RECALL: {r:.3f}")
    print("*" if idx - 1 == idx_max else " ", f"   F1: {f1:.3f}")

    # Printed row: the intercept first, then one coefficient per feature in
    # the column order of the feature matrix.
    coefs = [intercept[0]] + coefs.tolist()
    fmt = "{:6.2f} " * len(coefs)
    print(f" COEFFS: {fmt.format(*coefs)}", end="\n\n")

   FOLD: 1
   PREC: 0.992
 RECALL: 0.980
*    F1: 0.986
 COEFFS:  -0.24   0.14   0.12   0.19   0.12  -0.01  -0.03   0.24  -0.85 

   FOLD: 2
   PREC: 0.991
 RECALL: 0.979
     F1: 0.985
 COEFFS:  -0.23   0.14   0.11   0.20   0.13  -0.01  -0.03   0.24  -0.86 

   FOLD: 3
   PREC: 0.992
 RECALL: 0.971
     F1: 0.981
 COEFFS:  -0.20   0.14   0.12   0.20   0.13  -0.01  -0.03   0.25  -0.85 

   FOLD: 4
   PREC: 0.991
 RECALL: 0.977
     F1: 0.984
 COEFFS:  -0.26   0.14   0.13   0.19   0.14  -0.01  -0.03   0.24  -0.85 

   FOLD: 5
   PREC: 0.992
 RECALL: 0.974
     F1: 0.983
 COEFFS:  -0.22   0.15   0.14   0.20   0.12  -0.01  -0.02   0.24  -0.82 

   FOLD: 6
   PREC: 0.991
 RECALL: 0.972
     F1: 0.981
 COEFFS:  -0.23   0.14   0.13   0.18   0.12  -0.01  -0.03   0.24  -0.83 

   FOLD: 7
   PREC: 0.992
 RECALL: 0.971
     F1: 0.981
 COEFFS:  -0.22   0.15   0.12   0.20   0.13  -0.01  -0.02   0.24  -0.84 

   FOLD: 8
   PREC: 0.992
 RECALL: 0.975
     F1: 0.983
 COEFFS:  -0.24   0.14   0.12   0.