In [1]:
# Change the working directory to the parent folder; the relative paths used
# further down ("instances/...", "out/...") are resolved from there.
# Presumably this is also what makes the `src` package importable - confirm.
%cd ..

C:\Projects\CC-NDP


In [2]:
# Draw matplotlib figures inline in the notebook and emit them as SVG.
%matplotlib inline
%config InlineBackend.figure_formats = ['svg']

In [3]:
from contextlib import suppress

import matplotlib as mpl
from matplotlib.transforms import Bbox
from matplotlib.lines import Line2D
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
)
from sklearn.preprocessing import OneHotEncoder

from src.classes import Result, RootResult, ProblemData, FORMULATIONS

In [4]:
# For final compilation: let matplotlib typeset all text through LaTeX
# (`text.usetex`) and apply the "grayscale" style sheet to every figure.
mpl.rcParams["text.usetex"] = True
plt.style.use("grayscale")

# Training instances: analysis and model formulation

In [5]:
# Training instance table, indexed by instance name.
experiments = pd.read_csv("instances/train/instances.csv", index_col="name")
# Attach the parsed instance to every row. With axis=1 each row is passed as a
# Series whose `.name` is the row's index label, i.e. the instance name, which
# is also the stem of the instance's .ndp file.
experiments["data"] = experiments.apply(
    lambda r: ProblemData.from_file(f"instances/train/{r.name}.ndp"), axis=1
)
experiments.head()

Unnamed: 0_level_0,num_nodes,num_arcs,num_commodities,num_scenarios,seed,data
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
train-19-1-60-32,20,60,1,32,19,ProblemData\n # nodes: 20\n # arcs:...
train-19-1-60-64,20,60,1,64,19,ProblemData\n # nodes: 20\n # arcs:...
train-19-1-60-128,20,60,1,128,19,ProblemData\n # nodes: 20\n # arcs:...
train-19-1-60-256,20,60,1,256,19,ProblemData\n # nodes: 20\n # arcs:...
train-19-1-90-32,20,90,1,32,19,ProblemData\n # nodes: 20\n # arcs:...


In [6]:
def make_formulation_perf_measures(df):
    """
    Return a copy of ``df`` extended with per-result performance columns.

    Parameters
    ----------
    df
        Data frame with a ``result`` column. Every entry of that column must
        expose ``run_time``, ``num_iters`` and ``objective`` attributes.

    Returns
    -------
    pandas.DataFrame
        A new frame holding all columns of ``df`` followed by ``run_time``,
        ``num_iters`` and ``objective``. The argument itself is left
        untouched, so re-running a cell never changes a frame that was
        displayed earlier.
    """
    # ``assign`` builds a new frame rather than writing into the caller's one.
    return df.assign(
        run_time=df["result"].apply(lambda res: res.run_time),
        num_iters=df["result"].apply(lambda res: res.num_iters),
        objective=df["result"].apply(lambda res: res.objective),
    )


def make_formulation_dataframe(
    formulations,
    instances=None,
    instance_dir="instances/train",
    result_dir="out/train",
):
    """
    Collect the stored results of the given formulations into one data frame.

    For every instance name in ``instances.index`` the instance file
    ``{instance_dir}/{name}.ndp`` is read, and for every formulation the
    result file ``{result_dir}/{formulation}/{name}.json`` is read if it
    exists. Missing result files are skipped silently, so the returned frame
    only has rows for (instance, formulation) pairs with a stored result.

    Parameters
    ----------
    formulations
        Iterable of formulation names; each one is a sub-directory of
        ``result_dir``.
    instances
        Instance table indexed by instance name. Defaults to the
        notebook-level ``experiments`` table (the training instances).
    instance_dir
        Directory holding the ``.ndp`` instance files.
    result_dir
        Directory holding one sub-directory of ``.json`` results per
        formulation.

    Returns
    -------
    pandas.DataFrame
        One row per stored result with the columns ``experiment``,
        ``formulation`` and ``result``, the columns of ``instances``, the
        loaded instance data, and the measures added by
        ``make_formulation_perf_measures``. If ``instances`` already has a
        ``data`` column, the final merge produces ``data_x`` / ``data_y``
        (pandas' default suffixes).
    """
    if instances is None:
        # Looked up at call time, so the notebook-level table is used as-is.
        instances = experiments

    data = []
    results = []

    for experiment in instances.index:
        data_loc = f"{instance_dir}/{experiment}.ndp"
        data.append(
            dict(experiment=experiment, data=ProblemData.from_file(data_loc))
        )

        for formulation in formulations:
            # No result file for this pair: leave the row out.
            with suppress(FileNotFoundError):
                res = Result.from_file(
                    f"{result_dir}/{formulation}/{experiment}.json"
                )
                results.append(
                    dict(
                        experiment=experiment,
                        formulation=formulation,
                        result=res,
                    )
                )

    # Attach the metadata of the same table whose index was iterated above.
    results = pd.DataFrame.from_records(results).join(
        instances, on="experiment"
    )
    results = results.merge(pd.DataFrame.from_records(data), on="experiment")

    return make_formulation_perf_measures(results)

In [7]:
# One row per (experiment, formulation) pair for which a result file exists.
results = make_formulation_dataframe(FORMULATIONS)

In [8]:
# Wide run-time table: one row per experiment, one column per formulation.
# Pairs without a stored result show up as NaN.
pvt = results.pivot(
    index="experiment",
    columns="formulation",
    values="run_time",
)
# Mark the smallest run time in every row.
formulation_columns = list(FORMULATIONS.keys())
pvt.style.highlight_min(axis=1, subset=formulation_columns)

formulation,BB,FlowMIS,MIS,SNC
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
train-101-1-120-128,1.687227,0.88611,1.685256,1.888843
train-101-1-120-256,5.316856,2.364592,2.704601,3.22746
train-101-1-120-32,0.311814,0.535666,0.420085,0.475127
train-101-1-120-64,0.478328,0.367131,0.411768,0.439902
train-101-1-180-128,18.207306,2.856867,2.317888,2.357097
train-101-1-180-256,18.539162,3.50472,4.550882,5.235019
train-101-1-180-32,0.628611,0.583795,0.496262,0.551471
train-101-1-180-64,10.664094,0.934835,1.097442,1.160337
train-101-1-60-128,0.461616,0.516771,0.428667,0.597236
train-101-1-60-256,0.864204,0.71331,0.978181,0.758292


In [9]:
# Rows of `results` (i.e. stored results) per formulation.
results.groupby("formulation")["experiment"].count()  # num solved

formulation
BB         154
FlowMIS    209
MIS        212
SNC        217
Name: experiment, dtype: int64

In [10]:
# For every experiment take the formulation with the smallest run time, then
# count how often each formulation comes out on top.
pvt.idxmin(axis=1).value_counts()  # solved fastest

FlowMIS    75
MIS        68
SNC        54
BB         24
Name: count, dtype: int64

In [55]:
def make_features(pvt, experiments):
    """
    Build the feature matrix and one-hot targets for formulation selection.

    Parameters
    ----------
    pvt
        Run-time table with one row per experiment and one column per
        formulation.
    experiments
        Instance table sharing its index with ``pvt``; must provide the
        ``num_commodities``, ``num_arcs`` and ``num_scenarios`` columns.

    Returns
    -------
    X
        Float array of shape (len(pvt), 3) with the columns
        num_commodities, num_arcs, num_scenarios (in that order).
    y
        One-hot array with one column per key of ``FORMULATIONS``, marking
        for every row of ``pvt`` the formulation with the smallest run time.
    """
    # Missing run times are replaced by 7200 before the instance metadata is
    # joined on. Only metadata columns are read from this frame; the labels
    # below are taken from the unfilled ``pvt``.
    joined = pvt.fillna(7200).join(experiments)

    X = np.column_stack(
        (joined.num_commodities, joined.num_arcs, joined.num_scenarios)
    ).astype(float)

    # Fit the encoder on all known formulation names, so the column layout
    # does not depend on which formulations happen to win in ``pvt``.
    known_names = np.array(list(FORMULATIONS.keys())).reshape(-1, 1)
    encoder = OneHotEncoder(handle_unknown="ignore")
    encoder.fit(known_names)

    fastest = pvt.idxmin(axis=1).values.reshape(-1, 1)
    y = np.array(encoder.transform(fastest).todense())

    return X, y


# Feature matrix and one-hot targets for the training instances.
X_train, y_train = make_features(pvt, experiments)

In [56]:
# Random forest with a fixed seed so the fit is reproducible.
# NOTE(review): y_train is a one-hot matrix, so the forest is trained on a
# 2-D target with one 0/1 column per formulation - confirm this is intended.
clf = RandomForestClassifier(
    n_estimators=64,
    random_state=42,
).fit(X_train, y_train)

In [57]:
# Importance per feature, in the column order of X built by make_features:
# num_commodities, num_arcs, num_scenarios.
clf.feature_importances_

array([0.47871362, 0.28005071, 0.24123568])

In [58]:
# Precision/recall on the training data itself (not a held-out set).
# The classes 0-3 are the columns of the one-hot target; judging by the
# support counts they correspond to BB, FlowMIS, MIS and SNC.
print(classification_report(y_train, clf.predict(X_train)))

              precision    recall  f1-score   support

           0       0.79      0.79      0.79        24
           1       0.68      0.68      0.68        75
           2       0.70      0.56      0.62        68
           3       0.75      0.67      0.71        54

   micro avg       0.72      0.65      0.68       221
   macro avg       0.73      0.67      0.70       221
weighted avg       0.72      0.65      0.68       221
 samples avg       0.65      0.65      0.65       221



  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
# Accuracy on the training data. The targets are one-hot rows, so a
# prediction only counts as correct when the whole row matches.
print(accuracy_score(y_train, clf.predict(X_train)))

0.6515837104072398


In [60]:
# Test instance table, indexed by instance name.
test_experiments = pd.read_csv(
    "instances/test/instances.csv", index_col="name"
)
# Attach the parsed instance to every row. With axis=1 each row is passed as a
# Series whose `.name` is the row's index label, i.e. the instance name, which
# is also the stem of the instance's .ndp file.
test_experiments["data"] = test_experiments.apply(
    lambda r: ProblemData.from_file(f"instances/test/{r.name}.ndp"), axis=1
)
test_experiments.head()

Unnamed: 0_level_0,group,correlation,num_nodes,num_arcs,num_commodities,num_scenarios,data
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
r04-3-0-128,r04,0.0,10,60,10,128,ProblemData\n # nodes: 10\n # arcs:...
r04-3-0-256,r04,0.0,10,60,10,256,ProblemData\n # nodes: 10\n # arcs:...
r04-3-0-512,r04,0.0,10,60,10,512,ProblemData\n # nodes: 10\n # arcs:...
r04-3-0-16,r04,0.0,10,60,10,16,ProblemData\n # nodes: 10\n # arcs:...
r04-3-0-32,r04,0.0,10,60,10,32,ProblemData\n # nodes: 10\n # arcs:...


In [61]:
def make_formulation_perf_measures(df):
    """
    Return a copy of ``df`` extended with per-result performance columns.

    A function of the same name is also defined earlier in this notebook;
    once this cell has run, this definition is the one in effect.

    Parameters
    ----------
    df
        Data frame with a ``result`` column. Every entry of that column must
        expose ``run_time``, ``num_iters`` and ``objective`` attributes.

    Returns
    -------
    pandas.DataFrame
        A new frame holding all columns of ``df`` followed by ``run_time``,
        ``num_iters`` and ``objective``. The argument itself is left
        untouched, so re-running a cell never changes a frame that was
        displayed earlier.
    """
    # ``assign`` builds a new frame rather than writing into the caller's one.
    return df.assign(
        run_time=df["result"].apply(lambda res: res.run_time),
        num_iters=df["result"].apply(lambda res: res.num_iters),
        objective=df["result"].apply(lambda res: res.objective),
    )


def make_formulation_dataframe(
    formulations,
    instances=None,
    instance_dir="instances/test",
    result_dir="out/new",
):
    """
    Collect the stored test-set results of the given formulations.

    For every instance name in ``instances.index`` the instance file
    ``{instance_dir}/{name}.ndp`` is read, and for every formulation the
    result file ``{result_dir}/{formulation}/{name}.json`` is read if it
    exists. Missing result files are skipped silently, so the returned frame
    only has rows for (instance, formulation) pairs with a stored result.

    A function of the same name is also defined earlier in this notebook;
    once this cell has run, this definition is the one in effect.

    Parameters
    ----------
    formulations
        Iterable of formulation names; each one is a sub-directory of
        ``result_dir``.
    instances
        Instance table indexed by instance name. Defaults to the
        notebook-level ``test_experiments`` table.
    instance_dir
        Directory holding the ``.ndp`` instance files.
    result_dir
        Directory holding one sub-directory of ``.json`` results per
        formulation.

    Returns
    -------
    pandas.DataFrame
        One row per stored result with the columns ``experiment``,
        ``formulation`` and ``result``, the columns of ``instances``, the
        loaded instance data, and the measures added by
        ``make_formulation_perf_measures``. If ``instances`` already has a
        ``data`` column, the final merge produces ``data_x`` / ``data_y``
        (pandas' default suffixes).
    """
    if instances is None:
        # Looked up at call time, so the notebook-level table is used as-is.
        instances = test_experiments

    data = []
    results = []

    for experiment in instances.index:
        data_loc = f"{instance_dir}/{experiment}.ndp"
        data.append(
            dict(experiment=experiment, data=ProblemData.from_file(data_loc))
        )

        for formulation in formulations:
            # No result file for this pair: leave the row out.
            with suppress(FileNotFoundError):
                res = Result.from_file(
                    f"{result_dir}/{formulation}/{experiment}.json"
                )
                results.append(
                    dict(
                        experiment=experiment,
                        formulation=formulation,
                        result=res,
                    )
                )

    # Join the metadata of the table that was iterated above. The previous
    # version joined the training table `experiments` here, whose index does
    # not contain the test instance names.
    results = pd.DataFrame.from_records(results).join(
        instances, on="experiment"
    )
    results = results.merge(pd.DataFrame.from_records(data), on="experiment")

    return make_formulation_perf_measures(results)

In [62]:
# One row per (test experiment, formulation) pair with a stored result.
test_results = make_formulation_dataframe(FORMULATIONS)

In [63]:
# Wide run-time table for the test instances: one row per experiment, one
# column per formulation; pairs without a stored result are NaN.
test_pvt = test_results.pivot(
    index="experiment", columns="formulation", values="run_time"
)

In [64]:
# Feature matrix and one-hot targets for the test instances.
X_test, y_test = make_features(test_pvt, test_experiments)

In [65]:
# Precision/recall of the classifier fitted on the training instances,
# evaluated on the test instances.
print(classification_report(y_test, clf.predict(X_test)))

              precision    recall  f1-score   support

           0       0.06      0.08      0.07        25
           1       0.34      0.22      0.27        67
           2       0.20      0.19      0.19        69
           3       0.16      0.22      0.18        32

   micro avg       0.20      0.19      0.19       193
   macro avg       0.19      0.18      0.18       193
weighted avg       0.22      0.19      0.20       193
 samples avg       0.19      0.19      0.19       193



  _warn_prf(average, modifier, msg_start, len(result))


In [66]:
# Accuracy on the test instances. The targets are one-hot rows, so a
# prediction only counts as correct when the whole row matches.
print(accuracy_score(y_test, clf.predict(X_test)))

0.19170984455958548
