# Error Visualization
This notebook categorizes the errors encountered when running the AutoML benchmark and creates visualizations that show when, where, and why errors happen.


### Loading Data

In [400]:
import itertools
from pathlib import Path
import re

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from data_processing import get_print_friendly_name

In [3]:
# All locations are derived from the parent of the current working directory.
PROJECT_ROOT = Path.cwd().parent
DATA_DIRECTORY = PROJECT_ROOT.joinpath("data")
FIGURE_DIRECTORY = PROJECT_ROOT.joinpath("figures", "CD")
# Create the figure directory (and any missing parents) up front; a pre-existing
# directory is not an error.
FIGURE_DIRECTORY.mkdir(parents=True, exist_ok=True)

In [377]:
# Read the benchmark results (forcing the "info" column to str), replace each
# framework identifier by its print-friendly name, and drop the NaiveAutoML rows.
results = (
    pd.read_csv(DATA_DIRECTORY / "amlb_all.csv", dtype={"info": str})
    .assign(framework=lambda frame: frame["framework"].apply(get_print_friendly_name))
    .loc[lambda frame: frame["framework"] != "NaiveAutoML"]
)

## Categorizing Errors

In [378]:
# Rows whose "info" field is populated, restricted to the columns that identify
# the run plus the message itself.
with_errors = results.loc[
    results["info"].notna(),
    ["framework", "task", "fold", "constraint", "info"],
]

In [379]:
# Raw string: "\[" and "\d" are regex escapes, not Python string escapes. In a
# plain literal they are invalid escape sequences that Python warns about.
# The trailing "." is escaped so that it only matches a literal full stop.
TIMEOUT_PATTERN = re.compile(
    r"Interrupting thread MainThread \[ident=\d+\] after \d+s timeout\."
)


def is_timeout(message: str) -> bool:
    """Return True when ``message`` contains the main-thread timeout notice.

    Args:
        message: The error text to inspect.

    Returns:
        True if ``TIMEOUT_PATTERN`` is found anywhere in ``message``,
        False otherwise.
    """
    return TIMEOUT_PATTERN.search(message) is not None

In [390]:
def is_memory(message: str) -> bool:
    """Return True when ``message`` contains any marker this notebook files under memory errors.

    Args:
        message: The error text to inspect.

    Returns:
        True if at least one of the listed substrings occurs in ``message``.
    """
    memory_markers = (
        "Cannot allocate memory",
        "exit status 134",
        "exit status 137",
        "exit status 139",
        "exit status 143",
        "std::bad_alloc",
        "Dummy prediction failed with run state StatusType.MEMOUT",  # autosklearn
        "This could be caused by a segmentation fault while calling the function or by an excessive memory usage",  # lightautoml
        "OutOfMemoryError: GC overhead limit exceeded",  # H2O
    )
    return any(marker in message for marker in memory_markers)

In [391]:
def is_data(message: str) -> bool:
    """Return True when ``message`` reports a class-count mismatch between y_true and y_pred.

    Args:
        message: The error text to inspect.

    Returns:
        True if the mismatch notice occurs anywhere in ``message``.
    """
    class_mismatch = "NoResultError: y_true and y_pred contain different number of classes"
    return class_mismatch in message

In [392]:
def is_implementation(message: str) -> bool:
    """Return True when ``message`` matches one of the known framework-side failures.

    The trailing comments name the framework each marker was recorded for.

    Args:
        message: The error text to inspect.

    Returns:
        True if ``message`` equals the hinge-loss message, contains one of the
        listed fragments, or matches the feature-position pattern.
    """
    # This one is only recognised when it is the complete message.
    if message == "NoResultError: probability estimates are not available for loss='hinge'":
        return True  # TPOT

    known_fragments = (
        "A pipeline has not yet been optimized. Please call fit() first.",  # TPOT
        "object has no attribute 'predict_proba'",  # TPOT
        "'NoneType' object is not iterable",  # GAMA
        "The least populated class in y has only 1 member, which is too few.",  # GAMA
        "Pipeline finished with 0 models for some reason.",  # Light AutoML
        "No models produced. \nPlease check your data or submit",  # MLJar
        "The feature names should match those that were passed during fit",  # MLJar
        "Object of type float32 is not JSON serializable",  # MLJar
        "Ensemble_prediction_0_for_",  # MLJar
        "NeuralNetFastAI_BAG_L1'",  # AutoGluon
        "No learner was chosen in the initial phase.",  # NaiveAutoML
    )
    if any(fragment in message for fragment in known_fragments):
        return True

    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    feature_position = r"At position \d+ should be feature with name"  # MLJar
    return re.search(feature_position, message) is not None

In [393]:
def needs_investigation(message: str) -> bool:
    """Return True when ``message`` is one of the errors marked for further investigation.

    The trailing comments name the framework each marker was recorded for.

    Args:
        message: The error text to inspect.

    Returns:
        True if ``message`` is exactly the empty ``NoResultError`` or contains
        one of the listed fragments.
    """
    # An empty NoResultError only counts when it is the entire message.
    if message == "NoResultError: ":
        return True  # GAMA

    open_questions = (
        "Ran out of input",  # GAMA
        "Python int too large to convert to C ssize_t",  # GAMA
        "invalid load key, ",  # GAMA
        "Unsupported metric `auc` for regression problems",  # FLAML
        "'NoneType' object has no attribute 'name'",  # AutoGluon
    )
    return any(fragment in message for fragment in open_questions)

In [394]:
def needs_rerun(message: str) -> bool:
    """Return True when ``message`` matches an error that this notebook marks for a rerun.

    Args:
        message: The error text to inspect.

    Returns:
        True if ``message`` contains one of the two Auto-sklearn fragments or
        matches the dataset-file exception pattern.
    """
    if "NoResultError: 'e'" in message:
        return True  # Autosklearn artifact save bug
    if "Auto-sklearn does not yet support sparse pandas Series" in message:
        return True  # Autosklearn
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    # The dot before "pq" is escaped so it only matches the literal extension.
    dataset_file_error = r"Exception: File: /input/org/openml/www/datasets/\d+/dataset_\d+\.pq"
    return re.search(dataset_file_error, message) is not None

In [395]:
# Maps an error-type label to the predicate that recognises it. Insertion order
# is significant: classify_error returns the label of the first predicate that
# accepts a message.
checks = {
    "timeout": is_timeout,
    "memory": is_memory,
    "data": is_data,
    "implementation": is_implementation,
    "rerun": needs_rerun,
    "investigate": needs_investigation,
}

def classify_error(message: str):
    """Return the label of the first entry in ``checks`` whose predicate accepts ``message``.

    Args:
        message: The error text to classify.

    Returns:
        The matching key of ``checks``, or ``"unknown"`` when no predicate
        returns a truthy value.
    """
    matching_labels = (label for label, accepts in checks.items() if accepts(message))
    return next(matching_labels, "unknown")

In [396]:
# Label every error message with the category returned by classify_error.
with_errors["error_type"] = with_errors["info"].map(classify_error)

#### Expand the above check functions until no unknown errors are left below

In [399]:
# When unclassified messages remain, print how many distinct ones there are,
# followed by the first such row and its full message text.
unknown_errors = with_errors[with_errors["error_type"] == "unknown"]
if n_errors := len(unknown_errors["info"].unique()):
    first_unknown = unknown_errors.iloc[0]
    print(n_errors)
    print(first_unknown)
    print(first_unknown["info"])

## Visualizing Errors by Framework

In [403]:
# NOTE(review): leftover interactive exploration of the seaborn package. It
# contributes nothing to the analysis and its captured output includes an
# absolute local path; consider removing this cell before sharing.
help(sns)

Help on package seaborn:

NAME
    seaborn - # Import seaborn objects

PACKAGE CONTENTS
    _compat
    _core (package)
    _decorators
    _docstrings
    _marks (package)
    _oldcore
    _statistics
    _stats (package)
    _testing
    algorithms
    axisgrid
    categorical
    cm
    colors (package)
    distributions
    external (package)
    matrix
    miscplot
    objects
    palettes
    rcmod
    regression
    relational
    utils
    widgets

DATA
    crayons = {'Almond': '#EFDECD', 'Antique Brass': '#CD9575', 'Apricot':...
    xkcd_rgb = {'acid green': '#8ffe09', 'adobe': '#bd6c48', 'algae': '#54...

VERSION
    0.12.2

FILE
    /Users/pietergijsbers/repositories/amlb-results/venv39/lib/python3.9/site-packages/seaborn/__init__.py




In [401]:
# FIXME(review): this call is unfinished. The parenthesis is never closed and no
# x/y columns are given, so the cell cannot run as written. The recorded output
# below refers to `sns.scatter`, i.e. it comes from an earlier version of this cell.
sns.scatterplot(
    data=with_errors

AttributeError: module 'seaborn' has no attribute 'scatter'

## Visualizing Errors by Dataset Size