In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%%file utils.py

import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split

pd.options.mode.chained_assignment = None

error_messages = {
    "No clear target in training data": 
        ("The training data must have " 
         "exactly one more column than " 
         "the test data."),
    "Training data has too many columns":
        ("The training data has more "
         "than one column different than "
         "the testing data: %s"),
    "Column names inconsistent":
        ("The training columns and the "
         "test columns must have "
         "identical names excepts for "
         "the target variables. "
         "Different columns: %s")
    }

def X_y_split(X_train, X_test):
    """
    Determines which variables are the target
    and which are the features. Returns just
    The X and y data in the training dataset
    as a tuple.
    
    Example usage:
    X, y = learn.X_y_split(X_train, X_test)
    
    Parameters
    ----------
    X_train: pandas dataframe
        The data that has the target in it.
    
    X_test: pandas dataframe
        The data that does not have the target in it.
    """
    X_train = X_train.copy()
    n_train_cols = X_train.shape[1]
    n_test_cols = X_test.shape[1]
    
    if n_train_cols != n_test_cols + 1:
        msg = error_messages["No clear target in training data"]
        raise ValueError(msg)
        
    test_columns = set(X_test.columns)
    train_columns = set(X_train.columns)
    target_columns = train_columns - test_columns
    if len(target_columns) > 1:
        key = "Training data has too many columns"
        msg_ = error_messages[key]
        msg = msg_ % str(target_columns)
        raise ValueError(msg)

    extra_columns_in_test = test_columns - train_columns
    if extra_columns_in_test:
        key = "Column names inconsistent"
        msg_ = error_messages[key]
        msg = msg_ % str(extra_columns_in_test)
        raise ValueError(msg)     

    y_name = target_columns.pop()
    y = X_train.pop(y_name)
    return X_train, y


def X_to_train_test(X, target_name, test_size=.05):
    X = X.copy()
    y = X.pop(target_name)
    X_train, X_test, y_train, _ = train_test_split(X, 
                                                   y, 
                                                   test_size=test_size,
                                                   random_state=42)
    X_train[target_name] = y_train
    return X_train, X_test


def make_data(source):
    """
    Utility function to assist in loading different 
    sample datasets. Returns training data (that 
    contains the target) and testing data (that
    does not contain the target).
    
    Parameters
    ----------
    source: string, optional (default="boston")
        The specific dataset to load. Options:
        - Regression: "boston", "diabetes"
        - Classification: "cancer", "digits", "iris", "titanic"
    """
    if source == "boston":
        data = datasets.load_boston()
    elif source == "diabetes":
        data = datasets.load_diabetes()
        data["feature_names"] = ["f{}".format(v) 
                                 for v in range(10)]
    elif source == "cancer":
        data = datasets.load_breast_cancer()
    elif source == "digits":
        data = datasets.load_digits()
        data["feature_names"] = ["f{}".format(v) 
                                 for v in range(64)]        
    elif source == "iris":
        data = datasets.load_iris()
    elif source == "titanic":
        train_data_path = "../tests/test_data/titanic/train.csv"
        test_data_path = "../tests/test_data/titanic/test.csv"

        X_train = pd.read_csv(train_data_path)
        X_test = pd.read_csv(test_data_path)
        return X_train, X_test
    elif source == "abalone":
        train_data_path = "../tests/test_data/abalone_age/abalone.data"
        col_names = ["Sex", "Length", "Diameter", "Height", 
                     "Whole_weight", "Shucked_weight", 
                     "Viscera_weight", "Shell_weight", "Rings"]
        X = pd.read_csv(train_data_path, header=None, names=col_names)
        X["Rings"] = (X.Rings >= 9).astype(int)
        return X_to_train_test(X, "Rings")
    elif source == "bank_marketing":
        train_data_path = "../tests/test_data/bank_marketing/bank-full.csv"
        X = pd.read_csv(train_data_path, sep=";")
        return X_to_train_test(X, "y")
    elif source == "car_evaluation":
        train_data_path = "../tests/test_data/car_evaluation/car.data"
        col_names = ["buying", "maint", "doors", 
                     "persons", "lug_boot", "safety", "car_evaluation"]
        X = pd.read_csv(train_data_path, header=None, names=col_names)
        return X_to_train_test(X, "car_evaluation")
    elif source == "income":
        train_data_path = "../tests/test_data/census_income/adult.data"
        col_names = ["age", "workclass", "fnlwgt", 
                     "education", "education-num", 
                     "marital-status", "occupation", 
                     "relationship", "race", "sex",
                     "capital-gain", "capital-loss", 
                     "hours-per-week", "native-country",
                     "income"]
        train = pd.read_csv(train_data_path, skiprows=[0], 
                            header=None, names=col_names)
        test_data_path = "../tests/test_data/census_income/adult.test"
        test = pd.read_csv(test_data_path, skiprows=[0], 
                           header=None, names=col_names)
        X = pd.concat([train,test])
        return X_to_train_test(X, "income")
    elif source == "chess":
        train_data_path = "../tests/test_data/chess/kr-vs-kp.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 36)
    elif source == "mushrooms":
        train_data_path = "../tests/test_data/mushroom/agaricus-lepiota.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 0)
    elif source == "tictactoe":
        train_data_path = "../tests/test_data/tictactoe/tic-tac-toe.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 9)
    elif source == "wine-origin":
        train_data_path = "../tests/test_data/wine_origin/wine.data"
        X = pd.read_csv(train_data_path, header=None)
        return X_to_train_test(X, 0)
    elif source == "wine-quality":
        train_data_path = "../tests/test_data/wine_quality/winequality-white.csv"
        X = pd.read_csv(train_data_path, sep=";")
        X["quality"] = (X.quality > 5).astype(int)
        return X_to_train_test(X, "quality")
    else:
        raise ValueError("Not a valid dataset.")
    X = pd.DataFrame(data=data.data, 
                     columns=data.feature_names)
    y = pd.Series(data=data.target)
    X_train, X_test, y_train, _ = train_test_split(X, 
                                                   y, 
                                                   test_size=.05,
                                                   random_state=42)
    X_train["target"] = y_train
    return X_train, X_test


def is_categorical(x, 
                   max_classes="auto", 
                   strings_are_categorical=True):
    """
    Check if a target variable is a classification
    problem or a regression problem. Returns True if
    classification and False if regression. On failure,
    raises a ValueError.
    
    Parameters
    ----------
    x: array-like
        This should be the target variable. Ideally, 
        you should convert it to be numeric before 
        using this function.
        
    max_classes: int or float, optional (default="auto")
        Determines the max number of unique values
        there can be for it being a categorical variable
        
        If "auto" - sets it equal to 10% of the dataset or
            100, whichever is smaller
        If float - interprets as percent of dataset size
        If int - interprets as number of classes
        
    strings_are_categorical: bool, optional (default=True)
        If a variable is a string and cannot be coerced
        to a number, returns True regardless of the number
        of unique values. 
    """
    x = pd.Series(x)
    n = len(x)
    n_unique = len(x.unique())
    if max_classes == "auto":
        auto_n_classes = .05
        n_max_classes = int(n*auto_n_classes)
        max_classes = min(n_max_classes, 100)
    if isinstance(max_classes, float):
        n_max_classes = int(n*max_classes)
        max_classes = min(n_max_classes, int(n/2))
    # If x is numeric
    if x.dtype.kind in 'bifc':
        # If there are more than max_classes
        # classify as a regression problem
        if n_unique > max_classes:
            return False
        # If there are floating point numbers
        # classify as a regression problem
        decimals = (x - x.astype(int)).mean()
        if decimals > .01:
            return False
    if n_unique <= max_classes:
        return True
    try:
        x.astype(float)
        return False
    except ValueError:
        if strings_are_categorical:
            return True
        msg = ("Malformed data. "
               "Variable is non-numeric "
               "and there are more "
               "unique values than allowed "
               "by max_classes")
        raise ValueError(msg)
        
        
def categorical_columns(X):
    """Returns a list of all categorical columns"""
    cats = X.apply(is_categorical, axis=0)
    categoricals = cats[cats].index.tolist()
    return categoricals

Overwriting utils.py


In [3]:
%%file ../tests/test_utils.py
import unittest
from learn import utils

class TestUtils(unittest.TestCase):
    def test_making_data_simple(self):
        for data in ["boston", "iris"]:
            X_train, X_test = utils.make_data(source=data)
            train_cols = X_train.columns
            test_cols = X_test.columns
            # Training data should have exactly one additional column
            self.assertEqual(len(train_cols), len(test_cols)+1)
            # Ensure only one column name is different
            n_diff_cols = len(set(X_train.columns) - set(X_test.columns))
            self.assertEqual(1, n_diff_cols)
        
    def test_is_classification_problem(self):
        # Shorten function name
        icp = utils.is_categorical
        # Regression because floats
        result = icp([1.1, 2.1])
        self.assertEqual(result, 0)
        # Regression because number of unique
        result = icp([1,2,3,4])
        self.assertEqual(result, 0)
        # Classification because words
        result = icp(["cat"]*20+["dog"]*20)
        self.assertEqual(result, 1)
        # Classification because number of uniques
        result = icp([0]*20+[1]*20)
        self.assertEqual(result, 1)
        # Real data tests - Regression
        for dataset in ["boston", "diabetes"]:
            data = utils.make_data(source=dataset)
            X, y = utils.X_y_split(*data)
            self.assertEqual(icp(y), 0)
        # Real data tests - Classification
        for dataset in ["cancer", "digits", "iris"]:
            data = utils.make_data(source=dataset)
            X, y = utils.X_y_split(*data)
            self.assertEqual(icp(y), 1)
            
# class TestXYSplit(unittest.TestCase):
#     pass

if __name__ == '__main__':
    unittest.main()

Overwriting ../tests/test_utils.py


In [106]:
%%file forall.py

from collections import defaultdict
import pandas as pd
import numpy as np
from sklearn.preprocessing import label_binarize
from sklearn.linear_model import LogisticRegression, LinearRegression, RidgeCV, Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.base import BaseEstimator, TransformerMixin, ClassifierMixin, RegressorMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, roc_auc_score
from sklearn import metrics
from learn import utils

def categorical_unique_counts(X):
    """
    Returns series of categorical columns
    and count of unique values in each 
    column
    """
    cats = utils.categorical_columns(X)
    return X[cats].apply(pd.Series.nunique, axis=0)

def small_categorical(X, large_class_threshold=10):
    counts = categorical_unique_counts(X)
    mask = counts < large_class_threshold
    return counts[mask].index.tolist()

def large_categorical(X, large_class_threshold=10):
    counts = categorical_unique_counts(X)
    mask = counts>=large_class_threshold
    return counts[mask].index.tolist()

def word_to_num(word, max_char=5):
    """
    Assigns a number to a word that
    is the approximate sort order of
    the word
    
    Words with the same first max_char
    will have the same value.
    """
    word_val = 0
    for n, char in enumerate(str(word)):
        if n > max_char:
            break
        num = ord(char)/130
        den = 10**n
        total = num/den
        word_val += total
    return word_val

def word_size(word):
    """
    Returns the length of the word
    """
    return len(str(word))

class CategoricalImputer(BaseEstimator, TransformerMixin):
    """
    Adds a new "NULL" category for missing values
    """
    def __init__(self, fill_value="NULL"):
        self.fill_value = fill_value
    
    def fit(self, X, y=None):
        self.cat_cols = utils.categorical_columns(X)
        return self
    
    def transform(self, X, y=None):
        fill_values = {c:self.fill_value for c in self.cat_cols}
        return X.fillna(fill_values, axis=0)

class NumericImputer(BaseEstimator, TransformerMixin):
    """
    TODO: Add option for indicator variable if NaN
    """
    def __init__(self, method="mean"):
        self.method = method
        
    def fit(self, X, y=None):
        if self.method == "mean":
            self.fill_values = X.mean()
        if self.method == "max":
            self.fill_values = X.max() + 1
        return self
    
    def transform(self, X, y=None):
        cols = (X.dtypes[X.dtypes =="object"]).index
        if len(cols):
            print(X[cols].describe())
        X[~pd.np.isfinite(X)] = 0 #TODO: Fix
        return X.fillna(self.fill_values)

    
class KeepNumeric(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        self.numeric_columns = X.dtypes[X.dtypes != "object"].index.tolist()
        return self
        
    def transform(self, X, y=None):
        return X[self.numeric_columns]
    
    
class Categoricals(BaseEstimator, TransformerMixin):
    def __init__(self, large_class_threshold=10):
        """
        Anything large_class_threshold and larger
        will be treated as a categorical features
        with a large number of categories.
        """
        self.large_class_threshold = large_class_threshold
        
    def fit(self, X, y=None):
        lct = self.large_class_threshold
        self.small = small_categorical(X, lct)
        self.large = large_categorical(X, lct)
        self.all = self.small + self.large
        # Save category unique value counts for feature
        # engineering
        self.value_counts = defaultdict(int)
        for col in self.large:
            self.value_counts[col] = X[col].value_counts()
        return self
    
    def transform(self, X, y=None):
        # Add sort value based features
        for col in self.all:
            X[col] = X[col].asobject
            new_col = X[col].apply(word_to_num)
            col_name = str(col)+"__sort"
            X[col_name] = new_col
        
        for col in self.large:
            # Add count based features
            counts = self.value_counts[col]
            X = X.join(counts, 
                       on=col, 
                       rsuffix="__counts")
            # Add word length features
            new_col = X[col].apply(word_size)
            col_name = str(col)+"__length"
            X[col_name] = new_col
        return X

    
class Standardize(BaseException, TransformerMixin):
    def __init__(self):
        pass
    
    def fit(self, X, y):
        self.mean = X.mean(axis=0)
        self.std = X.std(axis=0)
        return self
    
    def transform(self, X, y=None):
        return (X - self.mean)/self.std
    

class DropBadColumns(BaseEstimator, TransformerMixin):
    def __init__(self):
        """
        Drops columns with:
        * NaN standard deviation
        * Zero standard deviation
        """
        pass
    
    def fit(self, X, y=None):
        std = X.std(axis=0)
        null_std = std.isnull()
        zero_std = std == 0
        bad_std_cols = std[null_std | zero_std].index.values.tolist()
        self.to_drop = bad_std_cols
        return self
    
    def transform(self, X):
        return X.drop(self.to_drop, axis=1)
    
    
def regression_metrics(y, y_hat):
    exp_var = metrics.explained_variance_score(y, y_hat)
    mae = metrics.mean_absolute_error(y, y_hat)
    mse = metrics.mean_squared_error(y, y_hat)
    medae = metrics.median_absolute_error(y, y_hat)
    r2 = metrics.r2_score(y, y_hat)
    results = {
        "Explained variance score": exp_var,
        "Mean absolute error": mae,
        "Mean squared error": mse,
        "Root mean squared error": mse**.5,
        "Median absolute error": medae,
        "R^2 score": r2
    }
    return results


class RegressionPredict(BaseEstimator):
    def __init__(self, time_to_compute=100):
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        self.lr = RidgeCV()
        self.lr.fit(X, y)
        lr_pred = cross_val_predict(self.lr, X, y, cv=10, n_jobs=-1).reshape(-1, 1)
        
        self.rf = RandomForestRegressor(n_estimators=self.time_to_compute, 
                                        random_state=42, 
                                        oob_score=True, 
                                        n_jobs=-1)
        self.rf.fit(X, y)
        rf_pred = self.rf.oob_prediction_.reshape(-1, 1)

        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])

        self.lr_1 = LinearRegression()
        self.generalized_predictions = cross_val_predict(self.lr_1, 
                                                         layer_1, 
                                                         y, 
                                                         cv=10, 
                                                         n_jobs=-1, 
                                                         method="predict")
        self.lr_1.fit(layer_1, y)
        return self
    
    def predict(self, X):
        lr_pred = self.lr.predict(X).reshape(-1, 1)
        rf_pred = self.rf.predict(X).reshape(-1, 1)
        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])
        final_predictions = self.lr_1.predict(layer_1)
        return final_predictions
    

class Regression(BaseEstimator, RegressorMixin):
    def __init__(self, time_to_compute=100):
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        self.model = RegressionPredict(time_to_compute=self.time_to_compute)
        self.model.fit(X, y)
        self.oob_predictions = self.model.generalized_predictions
        
        self.all_metrics = regression_metrics(y, self.oob_predictions)
        self.score_type = "R^2*100"
        self.score = int(self.all_metrics["R^2 score"]*100)
        self.display_score = "%d/100" % self.score
        self.understandable_metric_name = "Average prediction error"
        self.understandable_metric_value = self.all_metrics["Mean absolute error"]
        self.understandable_metric_description = "On average, the predictions will be off by %.2f." % self.understandable_metric_value
        return self
        
    def predict(self, X):
        predictions = self.model.predict(X)
        return predictions
    

def classification_metrics(y, y_hat):
    results = {}
    y_prob = y_hat[:, 1]
    y_pred = (y_prob > .5).astype(int)
    y_bin = label_binarize(y, 
                           sorted(pd.Series(y).unique()))
    binary = y_bin.shape[1] == 1
    if binary:
        # Fix the binary case returning a column vector
        y_bin = np.hstack((-(y_bin - 1), y_bin))
    ave_precision = metrics.average_precision_score(y_bin, y_hat)
    auc = metrics.roc_auc_score(y_bin, y_hat)
    log_loss = metrics.log_loss(y_bin, y_hat)
    data = {
        "Accuracy": (y_hat.argmax(axis=1) == y).mean(),
        "Average precision score": ave_precision,
        "AUC": auc,
        "Log loss (cross-entropy loss)": log_loss
            }
    if binary:
        brier = metrics.brier_score_loss(y, y_prob)
        f1 = metrics.f1_score(y, y_pred)
        cks = metrics.cohen_kappa_score(y, y_pred)
        hamming = metrics.hamming_loss(y, y_pred)
        hinge = metrics.hinge_loss(y, y_pred)
        jaccard = metrics.jaccard_similarity_score(y, y_pred)
        matt = metrics.matthews_corrcoef(y, y_pred)
        precision = metrics.precision_score(y, y_pred)
        recall = metrics.recall_score(y, y_pred)
        binary_data = {
            "Brier score loss": brier,
            "F1 score": f1,
            "Cohen's kappa": cks,
            "Average Hamming loss": hamming,
            "Hinge loss": hinge,
            "Jaccard similarity coefficient": jaccard,
            "Matthews correlation coefficient": matt,
            "Precision": precision,
            "Recall": recall
            }
        data.update(binary_data)
    return data


class ClassificationPredict(BaseEstimator):
    def __init__(self, time_to_compute=100):
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        smallest_class_size = pd.Series(y).value_counts().min()
        cv = min(smallest_class_size, 10)
        if cv == 1:
            raise ValueError("One of the classes you are trying to predict has only one observation!")
        
        self.lr = LogisticRegression(C=1)
        self.lr.fit(X, y)
        lr_pred = cross_val_predict(self.lr, X, y, cv=cv, n_jobs=-1, method="predict_proba")
        
        self.rf = RandomForestClassifier(n_estimators=self.time_to_compute, 
                                         random_state=42, oob_score=True, n_jobs=-1)
        self.rf.fit(X, y)
        rf_pred = self.rf.oob_decision_function_

        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])

        self.lr_1 = LogisticRegression(C=1)
        self.generalized_predictions = cross_val_predict(self.lr_1, 
                                                         layer_1, 
                                                         y, 
                                                         cv=cv, 
                                                         n_jobs=-1, 
                                                         method="predict_proba")
        self.lr_1.fit(layer_1, y)
        return self
    
    def predict_proba(self, X):
        lr_pred = self.lr.predict_proba(X)
        rf_pred = self.rf.predict_proba(X)
        layer_1 = np.hstack([
            lr_pred, 
            rf_pred
        ])
        final_predictions = self.lr_1.predict_proba(layer_1)
        return final_predictions

    def predict(self, X):
        predictions = self.predict_proba(X)
        return predictions.argmax(1)
    

class Classification(BaseEstimator, ClassifierMixin):
    def __init__(self, time_to_compute=100):
        """
        """
        self.time_to_compute = time_to_compute
        
    def fit(self, X, y):
        y = pd.Series(y)
        self.n_classes = len(y.unique())
        self.label_encoder = None
        self.label_encoder = LabelEncoder().fit(y)
        y = self.label_encoder.transform(y)

        self.model = ClassificationPredict(time_to_compute=self.time_to_compute)
        self.model.fit(X, y)
        self.oob_predictions = self.model.generalized_predictions
        
        self.all_metrics = classification_metrics(y, 
                                                  self.oob_predictions)
        self.score_type = "(AUC - .5)*200"
        self.score = int((self.all_metrics["AUC"] - .5)*200)
        self.display_score = "%d/100" % self.score
        self.understandable_metric_name = "Accuracy"
        self.understandable_metric_value = self.all_metrics["Accuracy"]*100
        self.understandable_metric_description = "The predictions are expected to be correct %.2f%% of the time" % self.understandable_metric_value
        return self
        
    def predict(self, X):
        predictions = self.model.predict(X)
        if self.label_encoder is not None:
            predictions = self.label_encoder.inverse_transform(predictions)
        return predictions

    
class All():
    def __init__(self, time_to_compute=None, force_model=None):
        """
        time_to_compute: higher numbers mean longer compute time 
        and more accurate results
        
        force_model: None, "regression", or "classification"
        Forces the model used
        """
        self.time_to_compute = time_to_compute or 100
        self.force_model = force_model
        
        
    def fit(self, X, y):
        if isinstance(y, pd.Series):
            self.target_name = y.name
        else:
            self.target_name = "what you are trying to predict"
        X.columns = [str(col) for col in X.columns]
        # Determine type of problem
        if self.force_model:
            self.classification = self.force_model == "classification"
        else:
            self.classification = utils.is_categorical(y, max_classes=.1)
        if self.classification:
            model = Classification(time_to_compute=self.time_to_compute)
        else:
            model = Regression(time_to_compute=self.time_to_compute)
        # Create pipeline
        steps = [("categorical_imputation", CategoricalImputer()),
                 ("make_categoricals_numeric", Categoricals()),
                 ("keep_only_numeric", KeepNumeric()),
                 ("numeric_imputation", NumericImputer("max")),
                 ("drop_bad_columns", DropBadColumns()),
                 ("scale", Standardize()),
                 ("model", model)]
        pipe = Pipeline(steps)
        pipe.fit(X, y)
        self.model = pipe
        self.score = pipe.named_steps["model"].score
        self.score_type = pipe.named_steps["model"].score_type
        self.display_score = pipe.named_steps["model"].display_score
        self.all_metrics = pipe.named_steps["model"].all_metrics
        self.understandable_metric_name = pipe.named_steps["model"].understandable_metric_name
        self.understandable_metric_description = pipe.named_steps["model"].understandable_metric_description
        self.understandable_metric_value = pipe.named_steps["model"].understandable_metric_value
        return self
        
    def predict(self, X):
        X.columns = [str(col) for col in X.columns]
        predictions = self.model.predict(X)
        return predictions

Overwriting forall.py


In [107]:
import os
import sys
from pprint import pprint

# Allows importing of local modules
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from learn import forall as fa
from learn import utils

In [122]:
from IPython.display import display, HTML

total_score = 0

for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    print(dataset)
    X_train, X_test = utils.make_data(source=dataset)
    display(HTML(X_train.head().to_html()))

boston


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
79,0.08387,0.0,12.83,0.0,0.437,5.874,36.6,4.5026,5.0,398.0,18.7,396.06,9.1,20.3
454,9.51363,0.0,18.1,0.0,0.713,6.728,94.1,2.4961,24.0,666.0,20.2,6.68,18.71,14.9
210,0.17446,0.0,10.59,1.0,0.489,5.96,92.1,3.8771,4.0,277.0,18.6,393.25,17.27,21.7
497,0.26838,0.0,9.69,0.0,0.585,5.794,70.6,2.8927,6.0,391.0,19.2,396.9,14.1,18.3
172,0.13914,0.0,4.05,0.0,0.51,5.572,88.5,2.5961,5.0,296.0,16.6,396.9,14.69,23.1


diabetes


Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,target
399,0.059871,0.05068,0.022895,0.049415,0.016318,0.011838,-0.013948,-0.002592,0.03954,0.019633,232.0
30,-0.060003,-0.044642,0.044451,-0.019442,-0.009825,-0.007577,0.022869,-0.039493,-0.027129,-0.009362,129.0
140,0.041708,0.05068,0.014272,0.04253,-0.030464,-0.001314,-0.043401,-0.002592,-0.033249,0.015491,118.0
341,0.030811,0.05068,0.059541,0.056301,-0.022208,0.001191,-0.032356,-0.002592,-0.024791,-0.017646,263.0
209,0.038076,0.05068,-0.018062,0.06663,-0.051103,-0.016658,-0.076536,0.034309,-0.011901,-0.013504,77.0


cancer


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
72,17.2,24.52,114.2,929.4,0.1071,0.183,0.1692,0.07944,0.1927,0.06487,0.5907,1.041,3.705,69.47,0.00582,0.05616,0.04252,0.01127,0.01527,0.006299,23.32,33.82,151.6,1681.0,0.1585,0.7394,0.6566,0.1899,0.3313,0.1339,0
551,11.13,22.44,71.49,378.4,0.09566,0.08194,0.04824,0.02257,0.203,0.06552,0.28,1.467,1.994,17.85,0.003495,0.03051,0.03445,0.01024,0.02912,0.004723,12.02,28.26,77.8,436.6,0.1087,0.1782,0.1564,0.06413,0.3169,0.08032,1
158,12.06,12.74,76.84,448.6,0.09311,0.05241,0.01972,0.01963,0.159,0.05907,0.1822,0.7285,1.171,13.25,0.005528,0.009789,0.008342,0.006273,0.01465,0.00253,13.14,18.41,84.08,532.8,0.1275,0.1232,0.08636,0.07025,0.2514,0.07898,1
424,9.742,19.12,61.93,289.7,0.1075,0.08333,0.008934,0.01967,0.2538,0.07029,0.6965,1.747,4.607,43.52,0.01307,0.01885,0.006021,0.01052,0.031,0.004225,11.21,23.17,71.79,380.9,0.1398,0.1352,0.02085,0.04589,0.3196,0.08009,1
532,13.68,16.33,87.76,575.5,0.09277,0.07255,0.01752,0.0188,0.1631,0.06155,0.2047,0.4801,1.373,17.25,0.003828,0.007228,0.007078,0.005077,0.01054,0.001697,15.85,20.2,101.6,773.4,0.1264,0.1564,0.1206,0.08704,0.2806,0.07782,1


digits


Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,target
1204,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,5.0,15.0,16.0,11.0,0.0,0.0,0.0,8.0,16.0,16.0,16.0,4.0,0.0,0.0,0.0,14.0,11.0,11.0,16.0,2.0,0.0,0.0,0.0,0.0,0.0,7.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,16.0,2.0,0.0,0.0,0.0,0.0,0.0,9.0,16.0,3.0,0.0,0.0,0.0,0.0,0.0,5.0,16.0,5.0,0.0,0.0,1
570,0.0,0.0,7.0,13.0,14.0,1.0,0.0,0.0,0.0,7.0,15.0,9.0,13.0,7.0,0.0,0.0,0.0,5.0,15.0,3.0,8.0,8.0,0.0,0.0,0.0,0.0,1.0,0.0,12.0,5.0,0.0,0.0,0.0,0.0,0.0,1.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,6.0,0.0,0.0,0.0,0.0,0.0,2.0,15.0,5.0,4.0,4.0,0.0,0.0,0.0,6.0,16.0,16.0,13.0,16.0,6.0,2
643,0.0,0.0,0.0,2.0,13.0,13.0,0.0,0.0,0.0,0.0,0.0,12.0,10.0,16.0,0.0,0.0,0.0,0.0,7.0,13.0,8.0,11.0,0.0,0.0,0.0,0.0,5.0,16.0,16.0,4.0,0.0,0.0,0.0,0.0,3.0,16.0,16.0,4.0,0.0,0.0,0.0,2.0,14.0,9.0,7.0,13.0,1.0,0.0,0.0,1.0,11.0,8.0,3.0,9.0,8.0,0.0,0.0,0.0,0.0,5.0,10.0,15.0,16.0,0.0,8
316,0.0,0.0,7.0,14.0,16.0,11.0,0.0,0.0,0.0,2.0,16.0,11.0,11.0,16.0,2.0,0.0,0.0,0.0,3.0,3.0,15.0,9.0,0.0,0.0,0.0,0.0,0.0,10.0,16.0,8.0,0.0,0.0,0.0,0.0,0.0,3.0,14.0,15.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,16.0,6.0,0.0,0.0,0.0,3.0,12.0,13.0,15.0,2.0,0.0,0.0,0.0,6.0,16.0,12.0,5.0,0.0,0.0,3
1010,0.0,0.0,9.0,12.0,12.0,12.0,6.0,0.0,0.0,1.0,14.0,6.0,4.0,4.0,2.0,0.0,0.0,4.0,15.0,12.0,9.0,1.0,0.0,0.0,0.0,4.0,15.0,8.0,11.0,11.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,14.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,8.0,0.0,0.0,0.0,10.0,1.0,0.0,8.0,8.0,0.0,0.0,0.0,9.0,16.0,16.0,15.0,4.0,0.0,5


iris


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
68,6.2,2.2,4.5,1.5,1
82,5.8,2.7,3.9,1.2,1
110,6.5,3.2,5.1,2.0,2
12,4.8,3.0,1.4,0.1,0
36,5.5,3.5,1.3,0.2,0


titanic


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


abalone


Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
4084,F,0.575,0.48,0.17,1.1,0.506,0.2485,0.31,1
2195,I,0.26,0.215,0.08,0.099,0.037,0.0255,0.045,0
2130,M,0.585,0.455,0.14,0.97,0.462,0.185,0.295,1
2803,M,0.65,0.51,0.175,1.446,0.6485,0.2705,0.45,1
3969,I,0.38,0.3,0.09,0.277,0.1655,0.0625,0.082,0


bank_marketing


Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
42723,36,management,married,tertiary,no,203,no,no,cellular,25,jan,255,1,88,1,success,yes
43871,62,management,married,tertiary,no,0,yes,yes,cellular,4,jun,106,1,64,3,failure,no
42877,35,admin.,divorced,secondary,no,1085,no,no,cellular,4,feb,195,6,97,12,other,yes
10499,48,blue-collar,divorced,secondary,yes,296,no,yes,unknown,16,jun,144,4,-1,0,unknown,no
14664,60,management,married,primary,no,0,yes,no,cellular,15,jul,111,8,-1,0,unknown,no


car_evaluation


Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,car_evaluation
398,vhigh,low,4,more,small,high,acc
1670,low,low,3,more,med,high,vgood
998,med,high,2,more,big,high,acc
1356,low,vhigh,4,2,big,low,unacc
1694,low,low,4,more,small,high,good


income


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
11463,38,Local-gov,218763,Masters,14,Separated,Prof-specialty,Unmarried,White,Female,0,0,40,United-States,<=50K.
7193,37,Local-gov,218184,Bachelors,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,1887,40,United-States,>50K
5297,38,Private,189092,Assoc-voc,11,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,43,United-States,<=50K.
7473,41,Private,369781,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,55,United-States,<=50K.
27178,26,?,109564,HS-grad,9,Never-married,?,Own-child,White,Female,0,0,40,United-States,<=50K


chess


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36
2605,f,f,f,f,t,f,t,t,t,t,f,f,l,f,n,f,f,t,f,f,f,f,f,f,f,f,f,f,f,f,f,f,t,f,t,n,nowin
218,f,f,f,f,f,t,f,f,f,f,f,f,l,f,n,f,f,f,f,f,f,f,f,t,f,t,f,f,f,f,f,f,t,f,t,n,won
2883,f,f,f,f,f,f,f,f,f,f,t,f,l,f,n,f,t,t,f,f,f,f,t,f,f,t,f,f,f,f,f,f,t,t,f,n,nowin
229,f,f,f,f,f,t,f,f,f,f,t,f,l,f,n,f,f,f,f,f,f,f,f,f,f,t,f,f,f,f,t,f,t,f,f,n,won
3185,f,f,f,f,t,f,t,f,t,f,f,t,l,f,n,f,f,f,f,f,f,t,f,f,f,t,f,f,t,f,f,t,f,t,f,n,nowin


mushrooms


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,0
353,b,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,k,s,m,e
1562,x,f,n,f,n,f,w,b,p,t,e,f,s,w,w,p,w,o,e,k,a,g,e
3912,f,f,y,f,f,f,c,b,p,e,b,k,k,n,b,p,w,o,l,h,y,p,p
4500,x,f,y,f,f,f,c,b,g,e,b,k,k,n,n,p,w,o,l,h,y,d,p
7918,x,f,w,f,n,f,w,b,p,e,?,s,k,w,w,p,w,t,p,w,n,g,e


tictactoe


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
750,o,x,x,o,x,x,o,b,o,negative
665,x,x,b,o,o,o,x,o,x,negative
885,b,x,x,o,o,o,x,b,b,negative
907,b,x,o,b,b,o,x,x,o,negative
244,x,b,o,x,x,o,b,o,x,positive


wine-origin


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,0
141,13.36,2.56,2.35,20.0,89,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780,3
24,13.5,1.81,2.61,20.0,96,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845,1
150,13.5,3.12,2.62,24.0,123,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500,3
41,13.41,3.84,2.12,18.8,90,2.45,2.68,0.27,1.48,4.28,0.91,3.0,1035,1
118,12.77,3.43,1.98,16.0,80,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372,2


wine-quality


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4729,6.8,0.32,0.43,1.6,0.05,4.0,65.0,0.99346,3.27,0.47,10.7,0
1739,8.4,0.29,0.29,1.05,0.032,4.0,55.0,0.9908,2.91,0.32,11.4,0
2962,6.9,0.38,0.38,13.1,0.112,14.0,94.0,0.99792,3.02,0.48,9.2,0
3668,6.5,0.27,0.26,11.0,0.03,2.0,82.0,0.99402,3.07,0.36,11.2,0
3478,6.2,0.36,0.45,10.4,0.06,22.0,184.0,0.99711,3.31,0.56,9.8,1


In [117]:
%%time
total_score = 0

for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All()
    model.fit(X, y)
    predictions = model.predict(X_test)
    print("%s: %s (%s)" % (dataset, model.display_score, model.score_type))
    print(model.understandable_metric_description,"\n")
    
    # Benchmarking
    total_score += model.score
    
print(total_score)

boston: 87/100 (R^2*100)
On average, the predictions will be off by 2.20. 

diabetes: 47/100 (R^2*100)
On average, the predictions will be off by 44.76. 

cancer: 98/100 ((AUC - .5)*200)
The predictions are expected to be correct 97.78% of the time 

digits: 99/100 ((AUC - .5)*200)
The predictions are expected to be correct 96.31% of the time 

iris: 97/100 ((AUC - .5)*200)
The predictions are expected to be correct 95.07% of the time 

titanic: 75/100 ((AUC - .5)*200)
The predictions are expected to be correct 81.93% of the time 

abalone: 81/100 ((AUC - .5)*200)
The predictions are expected to be correct 84.22% of the time 

bank_marketing: 85/100 ((AUC - .5)*200)
The predictions are expected to be correct 90.71% of the time 

car_evaluation: 99/100 ((AUC - .5)*200)
The predictions are expected to be correct 98.42% of the time 

income: 51/100 ((AUC - .5)*200)
The predictions are expected to be correct 57.00% of the time 

chess: 99/100 ((AUC - .5)*200)
The predictions are expected t

In [113]:
%%time
total_score = 0

for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All(time_to_compute=200)
    model.fit(X, y)
    predictions = model.predict(X_test)
    print("%s: %s (%s)" % (dataset, model.display_score, model.score_type))
    
    # Benchmarking
    total_score += model.score
    
print(total_score)

boston: 87/100 (R^2*100)
diabetes: 47/100 (R^2*100)
cancer: 98/100 ((AUC - .5)*200)
digits: 99/100 ((AUC - .5)*200)
iris: 97/100 ((AUC - .5)*200)
titanic: 75/100 ((AUC - .5)*200)
abalone: 81/100 ((AUC - .5)*200)
bank_marketing: 86/100 ((AUC - .5)*200)
car_evaluation: 99/100 ((AUC - .5)*200)
income: 51/100 ((AUC - .5)*200)
chess: 99/100 ((AUC - .5)*200)
mushrooms: 100/100 ((AUC - .5)*200)
tictactoe: 99/100 ((AUC - .5)*200)
wine-origin: 100/100 ((AUC - .5)*200)
wine-quality: 82/100 ((AUC - .5)*200)
1300
CPU times: user 1min 3s, sys: 4.17 s, total: 1min 7s
Wall time: 1min 2s


In [16]:
%%time
total_score = 0

for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All(time_to_compute=200)
    model.fit(X, y)
    predictions = model.predict(X_test)
    print("%s: %s (%s)" % (dataset, model.display_score, model.score_type))
    
    # Benchmarking
    total_score += model.score
    
print(total_score)

boston: 87/100 (R^2*100)
diabetes: 47/100 (R^2*100)
cancer: 98/100 ((AUC - .5)*200)
digits: 99/100 ((AUC - .5)*200)
iris: 97/100 ((AUC - .5)*200)
titanic: 75/100 ((AUC - .5)*200)
abalone: 81/100 ((AUC - .5)*200)
bank_marketing: 86/100 ((AUC - .5)*200)
car_evaluation: 99/100 ((AUC - .5)*200)
income: 51/100 ((AUC - .5)*200)
chess: 99/100 ((AUC - .5)*200)
mushrooms: 100/100 ((AUC - .5)*200)
tictactoe: 99/100 ((AUC - .5)*200)
wine-origin: 100/100 ((AUC - .5)*200)
wine-quality: 82/100 ((AUC - .5)*200)
1300
CPU times: user 1min 14s, sys: 3.61 s, total: 1min 17s
Wall time: 1min 27s


In [17]:
%%time
for dataset in ["boston", "diabetes", 
                "cancer", "digits", 
                "iris", "titanic", 
                "abalone", "bank_marketing",
                "car_evaluation", "income",
                "chess", "mushrooms",
                "tictactoe",  "wine-origin",
                "wine-quality"
               ]:
    # In the flask app:
    X_train, X_test = utils.make_data(source=dataset)
    X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
    model = fa.All(time_to_compute=100)
    model.fit(X, y)
    predictions = model.predict(X_test)
    print(dataset)
    print("Score: %s" % model.display_score)
    print(model.understandable_metric_name, "=", model.understandable_metric_value)
    pprint(model.all_metrics)
    print()

boston
Score: 87/100
Average prediction error = 2.18994700093
{'Explained variance score': 0.87649582271482873,
 'Mean absolute error': 2.1899470009311672,
 'Mean squared error': 10.481110565137438,
 'Median absolute error': 1.5011250912032263,
 'R^2 score': 0.87649574580146905,
 'Root mean squared error': 3.2374543340620945}

diabetes
Score: 47/100
Average prediction error = 44.6849187927
{'Explained variance score': 0.4738710017729707,
 'Mean absolute error': 44.684918792690965,
 'Mean squared error': 3062.3986831492375,
 'Median absolute error': 39.832219870347757,
 'R^2 score': 0.47387075588059335,
 'Root mean squared error': 55.338943639621796}

cancer
Score: 98/100
Accuracy = 0.977777777778
{'AUC': 0.99214840253012226,
 'Accuracy': 0.97777777777777775,
 'Average Hamming loss': 0.022222222222222223,
 'Average precision score': 0.99223228995832868,
 'Brier score loss': 0.017105412294150812,
 "Cohen's kappa": 0.95235364185820792,
 'F1 score': 0.98235294117647076,
 'Hinge loss': 0.39

### Benchmarks

In [18]:
import pandas as pd

#### Allstate Claims Severity - Regression

In [23]:
%%time
X_train_path = "../tests/benchmark_data/allstate_claims_severity/train.csv.zip"
X_test_path = "../tests/benchmark_data/allstate_claims_severity/test.csv.zip"

X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
model = fa.All(time_to_compute=200)
model.fit(X, y)
predictions = model.predict(X_test)
print("Allstate Claims Severity")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Allstate Claims Severity
Score: 54/100
Average prediction error = 1225.00395103
{'Explained variance score': 0.54824293817351144,
 'Mean absolute error': 1225.0039510304584,
 'Mean squared error': 3809970.8065728154,
 'Median absolute error': 805.5986284541865,
 'R^2 score': 0.54824293619173847,
 'Root mean squared error': 1951.9146514570803}

CPU times: user 58min 48s, sys: 30.2 s, total: 59min 19s
Wall time: 33min 22s


#### Ames House Prices - Regression

In [108]:
%%time
X_train_path = "../tests/benchmark_data/ames_house_prices/train.csv.zip"
X_test_path = "../tests/benchmark_data/ames_house_prices/test.csv.zip"

X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
model = fa.All(time_to_compute=1000)
model.fit(X, y)
print("Ames House Prices")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Ames House Prices
Score: 86/100
Average prediction error = 16664.9216955
{'Explained variance score': 0.86647246020381707,
 'Mean absolute error': 16664.921695543213,
 'Mean squared error': 842133582.63987982,
 'Median absolute error': 10334.124771198316,
 'R^2 score': 0.86647188640568029,
 'Root mean squared error': 29019.537946698598}

CPU times: user 29 s, sys: 252 ms, total: 29.2 s
Wall time: 9.24 s


#### Liberty Mutual Property Insurance Prediction - Regression

In [80]:
%%time
X_train_path = "../tests/benchmark_data/liberty_mutual_property_insurance_prediction/train.csv.zip"
X_test_path = "../tests/benchmark_data/liberty_mutual_property_insurance_prediction/test.csv.zip"

X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
model = fa.All(time_to_compute=1000, force_model="regression")
model.fit(X, y)
predictions = model.predict(X_test)
print("Liberty Mutual Property Insurance Prediction")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Liberty Mutual Property Insurance Prediction
Score: 9/100
Average prediction error = 2.74311346883
{'Explained variance score': 0.093206583079835092,
 'Mean absolute error': 2.7431134688272363,
 'Mean squared error': 14.662562105650508,
 'Median absolute error': 2.1966460620385506,
 'R^2 score': 0.093206429534426372,
 'Root mean squared error': 3.8291725092571252}

CPU times: user 14min 38s, sys: 8.93 s, total: 14min 47s
Wall time: 4min 16s


#### Animal Center Shelter Animal Outcomes - Multiclass classification

In [81]:
%%time
X_train_path = "../tests/benchmark_data/animal_center_shelter_animal_outcomes/train.csv"
X_test_path = "../tests/benchmark_data/animal_center_shelter_animal_outcomes/test.csv"

X_train = pd.read_csv(X_train_path, encoding='latin-1')
X_test = pd.read_csv(X_test_path, encoding='latin-1')

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
model = fa.All(time_to_compute=1500)
model.fit(X, y)
predictions = model.predict(X_test)
print("Animal Center Shelter Animal Outcomes")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Animal Center Shelter Animal Outcomes
Score: 72/100
Accuracy = 67.5371319541
{'AUC': 0.86039842221699347,
 'Accuracy': 0.67537131954057394,
 'Average precision score': 0.50975215192965473,
 'Log loss (cross-entropy loss)': 0.80630904448405416}

CPU times: user 1min 32s, sys: 3.56 s, total: 1min 36s
Wall time: 39.9 s


#### Carvana Don't Get Kicked - Binary classification

In [115]:
%%time
X_train_path = "../tests/benchmark_data/carvana_dont_get_kicked/training.csv.zip"
X_test_path = "../tests/benchmark_data/carvana_dont_get_kicked/test.csv.zip"

X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
model = fa.All(time_to_compute=1000)
model.fit(X, y)
predictions = model.predict(X_test)
print("Carvana Don't Get Kicked")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Carvana Don't Get Kicked
Score: 52/100
Accuracy = 90.1593521779
{'AUC': 0.76163108253227763,
 'Accuracy': 0.90159352177904439,
 'Average Hamming loss': 0.098406478220955565,
 'Average precision score': 0.71316278644314468,
 'Brier score loss': 0.083958446286591007,
 "Cohen's kappa": 0.3481516119604513,
 'F1 score': 0.38531324888736734,
 'Hinge loss': 0.97541893317621908,
 'Jaccard similarity coefficient': 0.90159352177904439,
 'Log loss (cross-entropy loss)': 0.29874631568529125,
 'Matthews correlation coefficient': 0.42332996975354131,
 'Precision': 0.83124076809453473,
 'Recall': 0.25077985739750447}

CPU times: user 7min 8s, sys: 4.9 s, total: 7min 13s
Wall time: 2min 29s


#### Otto Product Classification - Binary classification

In [25]:
%%time
X_train_path = "../tests/benchmark_data/otto_product_classification/train.csv.zip"
X_test_path = "../tests/benchmark_data/otto_product_classification/test.csv.zip"

X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)
model = fa.All(time_to_compute=100)
model.fit(X, y)
predictions = model.predict(X_test)
print("Otto Product Classification")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Otto Product Classification
Score: 99/100
Accuracy = 98.9317689647
{'AUC': 0.9998332196896611,
 'Accuracy': 0.98931768964737066,
 'Average precision score': 0.99761315686995566,
 'Log loss (cross-entropy loss)': 0.038433256407936989}

CPU times: user 4min 52s, sys: 53.4 s, total: 5min 45s
Wall time: 1h 17min 42s


## Other tests

Rare classes

In [59]:
X_train_path = "../tests/test_data/zoo/zoo_train.csv"
X_test_path = "../tests/test_data/zoo/zoo_test.csv"

X_train = pd.read_csv(X_train_path)
X_test = pd.read_csv(X_test_path)

X, y = utils.X_y_split(X_train=X_train, X_test=X_test)

y.value_counts()

1    30
2    13
4    11
7     7
6     6
5     3
3     2
Name: class_type, dtype: int64

In [71]:
%%time

model = fa.All(time_to_compute=350)
model.fit(X, y)
predictions = model.predict(X_test)
print("Zoo")
print("Score: %s" % model.display_score)
print(model.understandable_metric_name, "=", model.understandable_metric_value)
pprint(model.all_metrics)
print()

Zoo
Score: 95/100
Accuracy = 91.6666666667
{'AUC': 0.97634404932541585,
 'Accuracy': 0.91666666666666663,
 'Average precision score': 0.81428718673616629,
 'Log loss (cross-entropy loss)': 0.81510459955792214}

CPU times: user 637 ms, sys: 102 ms, total: 739 ms
Wall time: 1.03 s
