In [7]:
from lazypredict.Supervised import LazyClassifier
from lazypredict.Supervised import LazyRegressor
import utils

from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC, SVR
from sklearn.neighbors import NearestCentroid, KNeighborsRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

import joblib

import os

In [8]:
def train_models(X_train, X_val, y_train, y_val, models, is_dataset_classification):
    """Fit the given estimator classes through lazypredict and return the fitted models.

    Args:
        X_train: Training features, forwarded unchanged to lazypredict.
        X_val: Validation features, forwarded unchanged to lazypredict.
        y_train: Training targets, forwarded unchanged to lazypredict.
        y_val: Validation targets, forwarded unchanged to lazypredict.
        models: Collection of estimator classes to train. ``None`` or an
            empty collection short-circuits without touching lazypredict.
        is_dataset_classification: When truthy a ``LazyClassifier`` is used,
            otherwise a ``LazyRegressor``.

    Returns:
        The object returned by ``provide_models`` (the caller in this file
        iterates it with ``.items()`` as name/model pairs), or an empty dict
        when there is nothing to train.
    """
    # Return an empty *dict* (not a list) so the "nothing to train" result
    # supports the same ``.items()`` iteration as the regular result.
    if models is None or len(models) == 0:
        return {}

    if is_dataset_classification:
        lazy_ = LazyClassifier(
            verbose=0,
            ignore_warnings=True,
            custom_metric=None,
            classifiers=models,
        )
    else:
        # NOTE(review): ignore_warnings is False here but True for the
        # classifier branch -- kept as-is; confirm the asymmetry is intended.
        lazy_ = LazyRegressor(
            verbose=0,
            ignore_warnings=False,
            custom_metric=None,
            regressors=models,
        )

    # The values returned by fit() are not needed here; only the fitted
    # estimators exposed by provide_models() are handed back.
    lazy_.fit(X_train, X_val, y_train, y_val)
    return lazy_.provide_models(X_train, X_val, y_train, y_val)

In [9]:
# Estimator classes considered when the dataset is a classification task.
CLASSIFIERS = [
    GaussianNB, RandomForestClassifier, SVC,
    NearestCentroid, LogisticRegression, DecisionTreeClassifier,
]

# Estimator classes considered when the dataset is a regression task.
REGRESSORS = [
    RandomForestRegressor, SVR, KNeighborsRegressor,
    LinearRegression, DecisionTreeRegressor,
]

In [10]:
# Filename suffix for saved models: train() appends it to a model's name both
# when checking for already-saved files and when building the joblib dump path.
FILE_EXTENSION = ".joblib"

In [11]:
def train(dataset_name):
    """Train and save every candidate model that has no saved file yet.

    Candidates come from ``CLASSIFIERS`` or ``REGRESSORS`` depending on
    ``utils.is_dataset_classification(dataset_name)``. A candidate is skipped
    when ``<ClassName> + FILE_EXTENSION`` already exists in the results
    directory, so re-running only trains what is missing.

    Args:
        dataset_name: Dataset identifier understood by the ``utils`` helpers.

    Returns:
        None. Each model returned by ``train_models`` is written with
        ``joblib.dump`` into the results directory.
    """
    results_path = utils.get_classicdescriptors_path(dataset_name)
    is_classification = utils.is_dataset_classification(dataset_name)

    # List the directory once instead of once per candidate. A directory that
    # does not exist yet simply means that nothing has been saved so far.
    if os.path.isdir(results_path):
        already_saved = set(os.listdir(results_path))
    else:
        already_saved = set()

    # Select the models to train: all the candidates except the ones
    # that already have a saved file.
    candidates = CLASSIFIERS if is_classification else REGRESSORS
    models_to_train = [
        c for c in candidates
        if c.__name__ + FILE_EXTENSION not in already_saved
    ]

    if not models_to_train:
        return

    X, y = utils.get_X_y(dataset_name)
    indices_train, indices_val = utils.get_indices_train_eval(dataset_name)
    X_train, X_val = X[indices_train], X[indices_val]
    y_train, y_val = y[indices_train], y[indices_val]
    # Drop the references to the unsplit data and the index arrays before
    # training -- presumably to lower peak memory; only the splits are used.
    del X, y, indices_train, indices_val

    models = train_models(
        X_train,
        X_val,
        y_train,
        y_val,
        models_to_train,
        is_classification,
    )

    # Make sure the target directory exists before writing into it.
    os.makedirs(results_path, exist_ok=True)
    for model_name, model in models.items():
        # One file per model, named after the model
        model_path = os.path.join(results_path, model_name + FILE_EXTENSION)
        joblib.dump(model, model_path)

In [12]:
# Train and save the missing models for the dataset named by utils.HELOC_NAME.
train(utils.HELOC_NAME)