In [19]:
import sys
# Add the parent directory to the module search path; the `src.carpricer`
# imports below are written relative to it.
sys.path.append("../") 

import src.carpricer.dataprep as dataprep
from src.carpricer.dataprep import transformations
from src.carpricer.train import evaluator, trainer


In [20]:
# Print the parameters file so its contents are visible in the notebook.
!cat ../src/carpricer/carpricer.params.yml

model:
  baseline: 
    objective: reg:squarederror
    min_child_weight: 4
    colsample_bytree: .7
    n_estimators: 200
  tune:
    cv: 5
    objective: neg_mean_squared_error
    search:
      learning_rate: [.003, .05, .07,]
      max_depth: [3, 20]
      alpha: [0, .5]
      subsample: [.7, 1]
data:
  label: lnprice
  test_size: 0.3

In [21]:
# Parameters file that drives the data split and the model settings.
params_file = "../src/carpricer/carpricer.params.yml"
# Prepared sample dataset read by the split step below.
train_path = "../data/sample/automobile_prepared.csv"

In [22]:
from jobtools.arguments import ParamsNamespace

In [23]:
# Load the parameters file into a namespace read with attribute access
# (e.g. `params.data.test_size`, `params.model.baseline`).
params = ParamsNamespace.load(params_file)

In [24]:
# Read the prepared dataset and split it into train/test features and labels,
# using the test fraction and label column configured in the params file.
X_train, y_train, X_test, y_test = dataprep.read_and_split(
    train_path,
    params.data.test_size,
    params.data.label,
)

In [25]:
# Transform the training features and keep the returned `transforms` object
# so the same transformation can be applied to the test split.
X_train_transformed, transforms = transformations.scale_and_encode(X_train)

In [26]:
# Apply the transforms obtained from the training features to the test features.
X_test_transformed = transforms.transform(X_test)

In [27]:
import mlflow

In [28]:
# Start an MLflow run only when none is active.
# `mlflow.active_run` is a function: it has to be called, and it returns None
# when there is no active run. Comparing the function object itself to 0 is
# always False, so the run was never started explicitly.
if mlflow.active_run() is None:
    mlflow.start_run()

In [29]:
# Display the baseline model parameters loaded from the params file.
params.model.baseline

namespace(objective='reg:squarederror',
          min_child_weight=4,
          colsample_bytree=0.7,
          n_estimators=200)

In [41]:
from src.carpricer.train.trainer import PatchedXGBRegressor
from xgboost.sklearn import XGBRegressor

# Baseline estimator handed to the hyper-parameter search; the baseline
# settings from the params file are expanded into keyword arguments.
# NOTE(review): PatchedXGBRegressor is imported above but never used. The
# search cell's saved output shows
# "'super' object has no attribute '__sklearn_tags__'" — presumably the
# patched class exists to address that; confirm in trainer.py and decide
# which class should be instantiated here.
# NOTE(review): `silent` and `nthread` look like legacy XGBoost argument
# names — confirm against the installed xgboost version.
base_model = XGBRegressor(silent=True, nthread=4, **params.model.baseline.to_dict())

NameError: name 'XGBRegressor' is not defined

In [36]:
# Record the baseline model parameters on the MLflow run.
mlflow.log_params(params.model.baseline.to_dict()) 

In [37]:
# Display the `cv` setting that is passed to the search below.
params.model.tune.cv

5

In [38]:
# Run the hyper-parameter search on the transformed training data, starting
# from the baseline model and using the grid, cv and scoring values from the
# `model.tune` section of the params file.
search = trainer.fit_and_optimize(
    X_train_transformed,
    y_train,
    base_model=base_model,
    param_grid=params.model.tune.search.to_dict(),
    cv=params.model.tune.cv,
    scoring_fit=params.model.tune.objective,
)



AttributeError: 'super' object has no attribute '__sklearn_tags__'

In [None]:
# Review the search results for the learning_rate and max_depth parameters.
# NOTE(review): to_mlflow=False presumably means nothing is logged to MLflow
# from this call — confirm in evaluator.
evaluator.evaluate_search(search, plot_params_name=['learning_rate', 'max_depth'], to_mlflow=False)

In [None]:
# Pull the winning hyper-parameters and the corresponding estimator out of
# the finished search.
best_params = search.best_params_
best_model = search.best_estimator_

In [None]:
# Record the best hyper-parameters found by the search on the MLflow run.
mlflow.log_params(best_params)

In [None]:
# Compute evaluation metrics for the best model on the transformed test split.
metrics = evaluator.evaluate_regressor(best_model, X_test_transformed, y_test)

In [None]:
# Record the evaluation metrics on the MLflow run.
mlflow.log_metrics(metrics)

In [None]:
from sklearn.pipeline import Pipeline

In [None]:
# Chain the preprocessing transforms and the tuned model into one estimator
# so that raw features can be passed in directly.
model_pipeline = Pipeline(
    steps=[
        ("preprocessing", transforms),
        ("model", best_model),
    ]
)

In [None]:
# Fit the full pipeline (preprocessing + tuned model) on the raw training
# split. Fitting on X_test/y_test would train the logged model on the
# held-out data that the metrics above were computed on.
model_pipeline.fit(X_train, y_train)

In [None]:
from mlflow.models.signature import infer_signature

In [None]:
# Infer the model signature from the raw test features (model input) and the
# test label values (model output).
signature = infer_signature(X_test, y_test.values)

In [None]:
# Log the pipeline to MLflow as "model", together with the inferred signature.
mlflow.sklearn.log_model(model_pipeline, "model", signature=signature)

In [None]:
# Close the active MLflow run.
mlflow.end_run()