# Autoreload

In [0]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell is executed, so edits to local source files (e.g. the
# taxifare package imported below) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Trainer using our package

In [0]:
# Train with the package's Trainer, constructed with no arguments.
from taxifare.trainer import Trainer
trainer = Trainer()
# train() returns the scikit-learn Pipeline displayed in the cell output.
# NOTE(review): presumably already fitted and persisted to model.joblib by
# train() — confirm in taxifare.trainer.
fitted_pipeline = trainer.train()
# Bare last expression so the notebook renders the pipeline's repr.
fitted_pipeline

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', RandomForestRegressor(max_depth=1))])

In [0]:
# Shell escape: list model.joblib to check that the file exists on disk
# (presumably written by trainer.train() above — confirm).
!ls -la model.joblib

-rw-r--r--  1 gmanchon  staff  48800 Feb 16 22:24 model.joblib


In [0]:
# Show every parameter of the pipeline, including nested `step__param` entries;
# these nested names are the keys used in the grid-search cell further down.
fitted_pipeline.get_params()

{'memory': None,
 'steps': [('features',
   ColumnTransformer(transformers=[('distance',
                                    Pipeline(steps=[('distancetransformer',
                                                     DistanceTransformer()),
                                                    ('standardscaler',
                                                     StandardScaler())]),
                                    ['pickup_latitude', 'pickup_longitude',
                                     'dropoff_latitude', 'dropoff_longitude'])])),
  ('model', RandomForestRegressor(max_depth=1))],
 'verbose': False,
 'features': ColumnTransformer(transformers=[('distance',
                                  Pipeline(steps=[('distancetransformer',
                                                   DistanceTransformer()),
                                                  ('standardscaler',
                                                   StandardScaler())]),
                                  ['p

# Trainer with hyperparameters and grid search

In [0]:
from taxifare.paramtrainer import ParamTrainer

# One entry per model to tune. Each entry carries:
#   - "line_count": presumably the number of data rows to train on — confirm
#     in taxifare.paramtrainer;
#   - "hyper_params": a grid keyed by nested pipeline parameter names
#     (`step__substep__param`), each mapped to the candidate values to try.
# NOTE(review): the "euclidian" spelling is the literal value passed to
# DistanceTransformer and must be kept as-is unless the transformer changes.
# NOTE(review): `normalize` on LinearRegression was deprecated in
# scikit-learn 1.0 and removed in 1.2; this grid only works on older versions.
params = {
    "random_forest": {
        "line_count": 1_000,
        "hyper_params": {
            "features__distance__distancetransformer__distance_type": ["euclidian", "manhattan"],
            "features__distance__standardscaler__with_mean": [True, False],
            "model__max_depth": [1, 2, 3],
        },
    },
    "linear_regression": {
        "line_count": 1_000,
        "hyper_params": {
            "features__distance__distancetransformer__distance_type": ["euclidian", "manhattan"],
            "features__distance__standardscaler__with_mean": [True, False],
            "model__normalize": [True, False],
        },
    },
}

# Run the search for every configured model; the result is a dict keyed by
# model name (see the cell output).
param_trainer = ParamTrainer()
models = param_trainer.train(params)
models

{'random_forest': GridSearchCV(cv=5,
              estimator=Pipeline(steps=[('features',
                                         ColumnTransformer(transformers=[('distance',
                                                                          Pipeline(steps=[('distancetransformer',
                                                                                           DistanceTransformer()),
                                                                                          ('standardscaler',
                                                                                           StandardScaler())]),
                                                                          ['pickup_latitude',
                                                                           'pickup_longitude',
                                                                           'dropoff_latitude',
                                                                           'dropoff_longit

In [0]:
# Shell escape: list all .joblib files in the working directory
# (presumably one per model trained above, plus model.joblib — confirm).
! ls -la *.joblib

-rw-r--r--  1 gmanchon  staff    777 Feb 16 22:24 linear_regression.joblib
-rw-r--r--  1 gmanchon  staff  48800 Feb 16 22:24 model.joblib
-rw-r--r--  1 gmanchon  staff   1302 Feb 16 22:24 random_forest.joblib


In [0]:
# List the instance attributes available on the random-forest search object
# (constructor settings plus the fitted attributes that end in an underscore).
vars(models["random_forest"]).keys()

dict_keys(['scoring', 'estimator', 'n_jobs', 'iid', 'refit', 'cv', 'verbose', 'pre_dispatch', 'error_score', 'return_train_score', 'param_grid', 'multimetric_', 'best_index_', 'best_score_', 'best_params_', 'best_estimator_', 'refit_time_', 'scorer_', 'cv_results_', 'n_splits_'])

In [0]:
# Mean cross-validated score of the best random-forest parameter combination.
# NOTE(review): the metric depends on the `scoring` ParamTrainer passes to
# GridSearchCV (R² for regressors if left at the default) — confirm.
models["random_forest"].best_score_

0.6841204518418557

In [0]:
# Parameter combination from the grid that achieved the best score.
models["random_forest"].best_params_

{'features__distance__distancetransformer__distance_type': 'euclidian',
 'features__distance__standardscaler__with_mean': False,
 'model__max_depth': 3}

In [0]:
# Pipeline configured with the best parameters (refit on the full training
# data when GridSearchCV's `refit` option is enabled).
models["random_forest"].best_estimator_

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler(with_mean=False))]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', RandomForestRegressor(max_depth=3))])

In [0]:
# Mean cross-validated score of the best linear-regression parameter
# combination; compare with the random-forest score above.
# NOTE(review): the metric depends on the `scoring` ParamTrainer passes to
# GridSearchCV (R² for regressors if left at the default) — confirm.
models["linear_regression"].best_score_

0.695787104216038

In [0]:
# Parameter combination from the grid that achieved the best score.
models["linear_regression"].best_params_

{'features__distance__distancetransformer__distance_type': 'euclidian',
 'features__distance__standardscaler__with_mean': True,
 'model__normalize': True}

In [0]:
# Pipeline configured with the best parameters (refit on the full training
# data when GridSearchCV's `refit` option is enabled).
models["linear_regression"].best_estimator_

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', LinearRegression(normalize=True))])