## Imports

In [1]:
from taxifare.trainer import Trainer
from taxifare.paramtrainer import ParamTrainer

## Autoreload

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell executes, so edits to the `taxifare` package are picked up
# without restarting the kernel.
# NOTE(review): this setup conventionally goes before the imports cell — consider moving it up.
%load_ext autoreload
%autoreload 2

# Trainer using our package

In [3]:
# Instantiate the package's Trainer with its default settings and run training.
# `train()` returns the scikit-learn Pipeline displayed in this cell's output
# (presumably already fitted, and presumably also saved as `model.joblib`,
# which the next cell lists — confirm in taxifare.trainer).
trainer = Trainer()
fitted_pipeline = trainer.train()

# Bare last expression: show the pipeline repr to check its steps and model settings.
fitted_pipeline

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', RandomForestRegressor(max_depth=1))])

In [4]:
# Shell escape: check that `model.joblib` exists in the working directory and show its size/timestamp.
# (Presumably written by `trainer.train()` above — confirm in taxifare.trainer.)
!ls -la model.joblib

-rw-r--r--  1 bruncky  staff  47968 Feb 22 20:58 model.joblib


In [5]:
# List the pipeline's parameters, including those of nested steps.
# The double-underscore parameter paths used in the grid-search cell further down
# (e.g. `model__max_depth`) refer to these nested parameters.
fitted_pipeline.get_params()

{'memory': None,
 'steps': [('features',
   ColumnTransformer(transformers=[('distance',
                                    Pipeline(steps=[('distancetransformer',
                                                     DistanceTransformer()),
                                                    ('standardscaler',
                                                     StandardScaler())]),
                                    ['pickup_latitude', 'pickup_longitude',
                                     'dropoff_latitude', 'dropoff_longitude'])])),
  ('model', RandomForestRegressor(max_depth=1))],
 'verbose': False,
 'features': ColumnTransformer(transformers=[('distance',
                                  Pipeline(steps=[('distancetransformer',
                                                   DistanceTransformer()),
                                                  ('standardscaler',
                                                   StandardScaler())]),
                                  ['p

# Trainer with hyperparameters and grid search

In [6]:
# Search configuration, one entry per model name.
# Each entry holds `line_count` and `hyper_params`; the `hyper_params` keys are
# double-underscore paths into the pipeline (step__substep__parameter) and the
# values are the candidate settings to try.
params = {
    'random_forest': {
        'line_count': 1_000,
        'hyper_params': {
            # Preprocessing candidates (same values as for linear_regression).
            'features__distance__distancetransformer__distance_type': ['euclidian', 'manhattan'],
            'features__distance__standardscaler__with_mean': [True, False],
            # Model-specific candidate: tree depth of the forest.
            'model__max_depth': [1, 2, 3],
        },
    },
    'linear_regression': {
        'line_count': 1_000,
        'hyper_params': {
            # Only the preprocessing is searched for this model.
            'features__distance__distancetransformer__distance_type': ['euclidian', 'manhattan'],
            'features__distance__standardscaler__with_mean': [True, False],
        },
    },
}


# Run the parameterised training over every entry in `params`.
# `train()` returns a dict keyed by model name; the values are GridSearchCV
# objects, as the output of this cell shows.
param_trainer = ParamTrainer()
models = param_trainer.train(params)

# Bare last expression: display the returned searches.
models

{'random_forest': GridSearchCV(cv=5,
              estimator=Pipeline(steps=[('features',
                                         ColumnTransformer(transformers=[('distance',
                                                                          Pipeline(steps=[('distancetransformer',
                                                                                           DistanceTransformer()),
                                                                                          ('standardscaler',
                                                                                           StandardScaler())]),
                                                                          ['pickup_latitude',
                                                                           'pickup_longitude',
                                                                           'dropoff_latitude',
                                                                           'dropoff_longit

In [7]:
# Shell escape: list every `.joblib` artifact in the working directory.
# NOTE(review): in the recorded output the per-model files are far smaller than
# `model.joblib` — confirm what ParamTrainer actually persists for each model.
! ls -la *.joblib

-rw-r--r--  1 bruncky  staff    826 Feb 22 20:59 linear_regression.joblib
-rw-r--r--  1 bruncky  staff  47968 Feb 22 20:58 model.joblib
-rw-r--r--  1 bruncky  staff   1287 Feb 22 20:58 random_forest.joblib


In [8]:
# List the instance attributes of the random-forest search object
# (constructor settings plus the trailing-underscore attributes set during fitting).
vars(models['random_forest']).keys()

dict_keys(['scoring', 'estimator', 'n_jobs', 'refit', 'cv', 'verbose', 'pre_dispatch', 'error_score', 'return_train_score', 'param_grid', 'multimetric_', 'best_index_', 'best_score_', 'best_params_', 'best_estimator_', 'refit_time_', 'feature_names_in_', 'scorer_', 'cv_results_', 'n_splits_'])

In [9]:
# Best cross-validated score found by the random-forest grid search.
# NOTE(review): the metric depends on the scorer configured in ParamTrainer, which is not visible here.
models['random_forest'].best_score_

0.7120389684669391

In [10]:
# Parameter combination that achieved the best score for the random forest.
models['random_forest'].best_params_

{'features__distance__distancetransformer__distance_type': 'euclidian',
 'features__distance__standardscaler__with_mean': False,
 'model__max_depth': 3}

In [11]:
# Pipeline configured with the best random-forest parameter combination found by the search.
models['random_forest'].best_estimator_

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler(with_mean=False))]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', RandomForestRegressor(max_depth=3))])

In [12]:
# Best cross-validated score found by the linear-regression grid search
# (compare with the random-forest score above).
models['linear_regression'].best_score_

0.7327319494306824

In [13]:
# Parameter combination that achieved the best score for the linear regression.
models['linear_regression'].best_params_

{'features__distance__distancetransformer__distance_type': 'euclidian',
 'features__distance__standardscaler__with_mean': True}

In [14]:
# Pipeline configured with the best linear-regression parameter combination found by the search.
models['linear_regression'].best_estimator_

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', LinearRegression())])