## Imports

In [7]:
from taxifare.trainer import Trainer
from taxifare.paramtrainer import ParamTrainer

## Autoreload

In [2]:
# Load IPython's autoreload extension and switch it to mode 2, which re-imports
# changed modules before each cell runs, so edits to the taxifare package are
# picked up without restarting the kernel.
# NOTE(review): this cell was executed as In [2] while the import cell above it
# shows In [7], i.e. the notebook was run out of order. Confirm it still works
# top to bottom with Restart Kernel -> Run All.
%load_ext autoreload
%autoreload 2

# Trainer using our package

In [4]:
# Instantiate the package's Trainer with its defaults and run train().
# The returned object is the scikit-learn Pipeline rendered below
# (assumed to be already fitted - confirm in Trainer.train).
trainer = Trainer()
fitted_pipeline = trainer.train()

# Bare last expression so the notebook displays the pipeline's repr.
fitted_pipeline

Pipeline(steps=[('features',
                 ColumnTransformer(transformers=[('distance',
                                                  Pipeline(steps=[('distancetransformer',
                                                                   DistanceTransformer()),
                                                                  ('standardscaler',
                                                                   StandardScaler())]),
                                                  ['pickup_latitude',
                                                   'pickup_longitude',
                                                   'dropoff_latitude',
                                                   'dropoff_longitude'])])),
                ('model', RandomForestRegressor(max_depth=1))])

In [5]:
# Shell escape: long-list model.joblib to check the file exists.
# NOTE(review): presumably written by trainer.train() above - confirm. The path is
# relative, so the result depends on the kernel's working directory.
!ls -la model.joblib

-rw-r--r--  1 bruncky  staff  47968 Feb 22 17:51 model.joblib


In [6]:
# Display every parameter of the pipeline as a dict.
# NOTE(review): the `<step>__<param>` names used as grid-search keys later in this
# notebook are expected to appear among these keys - verify against the full output.
fitted_pipeline.get_params()

{'memory': None,
 'steps': [('features',
   ColumnTransformer(transformers=[('distance',
                                    Pipeline(steps=[('distancetransformer',
                                                     DistanceTransformer()),
                                                    ('standardscaler',
                                                     StandardScaler())]),
                                    ['pickup_latitude', 'pickup_longitude',
                                     'dropoff_latitude', 'dropoff_longitude'])])),
  ('model', RandomForestRegressor(max_depth=1))],
 'verbose': False,
 'features': ColumnTransformer(transformers=[('distance',
                                  Pipeline(steps=[('distancetransformer',
                                                   DistanceTransformer()),
                                                  ('standardscaler',
                                                   StandardScaler())]),
                                  ['p

# Trainer with params and grid search

In [9]:
# Search configuration passed to ParamTrainer.train(): one entry per model name.
# Each entry holds:
#   - 'line_count': presumably the number of rows to train on (confirm in ParamTrainer)
#   - 'hyper_params': candidate values keyed by `<step>__<param>` paths that follow the
#     pipeline's step names (features -> distance -> distancetransformer / standardscaler,
#     and model).
# NOTE(review): the spelling 'euclidian' is kept as-is; it must match whatever
# DistanceTransformer accepts for `distance_type` - verify.
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2 for
# LinearRegression - confirm the installed version still accepts it.
params = {
    'random_forest': {
        'line_count': 1000,
        'hyper_params': {
            'features__distance__distancetransformer__distance_type': ['euclidian', 'manhattan'],
            'features__distance__standardscaler__with_mean': [True, False],
            'model__max_depth': [1, 2, 3],
        },
    },
    'linear_regression': {
        'line_count': 1000,
        'hyper_params': {
            'features__distance__distancetransformer__distance_type': ['euclidian', 'manhattan'],
            'features__distance__standardscaler__with_mean': [True, False],
            'model__normalize': [True, False],
        },
    },
}


# Run the parameterised trainer over the `params` configuration defined above.
# NOTE(review): the recorded output below is a single GridSearchCV whose `estimator`
# is the whole `params` dict, while later cells index `models` by model name. It looks
# like ParamTrainer.train() does not yet return one search per model - confirm in the
# package. The later `ls -la *.joblib` output also lists only model.joblib, so no new
# model file appears to have been saved by this call.
param_trainer = ParamTrainer()
models = param_trainer.train(params)

# Bare last expression so the notebook displays whatever train() returned.
models

GridSearchCV(cv=5,
             estimator={'linear_regression': {'hyper_params': {'features__distance__distancetransformer__distance_type': ['euclidian',
                                                                                                                          'manhattan'],
                                                               'features__distance__standardscaler__with_mean': [True,
                                                                                                                 False],
                                                               'model__normalize': [True,
                                                                                    False]},
                                              'line_count': 1000},
                        'random_forest': {'hyper_params': {'features__distance__distancetransformer__distance_type': ['euclidian',
                                                                                         

In [10]:
# Shell escape: long-list every .joblib file in the working directory to see which
# model files exist after the parameterised training run.
! ls -la *.joblib

-rw-r--r--  1 bruncky  staff  47968 Feb 22 17:51 model.joblib


In [11]:
# List the attribute names stored on the random-forest search object.
#
# This cell needs `models` to be a mapping keyed by model name. In the recorded run
# ParamTrainer.train() returned a single GridSearchCV, so plain indexing failed with
# "'GridSearchCV' object is not subscriptable". The lookup is wrapped so the error
# names the actual type and points at the real cause (the return value of train()).
try:
    rf_search = models['random_forest']
except TypeError as exc:
    raise TypeError(
        "expected `models` to map model names to fitted searches, "
        f"got {type(models).__name__}; check the return value of ParamTrainer.train()"
    ) from exc

# vars(obj) is the same mapping as obj.__dict__.
vars(rf_search).keys()

TypeError: 'GridSearchCV' object is not subscriptable

In [None]:
# Display `best_score_` of the random-forest search (presumably a fitted GridSearchCV).
# NOTE(review): never executed (In [None]); requires `models` to be indexable by model name.
models['random_forest'].best_score_

In [None]:
# Display `best_params_` of the random-forest search (presumably the winning
# combination from its 'hyper_params' grid - confirm).
# NOTE(review): never executed (In [None]); requires `models` to be indexable by model name.
models['random_forest'].best_params_

In [None]:
# Display `best_estimator_` of the random-forest search (presumably the pipeline
# refitted with the best parameters - confirm).
# NOTE(review): never executed (In [None]); requires `models` to be indexable by model name.
models['random_forest'].best_estimator_

In [None]:
# Display `best_score_` of the linear-regression search (presumably a fitted GridSearchCV).
# NOTE(review): never executed (In [None]); requires `models` to be indexable by model name.
models['linear_regression'].best_score_

In [None]:
# Display `best_params_` of the linear-regression search (presumably the winning
# combination from its 'hyper_params' grid - confirm).
# NOTE(review): never executed (In [None]); requires `models` to be indexable by model name.
models['linear_regression'].best_params_

In [None]:
# Display `best_estimator_` of the linear-regression search (presumably the pipeline
# refitted with the best parameters - confirm).
# NOTE(review): never executed (In [None]); requires `models` to be indexable by model name.
models['linear_regression'].best_estimator_