Skip to content

Commit

Permalink
Modularizes data preparation (new `_prepare_for_training` helper), adds verbose progress messages, and switches back to RMSE scoring
Browse files Browse the repository at this point in the history
  • Loading branch information
ClimbsRocks committed Aug 17, 2016
1 parent 98b0d4a commit e0f451d
Showing 1 changed file with 23 additions and 9 deletions.
32 changes: 23 additions & 9 deletions auto_ml/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def train(self, raw_training_data, user_input_func=None, optimize_entire_pipelin
scoring = make_scorer(brier_score_loss, greater_is_better=True)

else:
scoring = None
# scoring = 'mean_squared_error'
# scoring = utils.rmse_scoring
# scoring = None
# # scoring = 'mean_squared_error'
scoring = utils.rmse_scoring

# We will be performing GridSearchCV every time, even if the space we are searching over is null
gs = GridSearchCV(
Expand Down Expand Up @@ -144,9 +144,7 @@ def _get_estimator_names(self, ml_for_analytics=False):
else:
raise('TypeError: type_of_algo must be either "classifier" or "regressor".')


def ml_for_analytics(self, raw_training_data, user_input_func=None, optimize_entire_pipeline=False, optimize_final_model=False, write_gs_param_results_to_file=True, perform_feature_selection=True):

def _prepare_for_training(self, raw_training_data, write_gs_param_results_to_file=True):
if write_gs_param_results_to_file:
gs_param_file_name = 'most_recent_pipeline_grid_search_result.csv'
try:
Expand Down Expand Up @@ -190,20 +188,33 @@ def ml_for_analytics(self, raw_training_data, user_input_func=None, optimize_ent
indices_to_delete = set(indices_to_delete)
X = [row for idx, row in enumerate(X) if idx not in indices_to_delete]

return X, y, gs_param_file_name

def ml_for_analytics(self, raw_training_data, user_input_func=None, optimize_entire_pipeline=False, optimize_final_model=False, write_gs_param_results_to_file=True, perform_feature_selection=True, verbose=True):

X, y, gs_param_file_name = self._prepare_for_training(raw_training_data, write_gs_param_results_to_file)
if verbose:
print('Successfully performed basic preparations and y-value cleaning')

ppl = self._construct_pipeline(user_input_func, optimize_final_model=optimize_final_model, ml_for_analytics=True, perform_feature_selection=perform_feature_selection)

if verbose:
print('Successfully constructed the pipeline')

estimator_names = self._get_estimator_names(ml_for_analytics=True)

if self.type_of_algo == 'classifier':
# scoring = 'roc_auc'
scoring = make_scorer(brier_score_loss, greater_is_better=True)
self._scorer = scoring
else:
scoring = None
# scoring = 'mean_squared_error'
# scoring = utils.rmse_scoring
# scoring = None
# # scoring = 'mean_squared_error'
scoring = utils.rmse_scoring
self._scorer = scoring

if verbose:
print('Created estimator_names and scoring')

for model_name in estimator_names:

Expand All @@ -225,6 +236,9 @@ def ml_for_analytics(self, raw_training_data, user_input_func=None, optimize_ent
scoring=scoring
)

if verbose:
print('About to fit the GridSearchCV on the pipeline for the model ' + model_name)

gs.fit(X, y)
self.trained_pipeline = gs.best_estimator_

Expand Down

0 comments on commit e0f451d

Please sign in to comment.