Skip to content

Commit

Permalink
add details for docs
Browse files Browse the repository at this point in the history
add examples for Optimiser
add details for df format (issue "dict target #34")
  • Loading branch information
AxeldeRomblay committed Aug 2, 2017
1 parent e2d47dc commit c4fd457
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 19 deletions.
49 changes: 41 additions & 8 deletions python-package/mlbox/optimisation/optimiser.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,34 @@ def evaluate(self, params, df):
- The values are those of the parameters. Ex: 4 for key = "est__max_depth", ...
df : dict, default = None
Train dictionary. Must contain keys "train" and "target" with
the train dataset (pandas.DataFrame) and the associated
target (pandas.Series) with dtype='float' for a regression
or dtype='int' for a classification)
Dataset dictionary. Must contain keys and values:
- "train": pandas DataFrame for the train set.
- "target" : encoded pandas Series for the target on the train set (with dtype='float' for a regression or
dtype='int' for a classification). Its index should match the index of the train set.
Returns
-------
float.
The score. The higher the better.
Positive for a score and negative for a loss.
Examples
--------
>>> import pandas as pd
>>> from mlbox.optimisation import *
>>> from sklearn.datasets import load_boston
>>> dataset = load_boston()
>>>
>>> opt = Optimiser()
>>> params = {
...     "ne__numerical_strategy" : 0,
...     "ce__strategy" : "label_encoding",
...     "fs__threshold" : 0.1,
...     "stck__base_estimators" : [Regressor(strategy="RandomForest"), Regressor(strategy="ExtraTrees")],
...     "est__strategy" : "Linear"
... }
>>> df = {"train" : pd.DataFrame(dataset.data), "target" : pd.Series(dataset.target)}
>>> opt.evaluate(params, df)
"""

ne = NA_encoder()
Expand Down Expand Up @@ -466,10 +484,11 @@ def optimise(self, space, df, max_evals=40):
- list : a list of values to be tested if strategy="choice". Else, list = [value_min, value_max].
df : dict, default = None
Train dictionary. Must contain keys "train" and "target" with the
train dataset (pandas.DataFrame) and the associated
target (pandas.Series) with dtype='float' for a regression
or dtype='int' for a classification)
Dataset dictionary. Must contain keys and values:
- "train": pandas DataFrame for the train set.
- "target" : encoded pandas Series for the target on the train set (with dtype='float' for a regression or
dtype='int' for a classification). Its index should match the index of the train set.
max_evals : int, default = 40.
Number of iterations.
Expand All @@ -479,6 +498,20 @@ def optimise(self, space, df, max_evals=40):
-------
dict.
The optimal hyper-parameter dictionary.
Examples
--------
>>> import pandas as pd
>>> from mlbox.optimisation import *
>>> from sklearn.datasets import load_boston
>>> dataset = load_boston()
>>>
>>> opt = Optimiser()
>>> space = {
...     'fs__strategy':{"search":"choice","space":["variance","rf_feature_importance"]},
...     'est__colsample_bytree':{"search":"uniform", "space":[0.3,0.7]}
... }
>>> df = {"train" : pd.DataFrame(dataset.data), "target" : pd.Series(dataset.target)}
>>> best = opt.optimise(space, df, 3)
"""

hyperopt_objective = lambda params: -self.evaluate(params, df)
Expand Down
22 changes: 12 additions & 10 deletions python-package/mlbox/prediction/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,6 @@ def fit_predict(self, params, df):
Also outputs feature importances and the submission file
(.png and .csv format).
Parameters
----------
Expand All @@ -191,12 +190,13 @@ def fit_predict(self, params, df):
- The values are those of the parameters. Ex: 4 for key = "est__max_depth", ...
df : dict, default = None
Dataset dictionary. Must contain keys "train", "test"
and "target" with the train dataset (pandas.DataFrame),
the test dataset (pandas.DataFrame) and the associated
target (pandas Serie with dtype='float' for a regression or
dtype='int' for a classification)
Dataset dictionary. Must contain keys and values:
- "train": pandas DataFrame for the train set.
- "test" : pandas DataFrame for the test set.
- "target" : encoded pandas Series for the target on the train set (with dtype='float' for a regression or
dtype='int' for a classification). Its index should match the index of the train set.
Returns
-------
object
Expand Down Expand Up @@ -410,15 +410,17 @@ def fit_predict(self, params, df):

if (df['target'].dtype == 'int'):

enc_name = "target_encoder.obj"

try:

fhand = open(self.to_path + "/target_encoder.obj", 'rb')
fhand = open(self.to_path + "/" + enc_name, 'rb')
enc = pickle.load(fhand)
fhand.close()

except:
raise ValueError("Unable to load target encoder"
" from directory : " + self.to_path)
raise ValueError("Unable to load '" + enc_name +
"' from directory : " + self.to_path)

try:
if(self.verbose):
Expand Down
2 changes: 1 addition & 1 deletion python-package/mlbox/preprocessing/drift_thresholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def fit_transform(self, df):
- 'train' : transformed pandas dataframe for train dataset
- 'test' : transformed pandas dataframe for test dataset
- 'target' : pandas serie for the target
- 'target' : pandas Series for the target on the train set
"""

######################################################
Expand Down

0 comments on commit c4fd457

Please sign in to comment.