add details for docs

add examples for Optimiser add details for df format (issue "dict target #34")
AxeldeRomblay · Aug 2, 2017 · c4fd457 · c4fd457
1 parent e2d47dc
commit c4fd457
Show file tree

Hide file tree

Showing 3 changed files with 54 additions and 19 deletions.
diff --git a/python-package/mlbox/optimisation/optimiser.py b/python-package/mlbox/optimisation/optimiser.py
@@ -126,16 +126,34 @@ def evaluate(self, params, df):
             - The values are those of the parameters. Ex: 4 for key = "est__max_depth", ...
 
         df : dict, default = None
-            Train dictionary. Must contain keys "train" and "target" with
-            the train dataset (pandas.DataFrame) and the associated
-            target (pandas.Series) with dtype='float' for a regression
-            or dtype='int' for a classification)
+            Dataset dictionary. Must contain keys and values:
+
+            - "train": pandas DataFrame for the train set.
+            - "target" : encoded pandas Series for the target on train set (with dtype='float' for a regression or
+              dtype='int' for a classification). Indexes should match the train set.
 
         Returns
         -------
         float.
             The score. The higher the better.
             Positive for a score and negative for a loss.
+
+        Examples
+        --------
+        >>> from mlbox.optimisation import *
+        >>> from sklearn.datasets import load_boston
+        >>> dataset = load_boston()
+        >>>
+        >>> opt = Optimiser()
+        >>> params = {
+        ...     "ne__numerical_strategy" : 0,
+        ...     "ce__strategy" : "label_encoding",
+        ...     "fs__threshold" : 0.1,
+        ...     "stck__base_estimators" : [Regressor(strategy="RandomForest"), Regressor(strategy="ExtraTrees")],
+        ...     "est__strategy" : "Linear"
+        ... }
+        >>> df = {"train" : pd.DataFrame(dataset.data), "target" : pd.Series(dataset.target)}
+        >>> opt.evaluate(params, df)
         """
 
         ne = NA_encoder()
@@ -466,10 +484,11 @@ def optimise(self, space, df, max_evals=40):
                 - list : a list of values to be tested if strategy="choice". Else, list = [value_min, value_max].
 
         df : dict, default = None
-            Train dictionary. Must contain keys "train" and "target" with the
-            train dataset (pandas.DataFrame) and the associated
-            target (pandas.Series) with dtype='float' for a regression
-            or dtype='int' for a classification)
+            Dataset dictionary. Must contain keys and values:
+
+            - "train": pandas DataFrame for the train set.
+            - "target" : encoded pandas Series for the target on train set (with dtype='float' for a regression or
+              dtype='int' for a classification). Indexes should match the train set.
 
         max_evals : int, default = 40.
             Number of iterations.
@@ -479,6 +498,20 @@ def optimise(self, space, df, max_evals=40):
         -------
         dict.
             The optimal hyper-parameter dictionary.
+
+        Examples
+        --------
+        >>> from mlbox.optimisation import *
+        >>> from sklearn.datasets import load_boston
+        >>> dataset = load_boston()
+        >>>
+        >>> opt = Optimiser()
+        >>> space = {
+        ...     'fs__strategy':{"search":"choice","space":["variance","rf_feature_importance"]},
+        ...     'est__colsample_bytree':{"search":"uniform", "space":[0.3,0.7]}
+        ... }
+        >>> df = {"train" : pd.DataFrame(dataset.data), "target" : pd.Series(dataset.target)}
+        >>> best = opt.optimise(space, df, 3)
         """
 
         hyperopt_objective = lambda params: -self.evaluate(params, df)

diff --git a/python-package/mlbox/prediction/predictor.py b/python-package/mlbox/prediction/predictor.py
@@ -171,7 +171,6 @@ def fit_predict(self, params, df):
 
         Also outputs feature importances and the submission file
         (.png and .csv format).
-
         
         Parameters
         ----------
@@ -191,12 +190,13 @@ def fit_predict(self, params, df):
             - The values are those of the parameters. Ex: 4 for key = "est__max_depth", ...
 
         df : dict, default = None
-            Dataset dictionary. Must contain keys "train", "test"
-            and "target" with the train dataset (pandas.DataFrame),
-            the test dataset (pandas.DataFrame) and the associated
-            target (pandas Serie with dtype='float' for a regression or
-            dtype='int' for a classification)
-        
+            Dataset dictionary. Must contain keys and values:
+
+            - "train": pandas DataFrame for the train set.
+            - "test" : pandas DataFrame for the test set.
+            - "target" : encoded pandas Series for the target on train set (with dtype='float' for a regression or
+              dtype='int' for a classification). Indexes should match the train set.
+
         Returns
         -------
         object
@@ -410,15 +410,17 @@ def fit_predict(self, params, df):
 
                 if (df['target'].dtype == 'int'):
 
+                    enc_name = "target_encoder.obj"
+
                     try:
 
-                        fhand = open(self.to_path + "/target_encoder.obj", 'rb')
+                        fhand = open(self.to_path + "/" + enc_name, 'rb')
                         enc = pickle.load(fhand)
                         fhand.close()
 
                     except:
-                        raise ValueError("Unable to load target encoder"
-                                         " from directory : " + self.to_path)
+                        raise ValueError("Unable to load '" + enc_name +
+                                         "' from directory : " + self.to_path)
 
                     try:
                         if(self.verbose):

diff --git a/python-package/mlbox/preprocessing/drift_thresholder.py b/python-package/mlbox/preprocessing/drift_thresholder.py
@@ -74,7 +74,7 @@ def fit_transform(self, df):
 
             - 'train' : transformed pandas dataframe for train dataset
             - 'test' : transformed pandas dataframe for test dataset
-            - 'target' : pandas serie for the target
+            - 'target' : pandas Series for the target on train set
         """
 
         ######################################################