diff --git a/experiments/quantile_regression.py b/experiments/quantile_regression.py index bfe444f74..0281ffc3f 100644 --- a/experiments/quantile_regression.py +++ b/experiments/quantile_regression.py @@ -10,11 +10,7 @@ quantiles = [0.1, 0.50, 0.9] models = {} for q in quantiles: - params = { - 'objective': 'reg:quantileerror', - 'eval_metric': 'mae', - 'quantile_alpha': q - } + params = {"objective": "reg:quantileerror", "eval_metric": "mae", "quantile_alpha": q} model = xgb.XGBRegressor(**params) model.fit(X_train, y_train) models[q] = model diff --git a/src/sageworks/algorithms/dataframe/quantile_regression.py b/src/sageworks/algorithms/dataframe/quantile_regression.py index 510ef3472..b3ddcbd2f 100644 --- a/src/sageworks/algorithms/dataframe/quantile_regression.py +++ b/src/sageworks/algorithms/dataframe/quantile_regression.py @@ -4,7 +4,6 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.base import RegressorMixin from xgboost import XGBRegressor -from sklearn.model_selection import train_test_split class QuantileRegressor(BaseEstimator, TransformerMixin): @@ -52,9 +51,9 @@ def fit(self, X: pd.DataFrame, y: pd.Series) -> BaseEstimator: # Train models for each of the quantiles for q in self.quantiles: params = { - 'objective': 'reg:quantileerror', - 'eval_metric': 'rmse', - 'quantile_alpha': q, + "objective": "reg:quantileerror", + "eval_metric": "rmse", + "quantile_alpha": q, # 'n_estimators': 400, # More trees # 'max_depth': 1, # Shallow trees # 'learning_rate': 0.1, # Lower learning rate @@ -95,7 +94,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame: def fit_transform(self, X: pd.DataFrame, y: pd.Series, **fit_params) -> pd.DataFrame: """ - Fits the model and transforms the input DataFrame by adding 'quantile_05', 'quantile_50', and 'quantile_95' columns. + Fits the model and transforms the input DataFrame by adding 'quantile_05', 'quantile_50', + and 'quantile_95' columns. Args: X (pd.DataFrame): The input features. @@ -136,7 +136,15 @@ def unit_test(): confidence_df["interval"] = confidence_df["quantile_95"] - confidence_df["quantile_05"] # Columns of Interest - dropdown_columns = ["quantile_05", "quantile_25", "quantile_50", "quantile_75", "quantile_95", "interval", target_column] + dropdown_columns = [ + "quantile_05", + "quantile_25", + "quantile_50", + "quantile_75", + "quantile_95", + "interval", + target_column, + ] # Run the Unit Test on the Plugin plugin_test = PluginUnitTest( @@ -186,7 +194,16 @@ def integration_test(): confidence_df["confidence"] = 1.0 - (np.clip(confidence_df["interval"], 0, 4) * 0.25) # Columns of Interest - dropdown_columns = ["quantile_05", "quantile_25", "quantile_50", "quantile_75", "quantile_95", "interval", "confidence", target_column] + dropdown_columns = [ + "quantile_05", + "quantile_25", + "quantile_50", + "quantile_75", + "quantile_95", + "interval", + "confidence", + target_column, + ] # Run the Unit Test on the Plugin plugin_test = PluginUnitTest( diff --git a/src/sageworks/utils/test_data_generator.py b/src/sageworks/utils/test_data_generator.py index 4278f47d2..fd60c4b6c 100644 --- a/src/sageworks/utils/test_data_generator.py +++ b/src/sageworks/utils/test_data_generator.py @@ -56,15 +56,16 @@ def regression_with_varying_noise(n_samples: int = 1000, n_features: int = 4) -> # Generate a simple regression dataset with one feature and one target # Both the feature and the target vary from 0 to 100, the target is the feature plus 10% noise feature = np.float32(range(0, 1000)) - target = [x+1000.0 + x*np.random.uniform(-.1, .1) for x in feature] + target = [x + 1000.0 + x * np.random.uniform(-0.1, 0.1) for x in feature] # Create a DataFrame - df = pd.DataFrame({'feature': feature, 'target': target}) + df = pd.DataFrame({"feature": feature, "target": target}) return df """ # Generate basic synthetic data - X, y = make_regression(n_samples=n_samples, n_features=n_features, n_informative=n_features, noise=0.0, random_state=42) + X, y = make_regression(n_samples=n_samples, n_features=n_features, n_informative=n_features, + noise=0.0, random_state=42) # Normalize target values to the range [0, 100] y = (y - y.min()) / (y.max() - y.min()) * 100