Skip to content

Commit

Permalink
fixed prophet horizon values and rearranged example notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
ngupta23 committed Jul 27, 2020
1 parent e22803b commit 8097292
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 47 deletions.
2 changes: 1 addition & 1 deletion auto_ts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#Defining AUTO_TIMESERIES here
##########################################################
module_type = 'Running' if __name__ == "__main__" else 'Imported'
version_number = '0.0.23'
version_number = '0.0.23.b2'
print(f"Running Auto Timeseries version: {version_number}...")

# Call by using:
Expand Down
77 changes: 48 additions & 29 deletions auto_ts/models/build_prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,51 +100,70 @@ def fit(self, ts_df: pd.DataFrame, target_col: str, cv: Optional[int], time_col:
for name in self.original_preds:
self.model.add_regressor(name)


self.model.fit(dft)

num_obs = dft.shape[0]
NFOLDS = self.get_num_folds_from_cv(cv)

# print(f"Min Time = {dft['ds'].min()}")
# print(f"Max Time = {dft['ds'].max()}")
if self.verbose >= 2:
print(f"NumObs: {num_obs}")
print(f"NFOLDS: {NFOLDS}")

total_days = (dft['ds'].max() - dft['ds'].min()).days
horizon_days = (dft['ds'].max() - dft.iloc[-self.forecast_period]['ds']).days
if self.verbose >= 2:
print("Variables used for calculating initial, horizon, period...")
print(f"Forcast Period: {self.forecast_period}")
print(f"Max Date: {dft['ds'].max()}")
print(f"Horizon Start: {dft.iloc[-self.forecast_period]['ds']}")

#horizon_days = (dft['ds'].max() - dft.iloc[-forecast_start]['ds']).days
horizon_days = (dft['ds'].max() - dft.iloc[-(self.forecast_period+1)]['ds']).days

initial_days = total_days - NFOLDS * horizon_days
period_days = horizon_days
# print(f"Total Days: {total_days}")
# print(f"Horizon Days: {horizon_days}")
# print(f"Initial Days: {initial_days}")
# print(f"Period Days: {period_days}")

if self.verbose >= 2:
print("Unadjusted Prophet CV Diagnostics:")
print(f"Total Days: {total_days}")
print(f"Initial Days: {initial_days}")
print(f"Period Days: {period_days}")
print(f"Horizon Days: {horizon_days}")

OFFSET = 5 # 5 days # adjusting some days to take into account uneven months.
OFFSET = 0 # no adjustment applied (previously 5 days, to take into account uneven months).
initial = str(initial_days-OFFSET) + " D"
period = str(period_days) + " D"
horizon = str(horizon_days+OFFSET) + " D"

# print("Prophet CV Diagnostics:")
# print(f"NumObs: {num_obs}")
# print(f"NFOLDS: {NFOLDS}")
# print(f"initial: {initial}")
# print(f"period: {period}")
# print(f"horizon: {horizon}")
if self.verbose >= 2:
print(f"OFFSET: {OFFSET}")
print(f"initial: {initial}")
print(f"period: {period}")
print(f"horizon: {horizon}")

# First Fold -->
# Train Set: 0:initial
# Test Set: initial:(initial+horizon)
# Second Fold -->
# Train Set: (period):(initial+period)
# Test Set: (initial+period):(initial+horizon+ period)
# Format: '850 D'
df_cv = cross_validation(self.model, initial=initial, period=period, horizon=horizon)

if self.verbose >= 2:
print("Prophet CV DataFrame")
print(df_cv)

num_obs_folds = df_cv.groupby('cutoff')['ds'].count()

df_cv = cross_validation(
self.model,
initial=initial, # '850 D',
period=period, # '100 D',
horizon=horizon #'300 D'
)

# first: train: 0 to 64 Test 65 to 65+52
# second: train: 0+26 to 65+26 Test 65+26 to 65+26+52
# next: train: 0+26+26. to 65+26+26. Test 65+26+26.. to 65+26+26+52

# print("Prophet CV DataFrame")
# print(df_cv)
# https://stackoverflow.com/questions/54405704/check-if-all-values-in-dataframe-column-are-the-same
a = num_obs_folds.to_numpy()
all_equal = (a[0] == a).all()

# print("Prophet Num Obs Per fold")
# print(df_cv.groupby('cutoff')['ds'].count())
if not all_equal:
print("WARNING: All folds did not have the same number of observations in the validation sets.")
print("Num Test Obs Per fold")
print(num_obs_folds)

rmse_folds = []
norm_rmse_folds = []
Expand Down
23 changes: 6 additions & 17 deletions auto_ts/test/test_auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def setUp(self):

self.rmse_gold_prophet_univar_cv_fold1 = 86.34827037
self.rmse_gold_prophet_univar_cv_fold2 = 56.5751 # Without CV gets this result


## External Test Set results
results = [
Expand Down Expand Up @@ -92,19 +92,19 @@ def setUp(self):

# Internal (to AutoML) validation set results
self.forecast_gold_prophet_multivar_internal_val_cv_fold1 = np.array([
408.247213, 496.038917, 556.120951, 604.455571,
584.852771, 653.133907, 648.77597 , 487.54389
502.111972, 569.181958, 578.128706, 576.069791,
663.258686, 677.851419, 750.972617, 781.269791
])

self.forecast_gold_prophet_multivar_internal_val_cv_fold2 = np.array([
618.244315, 555.784628, 524.396122, 611.513751,
584.936717, 605.940656, 702.652641, 736.639273
])

self.rmse_gold_prophet_multivar_cv_fold1 = 91.15254417
self.rmse_gold_prophet_multivar_cv_fold1 = 48.70419901
self.rmse_gold_prophet_multivar_cv_fold2 = 63.24631835 # Without CV gets this result


## External Test Set results
results = [
747.964093, 736.512241, 814.840792, 825.152970,
Expand Down Expand Up @@ -462,7 +462,6 @@ def setUp(self):
self.forecast_gold_ml_multivar_external_test_10.name = 'mean'


# @unittest.skip
def test_auto_ts_multivar_ns_SARIMAX(self):
"""
test to check functionality of the auto_ts function (multivariate with non seasonal SARIMAX)
Expand Down Expand Up @@ -816,7 +815,6 @@ def test_auto_ts_multivar_ns_SARIMAX(self):
round(ml_dict.get('ML').get('rmse')[0], 6), self.rmse_gold_ml_multivar,
"(Multivar Test) ML RMSE does not match up with expected values.")

# @unittest.skip
def test_auto_ts_univar_ns_SARIMAX(self):
"""
test to check functionality of the auto_ts function (univariate models with non seasonal SARIMAX)
Expand Down Expand Up @@ -1125,7 +1123,6 @@ def test_auto_ts_univar_ns_SARIMAX(self):
"(Univar Test) ML RMSE does not match up with expected values."
)

# @unittest.skip
def test_auto_ts_multivar_seasonal_SARIMAX(self):
"""
test to check functionality of the auto_ts function (multivariate with seasonal SARIMAX)
Expand Down Expand Up @@ -1241,7 +1238,6 @@ def test_auto_ts_multivar_seasonal_SARIMAX(self):
round(ml_dict.get('SARIMAX').get('rmse')[0], 6), self.rmse_gold_sarimax_multivar_s12,
"(Multivar Test) SARIMAX RMSE does not match up with expected values.")

# @unittest.skip
def test_auto_ts_multivar_seasonal_SARIMAX_withCV(self):
"""
test to check functionality of the auto_ts function (multivariate with seasonal SARIMAX)
Expand Down Expand Up @@ -1368,7 +1364,6 @@ def test_auto_ts_multivar_seasonal_SARIMAX_withCV(self):
"(Multivar Test) SARIMAX RMSE does not match up with expected values --> Fold 2.")


# @unittest.skip
def test_subset_of_models(self):
"""
test to check functionality of the training with only a subset of models
Expand Down Expand Up @@ -1435,7 +1430,6 @@ def test_subset_of_models(self):
sep=self.sep)
self.assertIsNone(status)

# @unittest.skip
def test_passing_list_instead_of_str(self):
"""
TODO: Add docstring
Expand Down Expand Up @@ -1464,7 +1458,6 @@ def test_passing_list_instead_of_str(self):

np.testing.assert_array_equal(automl_model.get_leaderboard()['name'].values, leaderboard_models)

# @unittest.skip
def test_cv_retreival_plotting(self):
"""
Tests CV Scores retrieval and plotting
Expand Down Expand Up @@ -1502,7 +1495,6 @@ def test_cv_retreival_plotting(self):
automl_model.plot_cv_scores()


# @unittest.skip
def test_prophet_multivar_standalone_noCV(self):
"""
test to check functionality of Prophet without CV
Expand Down Expand Up @@ -1570,7 +1562,6 @@ def test_prophet_multivar_standalone_noCV(self):
assert_series_equal(test_predictions.round(6), self.forecast_gold_prophet_multivar_external_test_10_cv)


# @unittest.skip
def test_prophet_multivar_standalone_withCV(self):
"""
test to check functionality of Prophet with CV
Expand Down Expand Up @@ -1643,7 +1634,6 @@ def test_prophet_multivar_standalone_withCV(self):
model="Prophet")
assert_series_equal(test_predictions.round(6), self.forecast_gold_prophet_multivar_external_test_10_cv)

# @unittest.skip
def test_ml_standalone(self):
"""
Testing ML Standalone
Expand All @@ -1669,7 +1659,6 @@ def test_ml_standalone(self):
sep=self.sep)
print(automl_model.get_leaderboard())

# @unittest.skip
def test_ml_standalone_withCV(self):
"""
test to check functionality ML with CV
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit 8097292

Please sign in to comment.