fixed prophet horizon values and rearranged example notebooks

AutoViML · Jul 27, 2020 · 8097292 · 8097292
1 parent e22803b
commit 8097292
Show file tree

Hide file tree

Showing 6 changed files with 55 additions and 47 deletions.
diff --git a/auto_ts/__init__.py b/auto_ts/__init__.py
@@ -2,7 +2,7 @@
 #Defining AUTO_TIMESERIES here
 ##########################################################
 module_type = 'Running' if  __name__ == "__main__" else 'Imported'
-version_number = '0.0.23'
+version_number = '0.0.23.b2'
 print(f"Running Auto Timeseries version: {version_number}...")
 
 # Call by using:

diff --git a/auto_ts/models/build_prophet.py b/auto_ts/models/build_prophet.py
@@ -100,51 +100,70 @@ def fit(self, ts_df: pd.DataFrame, target_col: str, cv: Optional[int], time_col:
             for name in self.original_preds:
                 self.model.add_regressor(name)
 
+
         self.model.fit(dft)
 
         num_obs = dft.shape[0]
         NFOLDS = self.get_num_folds_from_cv(cv)
 
-        # print(f"Min Time = {dft['ds'].min()}")
-        # print(f"Max Time = {dft['ds'].max()}")
+        if self.verbose >= 2:
+            print(f"NumObs: {num_obs}")
+            print(f"NFOLDS: {NFOLDS}")
+
         total_days = (dft['ds'].max() - dft['ds'].min()).days
-        horizon_days = (dft['ds'].max() - dft.iloc[-self.forecast_period]['ds']).days 
+        if self.verbose >= 2:
+            print("Variables used for calculating initial, horizon, period...")
+            print(f"Forcast Period: {self.forecast_period}")
+            print(f"Max Date: {dft['ds'].max()}")
+            print(f"Horizon Start: {dft.iloc[-self.forecast_period]['ds']}")
+
+        #horizon_days = (dft['ds'].max() - dft.iloc[-forecast_start]['ds']).days 
+        horizon_days = (dft['ds'].max() - dft.iloc[-(self.forecast_period+1)]['ds']).days 
+
         initial_days = total_days - NFOLDS * horizon_days
         period_days = horizon_days
-        # print(f"Total Days: {total_days}")
-        # print(f"Horizon Days: {horizon_days}")
-        # print(f"Initial Days: {initial_days}")
-        # print(f"Period Days: {period_days}")
+
+        if self.verbose >= 2:
+            print("Unadjusted Prophet CV Diagnostics:")
+            print(f"Total Days: {total_days}")
+            print(f"Initial Days: {initial_days}")
+            print(f"Period Days: {period_days}")
+            print(f"Horizon Days: {horizon_days}")
 
-        OFFSET = 5  # 5 days  # adjusting some days to take into account uneven months.
+        OFFSET = 0  # days subtracted from initial and added to horizon; was 5 to allow for uneven months, now disabled.
         initial = str(initial_days-OFFSET) + " D"  
         period = str(period_days) + " D" 
         horizon = str(horizon_days+OFFSET) + " D" 
 
-        # print("Prophet CV Diagnostics:")
-        # print(f"NumObs: {num_obs}")
-        # print(f"NFOLDS: {NFOLDS}")
-        # print(f"initial: {initial}")
-        # print(f"period: {period}")
-        # print(f"horizon: {horizon}")
+        if self.verbose >= 2:
+            print(f"OFFSET: {OFFSET}")
+            print(f"initial: {initial}")
+            print(f"period: {period}")
+            print(f"horizon: {horizon}")
+
+        # First  Fold --> 
+        #   Train Set: 0:initial
+        #   Test Set: initial:(initial+horizon)
+        # Second Fold --> 
+        #   Train Set: 0:(initial+period)  (Prophet trains on all history up to each cutoff, so the window expands)
+        #   Test Set: (initial+period):(initial+horizon+ period)
+        # Format: '850 D'
+        df_cv = cross_validation(self.model, initial=initial, period=period, horizon=horizon) 
+
+        if self.verbose >= 2:
+            print("Prophet CV DataFrame")
+            print(df_cv)
 
+        num_obs_folds = df_cv.groupby('cutoff')['ds'].count()
 
-        df_cv = cross_validation(
-            self.model,
-            initial=initial,   # '850 D', 
-            period=period,   # '100 D', 
-            horizon=horizon   #'300 D' 
-        ) 
-
-        # first: train: 0 to 64 Test 65 to 65+52
-        # second: train: 0+26 to 65+26 Test 65+26 to 65+26+52
-        # next: train: 0+26+26. to 65+26+26. Test 65+26+26.. to 65+26+26+52
-
-        # print("Prophet CV DataFrame")
-        # print(df_cv)
+        # https://stackoverflow.com/questions/54405704/check-if-all-values-in-dataframe-column-are-the-same
+        a = num_obs_folds.to_numpy() 
+        all_equal = (a[0] == a).all()
 
-        # print("Prophet Num Obs Per fold")
-        # print(df_cv.groupby('cutoff')['ds'].count())
+        if not all_equal:
+            print("WARNING: All folds did not have the same number of observations in the validation sets.")
+            print("Num Test Obs Per fold")
+            print(num_obs_folds)
 
         rmse_folds = []
         norm_rmse_folds = []

diff --git a/auto_ts/test/test_auto_ts.py b/auto_ts/test/test_auto_ts.py
@@ -61,7 +61,7 @@ def setUp(self):
 
         self.rmse_gold_prophet_univar_cv_fold1 = 86.34827037
         self.rmse_gold_prophet_univar_cv_fold2 = 56.5751 # Without CV gets this result
-
+        
 
         ## External Test Set results 
         results = [
@@ -92,19 +92,19 @@ def setUp(self):
 
         # Internal (to AutoML) validation set results
         self.forecast_gold_prophet_multivar_internal_val_cv_fold1 = np.array([            
-            408.247213, 496.038917, 556.120951, 604.455571,
-            584.852771, 653.133907, 648.77597 , 487.54389
+            502.111972, 569.181958, 578.128706, 576.069791,
+            663.258686, 677.851419, 750.972617, 781.269791
         ])
 
         self.forecast_gold_prophet_multivar_internal_val_cv_fold2 = np.array([            
             618.244315, 555.784628, 524.396122, 611.513751,
             584.936717, 605.940656, 702.652641, 736.639273
         ])
 
-        self.rmse_gold_prophet_multivar_cv_fold1 = 91.15254417  
+        self.rmse_gold_prophet_multivar_cv_fold1 = 48.70419901 
         self.rmse_gold_prophet_multivar_cv_fold2 = 63.24631835 # Without CV gets this result 
-
-
+        
+        
         ## External Test Set results 
         results = [
             747.964093, 736.512241, 814.840792, 825.152970,
@@ -462,7 +462,6 @@ def setUp(self):
         self.forecast_gold_ml_multivar_external_test_10.name = 'mean'
 
 
-    # @unittest.skip    
     def test_auto_ts_multivar_ns_SARIMAX(self):
         """
         test to check functionality of the auto_ts function (multivariate with non seasonal SARIMAX)
@@ -816,7 +815,6 @@ def test_auto_ts_multivar_ns_SARIMAX(self):
             round(ml_dict.get('ML').get('rmse')[0], 6), self.rmse_gold_ml_multivar,
             "(Multivar Test) ML RMSE does not match up with expected values.")
 
-    # @unittest.skip    
     def test_auto_ts_univar_ns_SARIMAX(self):
         """
         test to check functionality of the auto_ts function (univariate models with non seasonal SARIMAX)
@@ -1125,7 +1123,6 @@ def test_auto_ts_univar_ns_SARIMAX(self):
             "(Univar Test) ML RMSE does not match up with expected values."
         )
 
-    # @unittest.skip 
     def test_auto_ts_multivar_seasonal_SARIMAX(self):
         """
         test to check functionality of the auto_ts function (multivariate with seasonal SARIMAX)
@@ -1241,7 +1238,6 @@ def test_auto_ts_multivar_seasonal_SARIMAX(self):
             round(ml_dict.get('SARIMAX').get('rmse')[0], 6), self.rmse_gold_sarimax_multivar_s12,
             "(Multivar Test) SARIMAX RMSE does not match up with expected values.")
 
-    # @unittest.skip  
     def test_auto_ts_multivar_seasonal_SARIMAX_withCV(self):
         """
         test to check functionality of the auto_ts function (multivariate with seasonal SARIMAX)
@@ -1368,7 +1364,6 @@ def test_auto_ts_multivar_seasonal_SARIMAX_withCV(self):
             "(Multivar Test) SARIMAX RMSE does not match up with expected values --> Fold 2.")
 
 
-    # @unittest.skip    
     def test_subset_of_models(self):
         """
         test to check functionality of the training with only a subset of models
@@ -1435,7 +1430,6 @@ def test_subset_of_models(self):
             sep=self.sep)
         self.assertIsNone(status)
 
-    # @unittest.skip  
     def test_passing_list_instead_of_str(self):
         """
         TODO: Add docstring
@@ -1464,7 +1458,6 @@ def test_passing_list_instead_of_str(self):
 
         np.testing.assert_array_equal(automl_model.get_leaderboard()['name'].values, leaderboard_models)
 
-    # @unittest.skip  
     def test_cv_retreival_plotting(self):
         """
         Tests CV Scores retreival and plotting
@@ -1502,7 +1495,6 @@ def test_cv_retreival_plotting(self):
         automl_model.plot_cv_scores()
 
 
-    # @unittest.skip
     def test_prophet_multivar_standalone_noCV(self):
         """
         test to check functionality Prophet with CV
@@ -1570,7 +1562,6 @@ def test_prophet_multivar_standalone_noCV(self):
         assert_series_equal(test_predictions.round(6), self.forecast_gold_prophet_multivar_external_test_10_cv)        
 
 
-    # @unittest.skip 
     def test_prophet_multivar_standalone_withCV(self):
         """
         test to check functionality Prophet with CV
@@ -1643,7 +1634,6 @@ def test_prophet_multivar_standalone_withCV(self):
             model="Prophet")
         assert_series_equal(test_predictions.round(6), self.forecast_gold_prophet_multivar_external_test_10_cv)        
 
-    # @unittest.skip  
     def test_ml_standalone(self):
         """
         Testing ML Standalone
@@ -1669,7 +1659,6 @@ def test_ml_standalone(self):
             sep=self.sep) 
         print(automl_model.get_leaderboard())
 
-    # @unittest.skip  
     def test_ml_standalone_withCV(self):
         """
         test to check functionality ML with CV

diff --git a/Auto_Timeseries_Test.ipynb → example_notebooks/Auto_Timeseries_Test.ipynb b/Auto_Timeseries_Test.ipynb → example_notebooks/Auto_Timeseries_Test.ipynb
diff --git a/autots_multivariate_example.ipynb → ...tebooks/autots_multivariate_example.ipynb b/autots_multivariate_example.ipynb → ...tebooks/autots_multivariate_example.ipynb
diff --git a/autots_univariate_example.ipynb → ...notebooks/autots_univariate_example.ipynb b/autots_univariate_example.ipynb → ...notebooks/autots_univariate_example.ipynb