diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index fd336af9d..990d4cc67 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-latest, macos-13, windows-latest]
         python-version: ['3.8', '3.9', '3.10', '3.11']
         exclude:
           - os: windows-latest
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index 14165c3a9..eb1f7be7f 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -932,9 +932,29 @@
     "            output_length = len(model.loss.output_names)\n",
     "            fcsts[:,col_idx:(col_idx + output_length)] = model_fcsts\n",
     "            col_idx += output_length\n",
-    "        if self.scalers_: \n",
-    "            indptr = np.append(0, np.full(self.dataset.n_groups, self.h * n_windows).cumsum())\n",
-    "            fcsts = self._scalers_target_inverse_transform(fcsts, indptr)\n",
+    "        # we may have allocated more space than needed\n",
+    "        # each serie can produce at most (serie.size - 1) // self.h CV windows\n",
+    "        effective_sizes = ufp.counts_by_id(fcsts_df, id_col)['counts'].to_numpy()\n",
+    "        needs_trim = effective_sizes.sum() != fcsts.shape[0]\n",
+    "        if self.scalers_ or needs_trim:\n",
+    "            indptr = np.arange(\n",
+    "                0,\n",
+    "                n_windows * self.h * (self.dataset.n_groups + 1),\n",
+    "                n_windows * self.h,\n",
+    "                dtype=np.int32,\n",
+    "            )\n",
+    "            if self.scalers_:\n",
+    "                fcsts = self._scalers_target_inverse_transform(fcsts, indptr)\n",
+    "            if needs_trim:\n",
+    "                # we keep only the effective samples of each serie from the cv results\n",
+    "                trimmed = np.empty_like(\n",
+    "                    fcsts, shape=(effective_sizes.sum(), fcsts.shape[1])\n",
+    "                )\n",
+    "                cv_indptr = np.append(0, effective_sizes).cumsum(dtype=np.int32)\n",
+    "                for i in range(fcsts.shape[1]):\n",
+    "                    ga = GroupedArray(fcsts[:, i], indptr)\n",
+    "                    trimmed[:, i] = ga._tails(cv_indptr)\n",
+    "                fcsts = trimmed\n",
     "\n",
     "        self._fitted = True\n",
     "\n",
@@ -2204,6 +2224,7 @@
     "        Y_hat_df[Y_hat_df_cv.columns],\n",
     "        Y_hat_df_cv,\n",
     "        check_dtype=False,\n",
+    "        atol=1e-5,\n",
     "    )"
    ]
   },
@@ -2218,6 +2239,34 @@
     "test_cross_validation(AirPassengersPanel, AirPassengersStatic, h=12, test_size=12)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "03396c73",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| hide\n",
+    "# test cv with series of different sizes\n",
+    "series = pd.DataFrame({\n",
+    "    'unique_id': np.repeat([0, 1], [10, 15]),\n",
+    "    'ds': np.arange(25),\n",
+    "    'y': np.random.rand(25),\n",
+    "})\n",
+    "nf = NeuralForecast(\n",
+    "    freq=1,\n",
+    "    models=[MLP(input_size=5, h=5, max_steps=0, enable_progress_bar=False)]\n",
+    ")\n",
+    "cv_df = nf.cross_validation(df=series, n_windows=3, step_size=5)\n",
+    "expected = pd.DataFrame({\n",
+    "    'unique_id': np.repeat([0, 1], [5, 10]),\n",
+    "    'ds': np.hstack([np.arange(5, 10), np.arange(15, 25)]),\n",
+    "    'cutoff': np.repeat([4, 14, 19], 5)\n",
+    "})\n",
+    "expected = expected.merge(series, on=['unique_id', 'ds'])\n",
+    "pd.testing.assert_frame_equal(expected, cv_df.drop(columns='MLP'))"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/nbs/models.bitcn.ipynb b/nbs/models.bitcn.ipynb
index d08bec764..7010720d1 100644
--- a/nbs/models.bitcn.ipynb
+++ b/nbs/models.bitcn.ipynb
@@ -407,7 +407,7 @@
     "Y_test_df = Y_df[Y_df.ds>'1959-12-31'] # 12 test\n",
     "\n",
     "dataset, *_ = TimeSeriesDataset.from_df(Y_train_df)\n",
-    "model = BiTCN(h=12, input_size=24, max_steps=500, scaler_type='standard')\n",
+    "model = BiTCN(h=12, input_size=24, max_steps=5, scaler_type='standard')\n",
     "model.fit(dataset=dataset)\n",
     "y_hat = model.predict(dataset=dataset)\n",
     "Y_test_df['BiTCN'] = y_hat\n",
@@ -449,7 +449,7 @@
     "    BiTCN(h=12,\n",
     "          input_size=24,\n",
     "          loss=GMM(n_components=7, return_params=True, level=[80,90]),\n",
-    "          max_steps=500,\n",
+    "          max_steps=5,\n",
     "          scaler_type='standard',\n",
     "          futr_exog_list=['y_[lag12]'],\n",
     "          hist_exog_list=None,\n",
diff --git a/nbs/models.hint.ipynb b/nbs/models.hint.ipynb
index e34cf569a..534263749 100644
--- a/nbs/models.hint.ipynb
+++ b/nbs/models.hint.ipynb
@@ -495,7 +495,7 @@
     "    for parent_idx, children_list in parent_children_dict.items():\n",
     "        parent_value = hint_mean[parent_idx]\n",
     "        children_sum = hint_mean[children_list].sum()\n",
-    "        np.testing.assert_allclose(children_sum, parent_value)"
+    "        np.testing.assert_allclose(children_sum, parent_value, rtol=1e-6)"
    ]
   },
   {
diff --git a/neuralforecast/core.py b/neuralforecast/core.py
index b4f4ad1e8..6725a69c4 100644
--- a/neuralforecast/core.py
+++ b/neuralforecast/core.py
@@ -874,11 +874,29 @@ def _no_refit_cross_validation(
             output_length = len(model.loss.output_names)
             fcsts[:, col_idx : (col_idx + output_length)] = model_fcsts
             col_idx += output_length
-        if self.scalers_:
-            indptr = np.append(
-                0, np.full(self.dataset.n_groups, self.h * n_windows).cumsum()
+        # we may have allocated more space than needed
+        # each serie can produce at most (serie.size - 1) // self.h CV windows
+        effective_sizes = ufp.counts_by_id(fcsts_df, id_col)["counts"].to_numpy()
+        needs_trim = effective_sizes.sum() != fcsts.shape[0]
+        if self.scalers_ or needs_trim:
+            indptr = np.arange(
+                0,
+                n_windows * self.h * (self.dataset.n_groups + 1),
+                n_windows * self.h,
+                dtype=np.int32,
             )
-            fcsts = self._scalers_target_inverse_transform(fcsts, indptr)
+            if self.scalers_:
+                fcsts = self._scalers_target_inverse_transform(fcsts, indptr)
+            if needs_trim:
+                # we keep only the effective samples of each serie from the cv results
+                trimmed = np.empty_like(
+                    fcsts, shape=(effective_sizes.sum(), fcsts.shape[1])
+                )
+                cv_indptr = np.append(0, effective_sizes).cumsum(dtype=np.int32)
+                for i in range(fcsts.shape[1]):
+                    ga = GroupedArray(fcsts[:, i], indptr)
+                    trimmed[:, i] = ga._tails(cv_indptr)
+                fcsts = trimmed
 
         self._fitted = True