AdityaLab · kage08 · Aug 6, 2025 · Apr 29, 2025 · Aug 6, 2025
diff --git a/example/chronos.ipynb b/example/chronos.ipynb
diff --git a/example/chronosbolt.ipynb b/example/chronosbolt.ipynb
diff --git a/example/moment_forecasting.ipynb b/example/moment_forecasting.ipynb
@@ -11,15 +11,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/gluonts/json.py:102: UserWarning: Using `json`-module for json-handling. Consider installing one of `orjson`, `ujson` to speed up serialization and deserialization.\n",
+      "  warnings.warn(\n",
+      "INFO:p-2597098:t-140082893653824:moment.py:_validate_inputs:Setting d_model to 1024\n"
+     ]
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Loading MOMENT model from AutonLab/MOMENT-1-large\n"
      ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:p-2597098:t-140082893653824:moment.py:_get_transformer_backbone:Initializing pre-trained transformer from google/flan-t5-large.\n",
+      "INFO:p-2597098:t-140082893653824:moment.py:_get_transformer_backbone:Enabling gradient checkpointing.\n"
+     ]
     }
    ],
    "source": [
@@ -57,7 +74,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -72,38 +89,38 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 0: Train loss: 0.572\n",
-      "Epoch 1: Train loss: 0.502\n",
-      "Epoch 2: Train loss: 0.473\n",
-      "Epoch 3: Train loss: 0.457\n",
-      "Epoch 4: Train loss: 0.452\n"
+      "Epoch 0: Train loss: 0.068\n",
+      "Epoch 1: Train loss: 0.064\n",
+      "Epoch 2: Train loss: 0.060\n",
+      "Epoch 3: Train loss: 0.056\n",
+      "Epoch 4: Train loss: 0.053\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "{'mse': 0.5570129,\n",
-       " 'mae': 0.5200917,\n",
-       " 'mase': 0.82687783,\n",
-       " 'mape': -0.30029196,\n",
-       " 'rmse': 0.746333,\n",
-       " 'nrmse': 0.07844285181705611,\n",
-       " 'smape': 0.8625615,\n",
-       " 'msis': 0.081994146,\n",
-       " 'nd': 28.05683713775537}"
+       "{'mse': 0.06429593,\n",
+       " 'mae': 0.05884363,\n",
+       " 'mase': 1.8647041,\n",
+       " 'mape': 0.02874577,\n",
+       " 'rmse': 0.2535664,\n",
+       " 'nrmse': 0.02665093539652813,\n",
+       " 'smape': 0.2105672,\n",
+       " 'msis': 0.046476997,\n",
+       " 'nd': 26.39926135498086}"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "train_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/tsfmproject/models/moment/data/ETTh1.csv', \n",
+    "train_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/samay/models/moment/data/ETTh1.csv', \n",
     "                              mode='train', horizon_len=192)\n",
     "# dtl = train_dataset.get_data_loader()\n",
     "\n",
-    "val_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/tsfmproject/models/moment/data/ETTh1.csv',\n",
+    "val_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/samay/models/moment/data/ETTh1.csv',\n",
     "                            mode='test', horizon_len=192)\n",
     "# path = '../src/tsfmproject/models/moment/data/ETTh1.csv'\n",
     "\n",
@@ -191,7 +208,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "torch",
    "language": "python",
    "name": "python3"
   },
@@ -205,7 +222,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.4"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

diff --git a/example/timemoe.ipynb b/example/timemoe.ipynb
@@ -147,47 +147,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "id": "9846b368",
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 1])) that is different to the input size (torch.Size([32, 512, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 8, 1])) that is different to the input size (torch.Size([32, 512, 8, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 32, 1])) that is different to the input size (torch.Size([32, 512, 32, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 64, 1])) that is different to the input size (torch.Size([32, 512, 64, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n"
-     ]
-    },
-    {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 1])) that is different to the input size (torch.Size([13, 512, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 8, 1])) that is different to the input size (torch.Size([13, 512, 8, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 32, 1])) that is different to the input size (torch.Size([13, 512, 32, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
-      "/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 64, 1])) that is different to the input size (torch.Size([13, 512, 64, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n"
+      "predictions shape: torch.Size([32, 512, 1])\n"
      ]
     },
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch 0, Loss: 0.1356\n",
-      "Epoch 1, Loss: 0.1170\n",
-      "Epoch 2, Loss: 0.1028\n",
-      "Epoch 3, Loss: 0.0947\n",
-      "Epoch 4, Loss: 0.0839\n"
+     "ename": "RuntimeError",
+     "evalue": "shape '[32, 512, 96, -1]' is invalid for input of size 16384",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtme\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfinetune\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_dataset\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Samay/src/samay/model.py:2077\u001b[0m, in \u001b[0;36mTimeMoEModel.finetune\u001b[0;34m(self, dataset, **kwargs)\u001b[0m\n\u001b[1;32m   2075\u001b[0m loss_mask \u001b[38;5;241m=\u001b[39m loss_mask\u001b[38;5;241m.\u001b[39mfloat()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m   2076\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[0;32m-> 2077\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforecast_seq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mloss_masks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mloss_mask\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2078\u001b[0m loss \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39mloss\n\u001b[1;32m   2079\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n",
+      "File \u001b[0;32m~/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
+      "File \u001b[0;32m~/Samay/src/samay/models/Time_MoE/time_moe/models/modeling_time_moe.py:1019\u001b[0m, in \u001b[0;36mTimeMoeForPrediction.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, loss_masks, use_cache, output_attentions, output_hidden_states, return_dict, max_horizon_length)\u001b[0m\n\u001b[1;32m   1017\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m lm_head, horizon_length \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlm_heads, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mhorizon_lengths):\n\u001b[1;32m   1018\u001b[0m     one_predictions \u001b[38;5;241m=\u001b[39m lm_head(hidden_states)\n\u001b[0;32m-> 1019\u001b[0m     one_loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalc_ar_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mone_predictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mloss_masks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhorizon_length\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1020\u001b[0m     ar_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m one_loss\n\u001b[1;32m   1021\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m predictions \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[0;32m~/Samay/src/samay/models/Time_MoE/time_moe/models/modeling_time_moe.py:1077\u001b[0m, in \u001b[0;36mTimeMoeForPrediction.calc_ar_loss\u001b[0;34m(self, predictions, labels, loss_masks, horizon_length)\u001b[0m\n\u001b[1;32m   1075\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions shape: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpredictions\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   1076\u001b[0m batch_size, seq_len, output_size \u001b[38;5;241m=\u001b[39m predictions\u001b[38;5;241m.\u001b[39mshape\n\u001b[0;32m-> 1077\u001b[0m shift_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mpredictions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mview\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseq_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhorizon_length\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1079\u001b[0m \u001b[38;5;66;03m# pad to the same length with predictions\u001b[39;00m\n\u001b[1;32m   1080\u001b[0m \u001b[38;5;66;03m# shape -> [B, input_size, seq_len + horizon_length -1]\u001b[39;00m\n\u001b[1;32m   1081\u001b[0m labels \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mpad(labels\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m), (\u001b[38;5;241m0\u001b[39m, horizon_length \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m), mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mconstant\u001b[39m\u001b[38;5;124m'\u001b[39m, value\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n",
+      "\u001b[0;31mRuntimeError\u001b[0m: shape '[32, 512, 96, -1]' is invalid for input of size 16384"
      ]
     }
    ],