Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions example/chronos.ipynb

Large diffs are not rendered by default.

56 changes: 24 additions & 32 deletions example/chronosbolt.ipynb

Large diffs are not rendered by default.

59 changes: 38 additions & 21 deletions example/moment_forecasting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,32 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/gluonts/json.py:102: UserWarning: Using `json`-module for json-handling. Consider installing one of `orjson`, `ujson` to speed up serialization and deserialization.\n",
" warnings.warn(\n",
"INFO:p-2597098:t-140082893653824:moment.py:_validate_inputs:Setting d_model to 1024\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading MOMENT model from AutonLab/MOMENT-1-large\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:p-2597098:t-140082893653824:moment.py:_get_transformer_backbone:Initializing pre-trained transformer from google/flan-t5-large.\n",
"INFO:p-2597098:t-140082893653824:moment.py:_get_transformer_backbone:Enabling gradient checkpointing.\n"
]
}
],
"source": [
Expand Down Expand Up @@ -57,7 +74,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -72,38 +89,38 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0: Train loss: 0.572\n",
"Epoch 1: Train loss: 0.502\n",
"Epoch 2: Train loss: 0.473\n",
"Epoch 3: Train loss: 0.457\n",
"Epoch 4: Train loss: 0.452\n"
"Epoch 0: Train loss: 0.068\n",
"Epoch 1: Train loss: 0.064\n",
"Epoch 2: Train loss: 0.060\n",
"Epoch 3: Train loss: 0.056\n",
"Epoch 4: Train loss: 0.053\n"
]
},
{
"data": {
"text/plain": [
"{'mse': 0.5570129,\n",
" 'mae': 0.5200917,\n",
" 'mase': 0.82687783,\n",
" 'mape': -0.30029196,\n",
" 'rmse': 0.746333,\n",
" 'nrmse': 0.07844285181705611,\n",
" 'smape': 0.8625615,\n",
" 'msis': 0.081994146,\n",
" 'nd': 28.05683713775537}"
"{'mse': 0.06429593,\n",
" 'mae': 0.05884363,\n",
" 'mase': 1.8647041,\n",
" 'mape': 0.02874577,\n",
" 'rmse': 0.2535664,\n",
" 'nrmse': 0.02665093539652813,\n",
" 'smape': 0.2105672,\n",
" 'msis': 0.046476997,\n",
" 'nd': 26.39926135498086}"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/tsfmproject/models/moment/data/ETTh1.csv', \n",
"train_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/samay/models/moment/data/ETTh1.csv', \n",
" mode='train', horizon_len=192)\n",
"# dtl = train_dataset.get_data_loader()\n",
"\n",
"val_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/tsfmproject/models/moment/data/ETTh1.csv',\n",
"val_dataset = MomentDataset(name=\"ett\", datetime_col='date', path='../src/samay/models/moment/data/ETTh1.csv',\n",
" mode='test', horizon_len=192)\n",
"# path = '../src/tsfmproject/models/moment/data/ETTh1.csv'\n",
"\n",
Expand Down Expand Up @@ -191,7 +208,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "torch",
"language": "python",
"name": "python3"
},
Expand All @@ -205,7 +222,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
48 changes: 16 additions & 32 deletions example/timemoe.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -147,47 +147,31 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"id": "9846b368",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 1])) that is different to the input size (torch.Size([32, 512, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 8, 1])) that is different to the input size (torch.Size([32, 512, 8, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 32, 1])) that is different to the input size (torch.Size([32, 512, 32, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([32, 1, 64, 1])) that is different to the input size (torch.Size([32, 512, 64, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n"
]
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 1])) that is different to the input size (torch.Size([13, 512, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 8, 1])) that is different to the input size (torch.Size([13, 512, 8, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 32, 1])) that is different to the input size (torch.Size([13, 512, 32, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n",
"/nethome/sli999/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/loss.py:1100: UserWarning: Using a target size (torch.Size([13, 1, 64, 1])) that is different to the input size (torch.Size([13, 512, 64, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
" return F.huber_loss(input, target, reduction=self.reduction, delta=self.delta)\n"
"predictions shape: torch.Size([32, 512, 1])\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 0, Loss: 0.1356\n",
"Epoch 1, Loss: 0.1170\n",
"Epoch 2, Loss: 0.1028\n",
"Epoch 3, Loss: 0.0947\n",
"Epoch 4, Loss: 0.0839\n"
"ename": "RuntimeError",
"evalue": "shape '[32, 512, 96, -1]' is invalid for input of size 16384",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtme\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfinetune\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_dataset\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/Samay/src/samay/model.py:2077\u001b[0m, in \u001b[0;36mTimeMoEModel.finetune\u001b[0;34m(self, dataset, **kwargs)\u001b[0m\n\u001b[1;32m 2075\u001b[0m loss_mask \u001b[38;5;241m=\u001b[39m loss_mask\u001b[38;5;241m.\u001b[39mfloat()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 2076\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[0;32m-> 2077\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforecast_seq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mloss_masks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mloss_mask\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2078\u001b[0m loss \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39mloss\n\u001b[1;32m 2079\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n",
"File \u001b[0;32m~/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1734\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1736\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/anaconda3/envs/torch/lib/python3.11/site-packages/torch/nn/modules/module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1747\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n",
"File \u001b[0;32m~/Samay/src/samay/models/Time_MoE/time_moe/models/modeling_time_moe.py:1019\u001b[0m, in \u001b[0;36mTimeMoeForPrediction.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, loss_masks, use_cache, output_attentions, output_hidden_states, return_dict, max_horizon_length)\u001b[0m\n\u001b[1;32m 1017\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m lm_head, horizon_length \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlm_heads, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mhorizon_lengths):\n\u001b[1;32m 1018\u001b[0m one_predictions \u001b[38;5;241m=\u001b[39m lm_head(hidden_states)\n\u001b[0;32m-> 1019\u001b[0m one_loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalc_ar_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mone_predictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mloss_masks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhorizon_length\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1020\u001b[0m ar_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m one_loss\n\u001b[1;32m 1021\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m predictions \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"File \u001b[0;32m~/Samay/src/samay/models/Time_MoE/time_moe/models/modeling_time_moe.py:1077\u001b[0m, in \u001b[0;36mTimeMoeForPrediction.calc_ar_loss\u001b[0;34m(self, predictions, labels, loss_masks, horizon_length)\u001b[0m\n\u001b[1;32m 1075\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions shape: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpredictions\u001b[38;5;241m.\u001b[39mshape\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1076\u001b[0m batch_size, seq_len, output_size \u001b[38;5;241m=\u001b[39m predictions\u001b[38;5;241m.\u001b[39mshape\n\u001b[0;32m-> 1077\u001b[0m shift_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mpredictions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mview\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseq_len\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhorizon_length\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1079\u001b[0m \u001b[38;5;66;03m# pad to the same length with predictions\u001b[39;00m\n\u001b[1;32m 1080\u001b[0m \u001b[38;5;66;03m# shape -> [B, input_size, seq_len + horizon_length -1]\u001b[39;00m\n\u001b[1;32m 1081\u001b[0m labels \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39mpad(labels\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m), (\u001b[38;5;241m0\u001b[39m, horizon_length \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m), mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mconstant\u001b[39m\u001b[38;5;124m'\u001b[39m, value\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n",
"\u001b[0;31mRuntimeError\u001b[0m: shape '[32, 512, 96, -1]' is invalid for input of size 16384"
]
}
],
Expand Down
Loading