NVIDIA · azrael417 · Sep 30, 2025 · Sep 17, 2025 · Sep 18, 2025 · Sep 18, 2025
diff --git a/docs/api/config.rst b/docs/api/config.rst
@@ -188,15 +188,17 @@ The following table lists the available model types:
 
 The following table lists the available options by model type:
 
-+-----------------+-----------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| Model Type      | Option          | Data Type        | Description                                                                                                                                                                        |
-+=================+=================+==================+====================================================================================================================================================================================+
-| ``torchscript`` | ``filename``    | string           | path to TorchScript exported model file                                                                                                                                            |
-+-----------------+-----------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| ``mlp``         | ``layer_sizes`` | list of integers | sequence of input/output sizes for linear layers e.g., ``[16, 32, 4]`` will create two linear layers with input/output of 16/32 for the first layer and 32/4 for the second layer. |
-+                 +-----------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-|                 | ``dropout``     | float            | probability of an element to be zeroed in dropout layers (default = ``0.0``)                                                                                                       |
-+-----------------+-----------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
++-----------------+----------------------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| Model Type      | Option                     | Data Type        | Description                                                                                                                                                                        |
++=================+============================+==================+====================================================================================================================================================================================+
+| ``torchscript`` | ``filename``               | string           | path to TorchScript exported model file                                                                                                                                            |
++-----------------+----------------------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``mlp``         | ``layer_sizes``            | list of integers | sequence of input/output sizes for linear layers e.g., ``[16, 32, 4]`` will create two linear layers with input/output of 16/32 for the first layer and 32/4 for the second layer. |
++                 +----------------------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                 | ``dropout``                | float            | probability of an element to be zeroed in dropout layers (default = ``0.0``)                                                                                                       |
++                 +----------------------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|                 | ``flatten_non_batch_dims`` | bool             | if set, input tensors are reshaped from ``[batch_size, ...]`` to ``[batch_size, -1]`` before passing to first linear layer (default = ``true``)                                    |
++-----------------+----------------------------+------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
 
 
 Loss Properties

diff --git a/src/csrc/include/internal/models.h b/src/csrc/include/internal/models.h
@@ -33,6 +33,7 @@ struct MLPModel : BaseModel, public std::enable_shared_from_this<BaseModel> {
   std::vector<torch::Tensor> forward(const std::vector<torch::Tensor>& inputs) override;
 
   double dropout;
+  bool flatten_non_batch_dims;
   std::vector<int> layer_sizes;
 
   // Use one of many "standard library" modules.

diff --git a/src/csrc/models/mlp_model.cpp b/src/csrc/models/mlp_model.cpp
@@ -28,10 +28,11 @@ namespace torchfort {
 // MLP model in C++ using libtorch
 void MLPModel::setup(const ParamMap& params) {
   // Extract params from input map.
-  std::set<std::string> supported_params{"dropout", "layer_sizes"};
+  std::set<std::string> supported_params{"dropout", "flatten_non_batch_dims", "layer_sizes"};
   check_params(supported_params, params.keys());
 
   dropout = params.get_param<double>("dropout", 0.0)[0];
+  flatten_non_batch_dims = params.get_param<bool>("flatten_non_batch_dims", true)[0];
   layer_sizes = params.get_param<int>("layer_sizes");
 
   // Construct and register submodules.
@@ -50,7 +51,10 @@ std::vector<torch::Tensor> MLPModel::forward(const std::vector<torch::Tensor>& i
     THROW_INVALID_USAGE("Built-in MLP model does not support multiple input tensors.");
 
   auto x = inputs[0];
-  x = x.reshape({x.size(0), -1});
+
+  if (flatten_non_batch_dims) {
+    x = x.reshape({x.size(0), -1});
+  }
 
   for (int i = 0; i < layer_sizes.size() - 1; ++i) {
     if (i < layer_sizes.size() - 2) {

diff --git a/tests/supervised/CMakeLists.txt b/tests/supervised/CMakeLists.txt
@@ -59,6 +59,7 @@ install(
 # copy files
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/configs/mlp.yaml DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/supervised/configs)
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/configs/mlp2.yaml DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/supervised/configs)
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/configs/mlp3.yaml DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/supervised/configs)
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/configs/mlp2_gradacc.yaml DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/supervised/configs)
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/configs/missing_opt.yaml DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/supervised/configs)
 install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/configs/missing_loss.yaml DESTINATION ${CMAKE_INSTALL_PREFIX}/bin/tests/supervised/configs)

diff --git a/tests/supervised/configs/mlp3.yaml b/tests/supervised/configs/mlp3.yaml
@@ -0,0 +1,12 @@
+model:
+  type: mlp
+  parameters:
+    dropout: 0.0
+    layer_sizes: [10, 10, 10]
+    flatten_non_batch_dims: false  # skip the [batch_size, -1] reshape; input reaches the first layer as-is
+
+loss:
+  type: MSE
+
+optimizer:
+  type: adam
diff --git a/tests/supervised/test_training.cpp b/tests/supervised/test_training.cpp
@@ -31,8 +31,8 @@
 
 #include "test_utils.h"
 
-void training_test(const std::string& model_config, int dev_model, int dev_input, bool should_fail_create,
-                   bool should_fail_train, bool should_fail_inference, bool check_result) {
+void training_test(const std::string& model_config, int dev_model, int dev_input, std::vector<int64_t> shape,
+                   bool should_fail_create, bool should_fail_train, bool should_fail_inference, bool check_result) {
 
   std::string model_name = generate_random_name(10);
 
@@ -55,7 +55,6 @@ void training_test(const std::string& model_config, int dev_model, int dev_input
   }
 #endif
 
-  std::vector<int64_t> shape = {10, 10};
   auto input = generate_random<float>(shape);
   auto label = generate_random<float>(shape);
   auto output = generate_random<float>(shape);
@@ -77,6 +76,13 @@ void training_test(const std::string& model_config, int dev_model, int dev_input
     } else {
       FAIL();
     }
+  } catch (const c10::Error& e) {
+    std::cout << e.what() << std::endl;
+    if (should_fail_train) {
+      // pass: the caller set should_fail_train, so a c10::Error here is the expected outcome
+    } else {
+      FAIL();
+    }
   }
 
   try {
@@ -91,6 +97,13 @@ void training_test(const std::string& model_config, int dev_model, int dev_input
     } else {
       FAIL();
     }
+  } catch (const c10::Error& e) {
+    std::cout << e.what() << std::endl;
+    if (should_fail_inference) {
+      // pass: a c10::Error is acceptable here only when the inference step is expected to fail
+    } else {
+      FAIL();
+    }
   }
 
 #ifdef ENABLE_GPU
@@ -342,10 +355,19 @@ void training_test_grad_accumulation(const std::string& model_config, int dev_mo
 }
 
 TEST(TorchFort, TrainTestMLPCPUCPU) {
-  training_test("configs/mlp2.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, false, false, false, false);
+  training_test("configs/mlp2.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 2, 5}, false, false, false,
+                false);
+}
+TEST(TorchFort, TrainTestMLPCPUCPUNoFlatten) {
+  training_test("configs/mlp3.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 2, 10}, false, false, false,
+                false);
+}
+TEST(TorchFort, TrainTestMLPCPUCPU1DNoFlatten) {
+  training_test("configs/mlp3.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10}, false, false, false, false);
 }
 TEST(TorchFort, TrainTestTorchScriptCPUCPU) {
-  training_test("configs/torchscript.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, false, false, false, true);
+  training_test("configs/torchscript.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 2, 10}, false, false,
+                false, true);
 }
 TEST(TorchFort, TrainTestTorchScriptMultiArgCPUCPU) {
   training_test_multiarg("configs/torchscript_multiarg.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, false, false,
@@ -362,20 +384,20 @@ TEST(TorchFort, TrainTestGradAccumulationCPUCPU) {
 
 #ifdef ENABLE_GPU
 TEST(TorchFort, TrainTestMLPGPUCPU) {
-  training_test("configs/mlp2.yaml", 0, TORCHFORT_DEVICE_CPU, false, false, false, false);
+  training_test("configs/mlp2.yaml", 0, TORCHFORT_DEVICE_CPU, {10, 2, 5}, false, false, false, false);
 }
 TEST(TorchFort, TrainTestMLPCPUGPU) {
-  training_test("configs/mlp2.yaml", TORCHFORT_DEVICE_CPU, 0, false, false, false, false);
+  training_test("configs/mlp2.yaml", TORCHFORT_DEVICE_CPU, 0, {10, 2, 5}, false, false, false, false);
 }
-TEST(TorchFort, TrainTestMLPGPUGPU) { training_test("configs/mlp2.yaml", 0, 0, false, false, false, false); }
+TEST(TorchFort, TrainTestMLPGPUGPU) { training_test("configs/mlp2.yaml", 0, 0, {10, 10}, false, false, false, false); }
 TEST(TorchFort, TrainTestTorchScriptCPUGPU) {
-  training_test("configs/torchscript.yaml", TORCHFORT_DEVICE_CPU, 0, false, false, false, true);
+  training_test("configs/torchscript.yaml", TORCHFORT_DEVICE_CPU, 0, {10, 2, 10}, false, false, false, true);
 }
 TEST(TorchFort, TrainTestTorchScriptGPUCPU) {
-  training_test("configs/torchscript.yaml", 0, TORCHFORT_DEVICE_CPU, false, false, false, true);
+  training_test("configs/torchscript.yaml", 0, TORCHFORT_DEVICE_CPU, {10, 2, 10}, false, false, false, true);
 }
 TEST(TorchFort, TrainTestTorchScriptGPUGPU) {
-  training_test("configs/torchscript.yaml", 0, 0, false, false, false, true);
+  training_test("configs/torchscript.yaml", 0, 0, {10, 2, 10}, false, false, false, true);
 }
 TEST(TorchFort, TrainTestTorchScriptMultiArgCPUGPU) {
   training_test_multiarg("configs/torchscript_multiarg.yaml", TORCHFORT_DEVICE_CPU, 0, false, false, false, false,
@@ -403,19 +425,27 @@ TEST(TorchFort, TrainTestTorchScriptMultiArgExtraGPUGPU) {
 
 // Testing expected error cases
 TEST(TorchFort, TrainTestBadConfigName) {
-  training_test("configs/blah.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, true, true, true, false);
+  training_test("configs/blah.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 10}, true, true, true, false);
 }
 TEST(TorchFort, TrainTestNoOptimizerBlock) {
-  training_test("configs/missing_opt.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, false, true, false, false);
+  training_test("configs/missing_opt.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 10}, false, true, false,
+                false);
 }
 TEST(TorchFort, TrainTestNoLossBlock) {
-  training_test("configs/missing_loss.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, false, true, false, false);
+  training_test("configs/missing_loss.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 10}, false, true, false,
+                false);
 }
 TEST(TorchFort, TrainTestMultiArgErrors) { training_test_multiarg_errors("configs/torchscript_multiarg.yaml"); }
 TEST(TorchFort, TrainTestMultiArgMLPError) {
   training_test_multiarg("configs/mlp2.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, false, false, true, true,
                          false);
 }
+TEST(TorchFort, TrainTestMLPCPUCPUNoFlattenDimError) {
+  training_test("configs/mlp3.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10, 2, 5}, false, true, true, false);
+}
+TEST(TorchFort, TrainTestMLPCPUCPU1DDimError) {
+  training_test("configs/mlp2.yaml", TORCHFORT_DEVICE_CPU, TORCHFORT_DEVICE_CPU, {10}, false, true, true, false);
+}
 
 int main(int argc, char* argv[]) {
   ::testing::InitGoogleTest(&argc, argv);