perf: improve performance of model & forward layer (#616)
Closes #610 

### Summary of Changes

Fixed some bugs and improved the performance of several methods. Some changes still remain, but it is helpful to merge this now because @Marsmaennchen221 and @Gerhardsa0 partly depend on it.

---------

Co-authored-by: Alexander Gréus <alexgreus51@gmail.com>
Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: Alexander <47296670+Marsmaennchen221@users.noreply.github.com>
Co-authored-by: WinPlay02 <winplay02_gh@woberlaender.de>
5 people committed Apr 17, 2024
1 parent 1ed2d56 commit e856cd5
Showing 11 changed files with 327 additions and 129 deletions.
2 changes: 1 addition & 1 deletion src/safeds/data/tabular/containers/_table.py
@@ -2418,7 +2418,7 @@ def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): #

def _into_dataloader(self, batch_size: int) -> DataLoader:
"""
-Return a Dataloader for the data stored in this table, used for training neural networks.
+Return a Dataloader for the data stored in this table, used for predicting with neural networks.
The original table is not modified.
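The docstring fix above clarifies that this loader is meant for prediction, not training. As a rough sketch of the idea, assuming the table's values are available as a plain 2D list of floats (the helper name `_table_values_to_prediction_dataloader` and the use of `TensorDataset` are illustrative, not the library's actual implementation):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset


def _table_values_to_prediction_dataloader(values: list[list[float]], batch_size: int) -> DataLoader:
    # Prediction only needs the features, so the dataset wraps a single tensor
    # and is not shuffled; the original data is left untouched.
    features = torch.Tensor(values)
    return DataLoader(dataset=TensorDataset(features), batch_size=batch_size)
```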
41 changes: 26 additions & 15 deletions src/safeds/data/tabular/containers/_tagged_table.py
@@ -3,8 +3,8 @@
import sys
from typing import TYPE_CHECKING

-import numpy as np
import torch
+from torch import Tensor
from torch.utils.data import DataLoader, Dataset

from safeds._utils import _structural_hash
@@ -876,7 +876,7 @@ def transform_column(self, name: str, transformer: Callable[[Row], Any]) -> Tagg
feature_names=self.features.column_names,
)

-def _into_dataloader(self, batch_size: int) -> DataLoader:
+def _into_dataloader_with_classes(self, batch_size: int, num_of_classes: int) -> DataLoader:
"""
Return a Dataloader for the data stored in this table, used for training neural networks.
@@ -893,24 +893,35 @@ def _into_dataloader(self, batch_size: int) -> DataLoader:
The DataLoader.
"""
-feature_rows = self.features.to_rows()
-all_rows = []
-for row in feature_rows:
-new_item = []
-for column_name in row:
-new_item.append(row.get_value(column_name))
-all_rows.append(new_item.copy())
-return DataLoader(dataset=_CustomDataset(np.array(all_rows), np.array(self.target)), batch_size=batch_size)
+if num_of_classes <= 2:
+return DataLoader(
+dataset=_CustomDataset(
+torch.Tensor(self.features._data.values),
+torch.Tensor(self.target._data).unsqueeze(dim=-1),
+),
+batch_size=batch_size,
+shuffle=True,
+)
+else:
+return DataLoader(
+dataset=_CustomDataset(
+torch.Tensor(self.features._data.values),
+torch.nn.functional.one_hot(torch.LongTensor(self.target._data), num_classes=num_of_classes),
+),
+batch_size=batch_size,
+shuffle=True,
+)


class _CustomDataset(Dataset):
-def __init__(self, features: np.array, target: np.array):
-self.X = torch.from_numpy(features.astype(np.float32))
-self.Y = torch.from_numpy(target.astype(np.float32))
-self.len = self.X.shape[0]

+def __init__(self, features: Tensor, target: Tensor):
+self.X = features.to(torch.float32)
+self.Y = target.to(torch.float32)
+self.len = self.X.size(dim=0)

def __getitem__(self, item: int) -> tuple[torch.Tensor, torch.Tensor]:
-return self.X[item], self.Y[item].unsqueeze(-1)
+return self.X[item], self.Y[item]

def __len__(self) -> int:
return self.len
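The new `_into_dataloader_with_classes` branches on `num_of_classes`: with at most two classes the target only gains a trailing dimension, while with more classes it is one-hot encoded. A minimal standalone sketch of the two encodings (the example values are made up):

```python
import torch

# Binary / two-class case: keep the raw values as floats and add a trailing
# dimension, giving shape (n_samples, 1) to match a single output neuron.
binary_target = torch.Tensor([0, 1, 1, 0]).unsqueeze(dim=-1)
print(binary_target.shape)  # torch.Size([4, 1])

# Multiclass case: one-hot encode against the known number of classes,
# giving shape (n_samples, num_of_classes).
multiclass_target = torch.nn.functional.one_hot(torch.LongTensor([0, 2, 1, 2]), num_classes=3)
print(multiclass_target.shape)  # torch.Size([4, 3])
```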
4 changes: 4 additions & 0 deletions src/safeds/exceptions/__init__.py
@@ -27,10 +27,12 @@
DatasetContainsTargetError,
DatasetMissesDataError,
DatasetMissesFeaturesError,
+InputSizeError,
LearningError,
ModelNotFittedError,
NonTimeSeriesError,
PredictionError,
+TestTrainDataMismatchError,
UntaggedTableError,
)

@@ -57,10 +59,12 @@
"DatasetContainsTargetError",
"DatasetMissesDataError",
"DatasetMissesFeaturesError",
+"InputSizeError",
"LearningError",
"ModelNotFittedError",
"NonTimeSeriesError",
"PredictionError",
+"TestTrainDataMismatchError",
"UntaggedTableError",
# Other
"Bound",
18 changes: 18 additions & 0 deletions src/safeds/exceptions/_ml.py
@@ -68,6 +68,24 @@ def __init__(self, reason: str):
super().__init__(f"Error occurred while predicting: {reason}")


class TestTrainDataMismatchError(Exception):
"""Raised when the columns of the table passed to the predict method do not match with the feature columns of the training data."""

def __init__(self) -> None:
super().__init__(
"The column names in the test table do not match with the feature columns names of the training data.",
)


class InputSizeError(Exception):
"""Raised when the amount of features being passed to a network does not match with its input size."""

def __init__(self, table_size: int, input_layer_size: int) -> None:
super().__init__(
f"The amount of columns being passed to the network({table_size}) does not match with its input size({input_layer_size}). Consider changing the number of neurons in the first layer or reformatting the table.",
)


class UntaggedTableError(Exception):
"""Raised when an untagged table is used instead of a TaggedTable in a regression or classification."""

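A hypothetical check illustrating when the two new exceptions might be raised before prediction; the helper name and the exact validation logic are assumptions, not the library's actual code:

```python
from safeds.exceptions import InputSizeError, TestTrainDataMismatchError


def _check_predict_input(test_columns: list[str], feature_columns: list[str], input_layer_size: int) -> None:
    # The test table must have exactly the feature columns seen during training.
    if sorted(test_columns) != sorted(feature_columns):
        raise TestTrainDataMismatchError
    # The number of features must match the input size of the first layer.
    if len(feature_columns) != input_layer_size:
        raise InputSizeError(len(feature_columns), input_layer_size)
```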
4 changes: 2 additions & 2 deletions src/safeds/ml/nn/__init__.py
@@ -1,10 +1,10 @@
"""Classes for classification tasks."""

-from ._fnn_layer import FNNLayer
+from ._forward_layer import ForwardLayer
from ._model import NeuralNetworkClassifier, NeuralNetworkRegressor

__all__ = [
-"FNNLayer",
+"ForwardLayer",
"NeuralNetworkClassifier",
"NeuralNetworkRegressor",
]
src/safeds/ml/nn/{_fnn_layer.py → _forward_layer.py}
@@ -1,6 +1,7 @@
-from torch import nn
+from torch import Tensor, nn

from safeds.exceptions import ClosedBound, OutOfBoundsError
+from safeds.ml.nn._layer import Layer


class _InternalLayer(nn.Module):
@@ -17,11 +18,11 @@ def __init__(self, input_size: int, output_size: int, activation_function: str):
case _:
raise ValueError("Unknown Activation Function: " + activation_function)

-def forward(self, x: float) -> float:
+def forward(self, x: Tensor) -> Tensor:
return self._fn(self._layer(x))


-class FNNLayer:
+class ForwardLayer(Layer):
def __init__(self, output_size: int, input_size: int | None = None):
"""
Create a FNN Layer.
@@ -49,6 +50,18 @@ def __init__(self, output_size: int, input_size: int | None = None):
def _get_internal_layer(self, activation_function: str) -> _InternalLayer:
return _InternalLayer(self._input_size, self._output_size, activation_function)

+@property
+def input_size(self) -> int:
+"""
+Get the input_size of this layer.
+Returns
+-------
+result :
+The amount of values being passed into this layer.
+"""
+return self._input_size

@property
def output_size(self) -> int:
"""
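With the rename to `ForwardLayer` and the new `input_size` property, both sizes of a layer can be read back after construction. A small usage sketch (the concrete sizes are arbitrary):

```python
from safeds.ml.nn import ForwardLayer

# A layer with an explicit input size; input_size may also be left as None
# and set later via _set_input_size from the new Layer base class below.
layer = ForwardLayer(output_size=8, input_size=4)

assert layer.input_size == 4   # property added in this commit
assert layer.output_size == 8
```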
27 changes: 27 additions & 0 deletions src/safeds/ml/nn/_layer.py
@@ -0,0 +1,27 @@
from abc import ABC, abstractmethod

from torch import nn


class Layer(ABC):
@abstractmethod
def __init__(self) -> None:
pass # pragma: no cover

@abstractmethod
def _get_internal_layer(self, activation_function: str) -> nn.Module:
pass # pragma: no cover

@property
@abstractmethod
def input_size(self) -> int:
pass # pragma: no cover

@property
@abstractmethod
def output_size(self) -> int:
pass # pragma: no cover

@abstractmethod
def _set_input_size(self, input_size: int) -> None:
pass # pragma: no cover
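The new abstract `Layer` class defines the interface that `ForwardLayer` now implements. A minimal, purely hypothetical subclass sketch to illustrate the abstract members (the class itself is not part of the commit):

```python
from torch import nn

from safeds.ml.nn._layer import Layer


class _IdentityLayer(Layer):
    """Hypothetical layer that passes its input through unchanged."""

    def __init__(self, size: int) -> None:
        self._size = size

    def _get_internal_layer(self, activation_function: str) -> nn.Module:
        # Ignores the activation function and returns an identity module.
        return nn.Identity()

    @property
    def input_size(self) -> int:
        return self._size

    @property
    def output_size(self) -> int:
        return self._size

    def _set_input_size(self, input_size: int) -> None:
        self._size = input_size
```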
