-
Notifications
You must be signed in to change notification settings - Fork 115
Issue/517 InfiniCore 添加 nn::module c++接口 &&线性层实现 #531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| #pragma once | ||
|
|
||
| #include "infinicore/nn.hpp" | ||
| #include "infinicore/ops.hpp" | ||
| #include "infinicore/tensor.hpp" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| #pragma once | ||
|
|
||
| #include "nn/embedding.hpp" | ||
| #include "nn/linear.hpp" | ||
| #include "nn/rmsnorm.hpp" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| #pragma once | ||
|
|
||
| #include "module.hpp" | ||
| #include "../ops.hpp" | ||
| #include <optional> | ||
|
|
||
| namespace infinicore::nn { | ||
|
|
||
| /** | ||
| * @brief Lookup-table layer that turns integer indices into dense embedding vectors | ||
| * | ||
| * Holds the embeddings of a dictionary with a fixed number of entries and a fixed | ||
| * vector size. The layer receives a tensor of indices and produces the matching | ||
| * embedding vectors, which is the usual way to store and fetch word embeddings. | ||
| * | ||
| * Modeled after PyTorch's nn.Embedding: | ||
| * https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html | ||
| * | ||
| * Example: | ||
| * @code | ||
| * // Dictionary of 10000 words, each mapped to a 300-dimensional vector | ||
| * auto embedding = Embedding(10000, 300); | ||
| * | ||
| * // Indices laid out as [batch_size, seq_len] | ||
| * auto indices = Tensor::from_data({2, 5}, {3, 5, 12, 8, 99, 0, 1, 45, 67, 23}); | ||
| * | ||
| * // Result: [batch_size, seq_len, embedding_dim] = [2, 5, 300] | ||
| * auto embeddings = embedding.forward(indices); | ||
| * @endcode | ||
| */ | ||
| class Embedding : public Module { | ||
| public: | ||
| /** | ||
| * @brief Build an Embedding layer | ||
| * | ||
| * @param num_embeddings Number of entries in the embedding dictionary (vocabulary size) | ||
| * @param embedding_dim Length of every embedding vector | ||
| * @param padding_idx Optional padding index; when given, the entries at padding_idx do not | ||
| * contribute to the gradient and that embedding vector is not updated | ||
| * during training | ||
| * @param dtype Element type of the embedding weights (default: DataType::F32) | ||
| * @param device Device on which the embedding weight is created | ||
| */ | ||
| Embedding(size_t num_embeddings, size_t embedding_dim, | ||
| std::optional<int64_t> padding_idx = std::nullopt, | ||
| const DataType &dtype = DataType::F32, const Device &device = Device()); | ||
|
|
||
| /** | ||
| * @brief Forward pass: fetch the embedding vector for every index | ||
| * | ||
| * @param indices Tensor of indices into the embedding matrix. Any shape (*) is accepted, | ||
| * typically [batch_size] or [batch_size, seq_len] | ||
| * @return Tensor of embedding vectors with shape (*, embedding_dim), where * is the | ||
| * shape of the input | ||
| * | ||
| * Example: | ||
| * Input shape: [2, 3] -> Output shape: [2, 3, embedding_dim] | ||
| * Input shape: [10] -> Output shape: [10, embedding_dim] | ||
| */ | ||
| Tensor forward(const Tensor &indices) const; | ||
|
|
||
| // Parameter accessor | ||
| Tensor weight() const { return weight_; } | ||
|
|
||
| // Configuration getters | ||
| size_t num_embeddings() const { return num_embeddings_; } | ||
| size_t embedding_dim() const { return embedding_dim_; } | ||
| std::optional<int64_t> padding_idx() const { return padding_idx_; } | ||
| DataType dtype() const { return dtype_; } | ||
|
|
||
| // Textual description of the layer configuration | ||
| std::string extra_repr() const; | ||
|
|
||
| protected: | ||
| // Learnable parameter: the embedding table (declared as member weight_) | ||
| INFINICORE_NN_PARAMETER(weight); | ||
|
|
||
| private: | ||
| size_t num_embeddings_; // Number of dictionary entries | ||
| size_t embedding_dim_; // Length of each embedding vector | ||
| std::optional<int64_t> padding_idx_; // Padding index, if any | ||
| DataType dtype_; // Element type of the embedding weights | ||
| }; | ||
|
|
||
| } // namespace infinicore::nn |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| #pragma once | ||
|
|
||
| #include "module.hpp" | ||
| #include "../ops.hpp" | ||
|
|
||
| namespace infinicore::nn { | ||
|
|
||
| /** | ||
| * @brief Fully connected (linear) layer | ||
| * | ||
| * Computes output = input @ weight.T + bias, with an overload that also adds a residual | ||
| * tensor to the result. | ||
| */ | ||
| class Linear : public Module { | ||
| public: | ||
| /** | ||
| * @brief Build a Linear layer | ||
| * | ||
| * @param in_features Number of input features | ||
| * @param out_features Number of output features | ||
| * @param bias Whether the layer has a bias term (default: true) | ||
| * @param dtype Data type of the layer (default: DataType::F32) | ||
| * @param device Device passed to the layer (default: Device()) | ||
| */ | ||
| Linear(size_t in_features, | ||
| size_t out_features, | ||
| bool bias = true, | ||
| const DataType &dtype = DataType::F32, | ||
| const Device &device = Device()); | ||
|
|
||
| // Plain forward pass: | ||
| // output = input @ weight.T + bias | ||
| // NOTE(review): input is taken by non-const reference, unlike Embedding::forward and | ||
| // RMSNorm::forward which take const Tensor & - confirm whether that is intended. | ||
| Tensor forward(Tensor &input) const; | ||
|
|
||
| // Forward pass fused with a residual connection (InfiniLM-style): | ||
| // output = input @ weight.T + bias + residual | ||
| Tensor forward(Tensor &input, Tensor &residual) const; | ||
|
|
||
| // Parameter accessors | ||
| Tensor weight() const { return weight_; } | ||
| Tensor bias() const { return bias_; } | ||
|
|
||
| // Configuration getters | ||
| size_t in_features() const { return in_features_; } | ||
| size_t out_features() const { return out_features_; } | ||
| bool has_bias() const { return has_bias_; } | ||
| DataType dtype() const { return dtype_; } | ||
|
|
||
| // Textual description of the layer configuration | ||
| std::string extra_repr() const; | ||
|
|
||
| protected: | ||
| // Learnable parameters (declared as members weight_ and bias_) | ||
| INFINICORE_NN_PARAMETER(weight); | ||
| INFINICORE_NN_PARAMETER(bias); | ||
|
|
||
| private: | ||
| // Shared computation used by the forward overloads | ||
| Tensor compute_linear(Tensor &input) const; | ||
|
|
||
| size_t in_features_; | ||
| size_t out_features_; | ||
| bool has_bias_; | ||
| DataType dtype_; | ||
| }; | ||
|
|
||
| } // namespace infinicore::nn | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| #pragma once | ||
|
|
||
| #include "parameter.hpp" | ||
| #include "../tensor.hpp" | ||
|
|
||
| #include <unordered_map> | ||
| #include <type_traits> | ||
| #include <vector> | ||
|
|
||
| namespace infinicore::nn { | ||
| /** | ||
| * @brief Base class for neural-network modules | ||
| * | ||
| * Keeps this module's parameters (parameters_) and its child modules (submodules_), | ||
| * both keyed by name, and declares the state-dict style accessors and loaders. | ||
| * Derived layers populate both maps through the protected registration helpers. | ||
| */ | ||
| class Module { | ||
| public: | ||
| Module() = default; | ||
|
|
||
| // Returns a name -> Parameter map for this module. | ||
| // NOTE(review): presumably also covers submodule parameters under prefixed names | ||
| // (see collect_all_parameters) - confirm against the implementation. | ||
| const std::unordered_map<std::string, Parameter> &state_dict() const; | ||
|
|
||
| // Loads parameter values from a name -> Tensor map. | ||
| void load_state_dict(const std::unordered_map<std::string, Tensor> &_state_dict); | ||
|
|
||
| // Loads the single parameter called `name` from the tensor `param`. | ||
| void load_parameter(const std::string &name, const Tensor &param); | ||
|
|
||
| // Loads the single parameter called `name` from the raw memory at `data`. | ||
| // NOTE(review): the required size/layout of `data` is not visible here - confirm. | ||
| void load_parameter_from_blob(const std::string &name, const void *data); | ||
|
|
||
| protected: | ||
| // Registers `param` under `name` and returns it as a Tensor. | ||
| Tensor register_parameter(const std::string &name, Parameter param); | ||
|
|
||
| // Add an existing submodule to this module's hierarchy | ||
| // Template parameter M must be a type derived from Module | ||
| // Returns the submodule for convenience (allows method chaining) | ||
| // An existing entry stored under the same name is replaced. | ||
| template <typename M> | ||
| std::shared_ptr<M> add_module(const std::string &name, std::shared_ptr<M> submodule) { | ||
| // Ensure M is derived from Module (compile-time check) | ||
| static_assert(std::is_base_of<Module, M>::value, | ||
| "Template parameter M must be derived from infinicore::nn::Module"); | ||
|
|
||
| // Store in the submodules map (std::shared_ptr<M> automatically converts to std::shared_ptr<Module>) | ||
| submodules_[name] = submodule; | ||
|
|
||
| return submodule; | ||
| } | ||
|
|
||
| // Create and register a new submodule by constructing it with the given arguments | ||
| // Template parameter M must be a type derived from Module | ||
| // Args are forwarded to M's constructor | ||
| template <typename M, typename... Args> | ||
| std::shared_ptr<M> register_module(const std::string &name, Args &&...args) { | ||
| // Ensure M is derived from Module (compile-time check) | ||
| static_assert(std::is_base_of<Module, M>::value, | ||
| "Template parameter M must be derived from infinicore::nn::Module"); | ||
|
|
||
| // Construct the submodule | ||
| auto submodule = std::make_shared<M>(std::forward<Args>(args)...); | ||
|
|
||
| return add_module(name, submodule); | ||
| } | ||
|
|
||
| // Create and register multiple submodules of the same type | ||
| // Each submodule is named as "name.0", "name.1", etc. | ||
| // Template parameter M must be a type derived from Module | ||
| // Every submodule is constructed from the same arguments, so they are passed on as | ||
| // lvalues (copied) rather than forwarded. | ||
| template <typename M, typename... Args> | ||
| std::vector<std::shared_ptr<M>> register_modules(size_t count, const std::string &name, Args &&...args) { | ||
| static_assert(std::is_base_of<Module, M>::value, | ||
| "Template parameter M must be derived from infinicore::nn::Module"); | ||
|
|
||
| std::vector<std::shared_ptr<M>> modules; | ||
| modules.reserve(count); | ||
| for (size_t i = 0; i < count; i++) { | ||
| // Do not std::forward here: forwarding on every iteration would let the first | ||
| // constructor move from rvalue arguments and hand moved-from values to the rest. | ||
| modules.push_back(register_module<M>(name + "." + std::to_string(i), args...)); | ||
| } | ||
| return modules; | ||
| } | ||
|
|
||
| protected: | ||
| Device device_; | ||
| std::unordered_map<std::string, std::shared_ptr<Module>> submodules_; | ||
| std::unordered_map<std::string, Parameter> parameters_; | ||
|
|
||
| private: | ||
| // Gathers parameters into `all_params`; `prefix` is prepended to the names. | ||
| // NOTE(review): presumably recurses into submodules_ - confirm against the implementation. | ||
| void collect_all_parameters(std::unordered_map<std::string, Parameter> &all_params, const std::string &prefix = "") const; | ||
| }; | ||
|
|
||
| // ============================================================================ | ||
| // PyTorch-like Macros for Convenient Module Registration | ||
| // ============================================================================ | ||
|
|
||
| /** | ||
| * @brief Register submodules with automatic name inference from variable name | ||
| * | ||
| * Usage: | ||
| * @code | ||
| * class MyModel : public Module { | ||
| * protected: | ||
| * INFINICORE_NN_MODULE(Linear, layer1); | ||
| * INFINICORE_NN_MODULE(Linear, layer2); | ||
| * INFINICORE_NN_MODULE_VEC(Linear, layers); | ||
| * INFINICORE_NN_PARAMETER(scaling_factor); | ||
| * | ||
| * public: | ||
| * MyModel() { | ||
| * INFINICORE_NN_MODULE_INIT(layer1, 128, 64); | ||
| * INFINICORE_NN_MODULE_INIT(layer2, 64, 32); | ||
| * INFINICORE_NN_MODULE_VEC_INIT(layers, 3, Linear, 32, 16); | ||
| * INFINICORE_NN_PARAMETER_INIT(scaling_factor, ({1}, DataType::F32, Device())); | ||
| * } | ||
| * }; | ||
| * @endcode | ||
| */ | ||
|
|
||
| // Declare a single module member variable | ||
| // Expands to a std::shared_ptr<ModuleType> member called `<name>_` (trailing underscore). | ||
| #define INFINICORE_NN_MODULE(ModuleType, name) \ | ||
| std::shared_ptr<ModuleType> name##_ | ||
|
|
||
| // Declare a vector of modules member variable | ||
| // Expands to a std::vector<std::shared_ptr<ModuleType>> member called `<name>_`. | ||
| #define INFINICORE_NN_MODULE_VEC(ModuleType, name) \ | ||
| std::vector<std::shared_ptr<ModuleType>> name##_ | ||
|
|
||
| // Initialize a module in constructor | ||
| // The module type is recovered from the declared member `<name>_` via decltype, so it is | ||
| // not repeated here. The submodule is registered under the stringized `name` and the | ||
| // remaining arguments go to its constructor through register_module. | ||
| // `##__VA_ARGS__` is the GNU extension that drops the preceding comma when no | ||
| // constructor arguments are given. | ||
| #define INFINICORE_NN_MODULE_INIT(name, ...) \ | ||
| name##_ = this->register_module<std::remove_reference<decltype(*name##_)>::type>(#name, ##__VA_ARGS__) | ||
|
|
||
| // Initialize a vector of modules in constructor | ||
| // Usage: INFINICORE_NN_MODULE_VEC_INIT(layers, count, ModuleType, ctor_args...) | ||
| // Example: INFINICORE_NN_MODULE_VEC_INIT(layers, 3, Linear, 128, 64) | ||
| // Delegates to register_modules with the stringized `name` as the base name and stores | ||
| // the returned vector in `<name>_`. | ||
| #define INFINICORE_NN_MODULE_VEC_INIT(name, count, ModuleType, ...) \ | ||
| name##_ = this->register_modules<ModuleType>(count, #name, ##__VA_ARGS__) | ||
|
|
||
|
PanZezhong1725 marked this conversation as resolved.
|
||
| // Declare a parameter member variable | ||
| #define INFINICORE_NN_PARAMETER(name) \ | ||
| infinicore::nn::Parameter name##_ | ||
|
|
||
| // Initialize a parameter in constructor | ||
| // Usage: INFINICORE_NN_PARAMETER_INIT(name, (shape, dtype, device)) | ||
| // Example: INFINICORE_NN_PARAMETER_INIT(weight, ({out_features, in_features}, DataType::F32, device)) | ||
| #define INFINICORE_NN_PARAMETER_INIT(name, args) \ | ||
| name##_ = infinicore::nn::Parameter args; \ | ||
| this->register_parameter(#name, name##_) | ||
|
|
||
| } // namespace infinicore::nn | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| #pragma once | ||
|
|
||
| #include "../tensor.hpp" | ||
|
|
||
| namespace infinicore::nn { | ||
| /** | ||
| * @brief Tensor subclass used for module parameters | ||
| * | ||
| * Adds construction from shape/dtype/device and loading from a raw memory blob on top | ||
| * of Tensor. | ||
| */ | ||
| class Parameter : public Tensor { | ||
| public: | ||
| // Default-constructed parameter | ||
| Parameter(); | ||
|
|
||
| // Parameter with the given shape and data type on the given device | ||
| Parameter(const Shape &shape, const DataType &dtype, const Device &device); | ||
|
|
||
| // Load the parameter's contents from the raw memory at `data`. | ||
| // NOTE(review): presumably `data` must hold at least as many bytes as the tensor - confirm. | ||
| void load_blob(const void *data); | ||
| }; | ||
| } // namespace infinicore::nn |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| #pragma once | ||
|
|
||
| #include "module.hpp" | ||
| #include "../ops.hpp" | ||
|
|
||
| namespace infinicore::nn { | ||
|
|
||
| /** | ||
| * @brief Root Mean Square Layer Normalization (RMSNorm) | ||
| * | ||
| * Normalizes the last dimension of the input by its root mean square. | ||
| * In contrast to LayerNorm, no mean is subtracted and no bias is used. | ||
| * | ||
| * Formula: y = (x / RMS(x)) * weight | ||
| * where RMS(x) = sqrt(mean(x^2) + eps) | ||
| * | ||
| * Used in LLaMA, Galactica, and other modern language models as a | ||
| * simpler and faster alternative to LayerNorm. | ||
| * | ||
| * Example: | ||
| * @code | ||
| * // RMSNorm over a hidden size of 4096 | ||
| * auto norm = RMSNorm(4096); | ||
| * | ||
| * // Input: [batch, seq_len, hidden_size] | ||
| * auto input = Tensor::randn({2, 10, 4096}); | ||
| * | ||
| * // Output: [batch, seq_len, hidden_size] | ||
| * auto output = norm.forward(input); | ||
| * @endcode | ||
| */ | ||
| class RMSNorm : public Module { | ||
| public: | ||
| /** | ||
| * @brief Construct a RMSNorm layer | ||
| * | ||
| * Declared explicit because the constructor is callable with a single size_t; | ||
| * without it a plain integer would convert implicitly to an RMSNorm. | ||
| * | ||
| * @param normalized_shape Size of the feature dimension to normalize (typically hidden_size) | ||
| * @param eps Small constant for numerical stability (default: 1e-6) | ||
| * @param dtype Data type for the weight (default: DataType::F32) | ||
| * @param device Device to create the weight on | ||
| */ | ||
| explicit RMSNorm(size_t normalized_shape, | ||
| double eps = 1e-6, | ||
| const DataType &dtype = DataType::F32, | ||
| const Device &device = Device()); | ||
|
|
||
| /** | ||
| * @brief Forward pass: apply RMSNorm | ||
| * | ||
| * @param x Input tensor of shape (*, normalized_shape) where * is any number of dimensions | ||
| * @return Normalized tensor with same shape as input | ||
| * | ||
| * The normalization is applied over the last dimension. | ||
| * For example: | ||
| * Input: [batch, seq_len, hidden_size] -> normalize over hidden_size | ||
| * Input: [batch, hidden_size] -> normalize over hidden_size | ||
| */ | ||
| Tensor forward(const Tensor &x) const; | ||
|
|
||
| // Module information | ||
| size_t normalized_shape() const { return normalized_shape_; } | ||
| double eps() const { return eps_; } | ||
| DataType dtype() const { return dtype_; } | ||
|
|
||
| // String representation | ||
| std::string extra_repr() const; | ||
|
|
||
| // Accessors for parameters | ||
| Tensor weight() const { return weight_; } | ||
|
|
||
| protected: | ||
| // Parameters (declared as member weight_) | ||
| INFINICORE_NN_PARAMETER(weight); | ||
|
|
||
| private: | ||
| size_t normalized_shape_; // Size of the feature dimension | ||
| double eps_; // Epsilon for numerical stability | ||
| DataType dtype_; // Data type for weight | ||
| }; | ||
|
|
||
| } // namespace infinicore::nn |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.