Skip to content

Commit

Permalink
Progress towards beta release #1
Browse files Browse the repository at this point in the history
  • Loading branch information
ToddThomson committed Jan 23, 2022
1 parent 6e82a9e commit 8994d47
Show file tree
Hide file tree
Showing 20 changed files with 186 additions and 130 deletions.
1 change: 1 addition & 0 deletions Mila/Mila.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
<files>
<file src="build\x64\Debug\Mila.lib" target="build\native\x64\Debug" />
<file src="build\x64\Debug\Mila.pdb" target="build\native\x64\Debug" />
<!-- file src="build\x64\Debug\Mila.xml" target="build\native\x64\Debug" / -->
<file src="build\x64\Debug\*.ifc" target="build\native\x64\Debug" />
<file src="..\icon.png" target="images\" />
<!-- .targets -->
Expand Down
14 changes: 8 additions & 6 deletions Mila/Mila.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<CudaCompile Include="Source\Dnn\CuDNN\init_data.cu" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="Mila.Core.ixx" />
<ClCompile Include="Source\Core\Core.ixx" />
<ClCompile Include="Source\Core\Core.Version.ixx" />
<ClCompile Include="Source\Dnn\Cuda\Cuda.ixx" />
<ClCompile Include="Source\Dnn\Cuda\CudaDevice.ixx" />
Expand All @@ -29,7 +29,7 @@
<ClCompile Include="Source\Dnn\Cuda\CudaProfiler.ixx" />
<ClCompile Include="Source\Dnn\Cuda\CudaStream.ixx" />
<ClCompile Include="Source\Dnn\Cuda\CudaUniqueHandle.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\MilaCuDnn.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\CuDnn.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\CuDnnContext.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\CuDnnDescriptor.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\CuDnnError.ixx" />
Expand All @@ -40,12 +40,14 @@
<ClCompile Include="Source\Dnn\CuDNN\CuDnnOpaqueHandle.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\CuDnnStatus.ixx" />
<ClCompile Include="Source\Dnn\CuDNN\CuDnnUtils.ixx" />
<ClCompile Include="Source\Dnn\Data\Data.ixx" />
<ClCompile Include="Source\Dnn\Data\DataDataset.ixx" />
<ClCompile Include="Source\Dnn\Data\H5\DataH5DataTypeMapper.ixx" />
<ClCompile Include="Source\Dnn\DnnModel.ixx" />
<ClCompile Include="Source\Dnn\DnnModelBuilder.ixx" />
<ClCompile Include="Source\Dnn\DnnModelOptions.ixx" />
<ClCompile Include="Source\Dnn\DnnDropoutDescriptor.ixx" />
<ClCompile Include="Source\Dnn\MilaDnn.ixx" />
<ClCompile Include="Source\Dnn\Dnn.ixx" />
<ClCompile Include="Source\Dnn\DnnNeuralNetType.ixx" />
<ClCompile Include="Source\Dnn\DnnRnnDataSetDescriptor.ixx" />
<ClCompile Include="Source\Dnn\DnnRnnLayerCollection.ixx" />
Expand All @@ -55,13 +57,12 @@
<ClCompile Include="Source\Dnn\DnnRnnOpDescriptor.ixx" />
<ClCompile Include="Source\Dnn\DnnStateTensorDescriptor.ixx" />
<ClCompile Include="Source\Dnn\DnnTensorDescriptor.ixx" />
<ClCompile Include="Source\Dnn\Data\DataDatasetLoader.ixx" />
<ClCompile Include="Source\Dnn\Data\DataDatasetType.ixx" />
<ClCompile Include="Source\Dnn\Data\FileStream.ixx" />
<ClCompile Include="Source\Dnn\Data\H5\DataH5DatasetReader.ixx" />
<ClCompile Include="Source\Dnn\Data\H5\DataH5DatasetWriter.ixx" />
<ClCompile Include="Source\Dnn\Data\DataCategoryToVectorEncoder.ixx" />
<ClCompile Include="Source\Dnn\Data\DataTextToDataset.ixx" />
<ClCompile Include="Source\Dnn\Data\DataCategoryVectorEncoder.ixx" />
<ClCompile Include="Source\Dnn\Data\DataCharDatasetGenerator.ixx" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Source\Dnn\CuDNN\fp16_emu.h" />
Expand Down Expand Up @@ -129,6 +130,7 @@
<PreprocessorDefinitions>_DEBUG;_LIB;%(PreprocessorDefinitions);WIN32;_WINDOWS</PreprocessorDefinitions>
<ObjectFileName>$(IntDir)</ObjectFileName>
<ModuleOutputFile>$(IntDir)%(FileName).ifc</ModuleOutputFile>
<GenerateXMLDocumentationFiles>false</GenerateXMLDocumentationFiles>
</ClCompile>
<ResourceCompile>
<PreprocessorDefinitions>%(PreprocessorDefinitions);WIN32;_DEBUG;_WINDOWS;CMAKE_INTDIR=\"Debug\"</PreprocessorDefinitions>
Expand Down
17 changes: 9 additions & 8 deletions Mila/Mila.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<ClCompile Include="Source\Dnn\CuDNN\fp16_emu.cpp">
<Filter>Dnn\CuDnn</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\CuDNN\MilaCuDnn.ixx">
<ClCompile Include="Source\Dnn\CuDNN\CuDnn.ixx">
<Filter>Dnn\CuDnn</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\CuDNN\CuDnnContext.ixx">
Expand Down Expand Up @@ -101,7 +101,7 @@
<ClCompile Include="Source\Dnn\DnnDropoutDescriptor.ixx">
<Filter>Dnn</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\MilaDnn.ixx">
<ClCompile Include="Source\Dnn\Dnn.ixx">
<Filter>Dnn</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\DnnNeuralNetType.ixx">
Expand Down Expand Up @@ -137,31 +137,32 @@
<ClCompile Include="Source\Dnn\Data\H5\DataH5DatasetWriter.ixx">
<Filter>Dnn\Data\H5</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\DataDatasetLoader.ixx">
<Filter>Dnn\Data</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\DataDatasetType.ixx">
<Filter>Dnn\Data</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\FileStream.ixx">
<Filter>Dnn\Data</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\DataCategoryToVectorEncoder.ixx">
<ClCompile Include="Source\Dnn\Data\DataCategoryVectorEncoder.ixx">
<Filter>Dnn\Data</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\DataTextToDataset.ixx">
<ClCompile Include="Source\Dnn\Data\DataCharDatasetGenerator.ixx">
<Filter>Dnn\Data</Filter>
</ClCompile>
<ClCompile Include="Source\Core\Core.Version.ixx">
<Filter>Core</Filter>
</ClCompile>
<ClCompile Include="Mila.Core.ixx">
<ClCompile Include="Source\Core\Core.ixx">
<Filter>Core</Filter>
</ClCompile>
<ClCompile Include="Source\Mila.ixx" />
<ClCompile Include="Source\Dnn\Data\H5\DataH5DataTypeMapper.ixx">
<Filter>Dnn\Data\H5</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\DataDataset.ixx">
<Filter>Dnn\Data</Filter>
</ClCompile>
<ClCompile Include="Source\Dnn\Data\Data.ixx" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="Source\Dnn\CuDNN\fp16_emu.h">
Expand Down
2 changes: 1 addition & 1 deletion Mila/Mila.Core.ixx → Mila/Source/Core/Core.ixx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@
* DEALINGS IN THE SOFTWARE.
*/

export module Mila.Core;
export module Core;

export import Core.Version;
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* DEALINGS IN THE SOFTWARE.
*/

export module Mila.Cudnn;
export module Cudnn;

export import CuDnn.Context;
export import CuDnn.Descriptor;
Expand Down
27 changes: 27 additions & 0 deletions Mila/Source/Dnn/Data/Data.ixx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2021 Todd Thomson, Achilles Software. All rights reserved.
*
* Please refer to the Mila end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/

// Primary interface unit of the `Data` module. It declares nothing of its
// own; it only re-exports the modules listed below, so a single
// `import Data;` gives the importer access to everything they export.
export module Data;

// Re-exported modules (each one is defined in its own .ixx file).
export import Data.Dataset;
export import Data.DatasetType;
export import Data.CharDatasetGenerator;
export import Data.CategoryVectorEncoder;
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ module;
#include <iterator>
#include <vector>

export module Data.CategoryToVectorEncoder;
export module Data.CategoryVectorEncoder;

namespace Mila::Dnn::Data
{
/// <summary>
/// A category to vector (One-hot) encoder
/// </summary>
export template <typename TElement>
class CategoryToVectorEncoder
class CategoryVectorEncoder
{
public:

Expand All @@ -42,7 +42,7 @@ namespace Mila::Dnn::Data
/// </summary>
/// <param name="k">Size of vector</param>
/// <param name="value">Numeric value to use for marking category</param>
CategoryToVectorEncoder( size_t k, TElement value )
CategoryVectorEncoder( size_t k, TElement value )
: k_( k ), value_( value )
{
k_vector_ = std::vector<TElement>( k_, {} );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ module;
#include <string>
#include <map>

export module Data.TextToDataset;
export module Data.CharDatasetGenerator;

import Data.DatasetType;
import Data.H5DatasetWriter;

namespace fs = std::filesystem;
Expand All @@ -61,18 +62,18 @@ namespace Mila::Dnn::Data
/// <summary>
/// A class to convert a text file to an H5 formatted dataset.
/// </summary>
export class TextToDataset
export class CharDatasetGenerator
{
public:

/// <summary>
/// Creates a dataset from a text file with the specified test, validation
/// and training splits.
/// splits.
/// </summary>
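/// <param name="file_path">Path of the text file to read.</param>
/// <param name="test_split">Fraction of the text reserved for the test split (defaults to 0.1). The constructor checks that test_split + validation_split is less than 1.0.</param>
/// <param name="validation_split">Fraction of the text reserved for the validation split (defaults to 0.1).</param>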
TextToDataset( const fs::path& file_path, float test_split = 0.1f, float validation_split = 0.1f )
CharDatasetGenerator( const fs::path& file_path, float test_split = 0.1f, float validation_split = 0.1f )
: file_path_( file_path), test_split_( test_split ), validation_split_( validation_split )
{
if ( test_split + validation_split >= 1.0f )
Expand All @@ -87,9 +88,9 @@ namespace Mila::Dnn::Data
}

/// <summary>
///
/// Generates a dataset from the text file.
/// </summary>
void CreateDataset()
void GenerateDataset()
{
std::ifstream text_file( file_path_ );

Expand All @@ -99,8 +100,8 @@ namespace Mila::Dnn::Data
}

// First go through the file once to determine its size and to
// build the vocabulary token map.
std::map<char, int> tokens = {};
// build the vocabulary map.
std::map<char, int> vocabulary = {};
int total_chars = 0;
int index = 1;
std::string line;
Expand All @@ -111,19 +112,19 @@ namespace Mila::Dnn::Data

for ( char c : line )
{
auto it = tokens.find( c );
if ( it == tokens.end() )
auto it = vocabulary.find( c );
if ( it == vocabulary.end() )
{
tokens.insert( {c, index++} );
vocabulary.insert( {c, index++} );
}
}
}

std::cout << "File size: " << std::to_string( total_chars ) << std::endl;
std::cout << "Vocab size: " << std::to_string( tokens.size() ) << std::endl;
std::cout << "Vocab size: " << std::to_string( vocabulary.size() ) << std::endl;

vocabulary_size = tokens.size();
text_size = total_chars;
vocabulary_size_ = vocabulary.size();
text_size_ = total_chars;

// Now we can figure out the split sizes
int validation_size = validation_split_ * total_chars;
Expand Down Expand Up @@ -152,7 +153,7 @@ namespace Mila::Dnn::Data

for ( char& c : line )
{
splits[ split_index ][ split_position++ ] = tokens[ c ];
splits[ split_index ][ split_position++ ] = vocabulary[ c ];

if ( split_position >= splits[ split_index ].size() )
{
Expand All @@ -172,15 +173,14 @@ namespace Mila::Dnn::Data
h5Writer.WriteDataset<char>( "validation_ds", splits[ VALIDATION_SET ] );
h5Writer.WriteDataset<char>( "testing_ds", splits[ TESTING_SET ] );

// Convert the m
// Write the vocabulary map to a linear vector
std::vector<int> vocabulary_vector;

for ( const auto& [key, value ] : tokens ) {
for ( const auto& [key, value ] : vocabulary ) {
vocabulary_vector.push_back( key );
vocabulary_vector.push_back( value );
}

h5Writer.WriteDataset<int>( "vocabulary_ds", vocabulary_vector );
h5Writer.WriteDataset<int>( to_string( DatasetType::vocabulary ), vocabulary_vector );
}

private:
Expand All @@ -190,8 +190,10 @@ namespace Mila::Dnn::Data
float validation_split_ = 0.1f;
float test_split_ = 0.1f;

size_t vocabulary_size = 0;
size_t text_size = 0;
std::map<int, int> vocabulary_ = {};

size_t vocabulary_size_ = 0;
size_t text_size_ = 0;

const int TRAINING_SET = 0;
const int VALIDATION_SET = 1;
Expand Down
Loading

0 comments on commit 8994d47

Please sign in to comment.