diff --git a/Textify.Data.Analysis/Tools/DataInitializer.cs b/Textify.Data.Analysis/Tools/DataInitializer.cs index 7d90242..b8e6373 100644 --- a/Textify.Data.Analysis/Tools/DataInitializer.cs +++ b/Textify.Data.Analysis/Tools/DataInitializer.cs @@ -48,6 +48,8 @@ internal static void Initialize(DataType types) bool needsWordsDirty = types.HasFlag(DataType.WordsDirty); bool needsWordsDirtyFull = types.HasFlag(DataType.WordsDirtyFull); bool needsWordsJustDirty = types.HasFlag(DataType.WordsJustDirty); + bool needsCommonWords = types.HasFlag(DataType.CommonWords); + bool needsCommonWordsDirty = types.HasFlag(DataType.CommonWordsDirty); // Go through all the types if (needsNames) @@ -125,6 +127,16 @@ internal static void Initialize(DataType types) if (!DataTools.dataStreams.ContainsKey(nameof(WordsData.bad_words))) DataTools.dataStreams.Add(nameof(WordsData.bad_words), WordsData.bad_words); } + if (needsCommonWords) + { + if (!DataTools.dataStreams.ContainsKey(nameof(WordsData.words_common_clean))) + DataTools.dataStreams.Add(nameof(WordsData.words_common_clean), WordsData.words_common_clean); + } + if (needsCommonWordsDirty) + { + if (!DataTools.dataStreams.ContainsKey(nameof(WordsData.words_common))) + DataTools.dataStreams.Add(nameof(WordsData.words_common), WordsData.words_common); + } } } } diff --git a/Textify.Data.Analysis/Tools/DataType.cs b/Textify.Data.Analysis/Tools/DataType.cs index 9867d77..8f59158 100644 --- a/Textify.Data.Analysis/Tools/DataType.cs +++ b/Textify.Data.Analysis/Tools/DataType.cs @@ -84,5 +84,13 @@ internal enum DataType /// Initializes the zip files containing just the offensive words (18+) for bad word filtering /// WordsJustDirty = 16384, + /// + /// Initializes the zip files containing common words list + /// + CommonWords = 32768, + /// + /// Initializes the zip files containing common words list, including the offensive words (18+) + /// + CommonWordsDirty = 65536, } } diff --git a/Textify.Data.Analysis/Words/WordDataType.cs b/Textify.Data.Analysis/Words/WordDataType.cs index d051498..ed14031 100644 --- a/Textify.Data.Analysis/Words/WordDataType.cs +++ b/Textify.Data.Analysis/Words/WordDataType.cs @@ -44,5 +44,13 @@ public enum WordDataType /// Offensive words list (18+) for bad word filtering /// BadWords, + /// + /// Common word list + /// + CommonWords, + /// + /// Common word list, including offensive words (18+) + /// + CommonWordsDirty, } } diff --git a/Textify.Data.Analysis/Words/WordManager.cs b/Textify.Data.Analysis/Words/WordManager.cs index f069862..2004eb4 100644 --- a/Textify.Data.Analysis/Words/WordManager.cs +++ b/Textify.Data.Analysis/Words/WordManager.cs @@ -147,6 +147,8 @@ private static async Task GetWordListAsync(WordDataType type) type == WordDataType.WordsDirty ? (DataType.WordsDirty, "words_alpha", "words_alpha.txt") : type == WordDataType.WordsDirtyFull ? (DataType.WordsDirtyFull, "words", "words.txt") : type == WordDataType.BadWords ? (DataType.WordsJustDirty, "bad_words", "bad-words.txt") : + type == WordDataType.CommonWords ? (DataType.CommonWords, "words_common_clean", "words-common-clean.txt") : + type == WordDataType.CommonWordsDirty ? (DataType.CommonWordsDirty, "words_common", "words-common.txt") : throw new TextifyException("Invalid word data type"); DataInitializer.Initialize(dataType); var contentStream = new MemoryStream(DataTools.GetDataFrom(resourceName)); diff --git a/Textify.Data/DataRes/WordsData.Designer.cs b/Textify.Data/DataRes/WordsData.Designer.cs index 1340dd2..b168036 100644 --- a/Textify.Data/DataRes/WordsData.Designer.cs +++ b/Textify.Data/DataRes/WordsData.Designer.cs @@ -109,5 +109,25 @@ internal class WordsData { return ((byte[])(obj)); } } + + /// + /// Looks up a localized resource of type System.Byte[]. + /// + internal static byte[] words_common { + get { + object obj = ResourceManager.GetObject("words_common", resourceCulture); + return ((byte[])(obj)); + } + } + + /// + /// Looks up a localized resource of type System.Byte[]. + /// + internal static byte[] words_common_clean { + get { + object obj = ResourceManager.GetObject("words_common_clean", resourceCulture); + return ((byte[])(obj)); + } + } } } diff --git a/Textify.Data/DataRes/WordsData.resx b/Textify.Data/DataRes/WordsData.resx index 23ceeb9..32cdf4f 100644 --- a/Textify.Data/DataRes/WordsData.resx +++ b/Textify.Data/DataRes/WordsData.resx @@ -133,4 +133,10 @@ ../../assets/WordsList/bad-words.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + ../../assets/WordsList/words-common.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + ../../assets/WordsList/words-common-clean.zip;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + diff --git a/Textify.Demos.Offline/Fixtures/Cases/WordGet.cs b/Textify.Demos.Offline/Fixtures/Cases/WordGet.cs index 7ed06e7..f5459c7 100644 --- a/Textify.Demos.Offline/Fixtures/Cases/WordGet.cs +++ b/Textify.Demos.Offline/Fixtures/Cases/WordGet.cs @@ -33,6 +33,13 @@ public void RunFixture() string word = WordManager.GetRandomWord(); Console.WriteLine($"Word {i + 1}: {word}"); } + + // Get 10 random common words + for (int i = 0; i < 10; i++) + { + string word = WordManager.GetRandomWord(WordDataType.CommonWords); + Console.WriteLine($"Common word {i + 1}: {word}"); + } } } } diff --git a/assets/WordsList b/assets/WordsList index 82eb85b..26946c7 160000 --- a/assets/WordsList +++ b/assets/WordsList @@ -1 +1 @@ -Subproject commit 82eb85b4d8bea0c97cdb6ad5540428941dea945b +Subproject commit 26946c7de93469f49417ba6d6d23e978536b09a4