diff --git a/BotSharp.sln b/BotSharp.sln
index e992d26ad..102137084 100644
--- a/BotSharp.sln
+++ b/BotSharp.sln
@@ -149,6 +149,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ExcelHandle
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.ImageHandler", "src\Plugins\BotSharp.Plugin.ImageHandler\BotSharp.Plugin.ImageHandler.csproj", "{242F2D93-FCCE-4982-8075-F3052ECCA92C}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Plugin.FuzzySharp", "src\Plugins\BotSharp.Plugin.FuzzySharp\BotSharp.Plugin.FuzzySharp.csproj", "{E7C243B9-E751-B3B4-8F16-95C76CA90D31}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -629,6 +631,14 @@ Global
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|Any CPU.Build.0 = Release|Any CPU
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.ActiveCfg = Release|Any CPU
{242F2D93-FCCE-4982-8075-F3052ECCA92C}.Release|x64.Build.0 = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Debug|x64.Build.0 = Debug|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.ActiveCfg = Release|Any CPU
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31}.Release|x64.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -701,6 +711,7 @@ Global
{0428DEAA-E4FE-4259-A6D8-6EDD1A9D0702} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{FC63C875-E880-D8BB-B8B5-978AB7B62983} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
{242F2D93-FCCE-4982-8075-F3052ECCA92C} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
+ {E7C243B9-E751-B3B4-8F16-95C76CA90D31} = {51AFE054-AE99-497D-A593-69BAEFB5106F}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {A9969D89-C98B-40A5-A12B-FC87E55B3A19}
diff --git a/Directory.Packages.props b/Directory.Packages.props
index 83b31d0e2..c0899132a 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -5,6 +5,8 @@
true
+
+
@@ -18,6 +20,7 @@
+
diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs
new file mode 100644
index 000000000..71823515d
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseCollection.cs
@@ -0,0 +1,7 @@
+namespace BotSharp.Abstraction.Knowledges;
+
+public interface IPhraseCollection
+{
+    Task<Dictionary<string, HashSet<string>>> LoadVocabularyAsync();
+    Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadDomainTermMappingAsync();
+}
diff --git a/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs
new file mode 100644
index 000000000..6165f67da
--- /dev/null
+++ b/src/Infrastructure/BotSharp.Abstraction/Knowledges/IPhraseService.cs
@@ -0,0 +1,12 @@
+namespace BotSharp.Abstraction.Knowledges;
+
+public interface IPhraseService
+{
+    /// <summary>
+    /// Search similar phrases in the collection
+    /// </summary>
+    /// <param name="collection"></param>
+    /// <param name="term"></param>
+    /// <returns></returns>
+ Task> SearchPhrasesAsync(string collection, string term);
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj b/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj
new file mode 100644
index 000000000..ec9dfde32
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/BotSharp.Plugin.FuzzySharp.csproj
@@ -0,0 +1,25 @@
+
+
+
+ $(TargetFramework)
+ enable
+ $(LangVersion)
+ $(BotSharpVersion)
+ $(GeneratePackageOnBuild)
+ $(GenerateDocumentationFile)
+ $(SolutionDir)packages
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs
new file mode 100644
index 000000000..369d3fb9c
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/MatchReason.cs
@@ -0,0 +1,21 @@
+
+namespace BotSharp.Plugin.FuzzySharp.Constants
+{
+ public static class MatchReason
+ {
+        /// <summary>
+        /// Token matched a domain term mapping (e.g., HVAC -> Air Conditioning/Heating)
+        /// </summary>
+ public const string DomainTermMapping = "domain_term_mapping";
+
+        /// <summary>
+        /// Token exactly matched a vocabulary entry
+        /// </summary>
+ public const string ExactMatch = "exact_match";
+
+        /// <summary>
+        /// Token was flagged as a potential typo and a correction was suggested
+        /// </summary>
+ public const string TypoCorrection = "typo_correction";
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs
new file mode 100644
index 000000000..8f160ae5f
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Constants/TextConstants.cs
@@ -0,0 +1,30 @@
+
+namespace BotSharp.Plugin.FuzzySharp.Constants
+{
+ public static class TextConstants
+ {
+        /// <summary>
+        /// Characters that need to be separated during tokenization (by adding spaces before and after)
+        /// Includes: parentheses, brackets, braces, punctuation marks, special symbols, etc.
+        /// This ensures "(IH)" is split into "(", "IH", ")"
+        /// </summary>
+ public static readonly char[] SeparatorChars =
+ {
+ // Parentheses and brackets
+ '(', ')', '[', ']', '{', '}',
+ // Punctuation marks
+ ',', '.', ';', ':', '!', '?',
+ // Special symbols
+ '=', '@', '#', '$', '%', '^', '&', '*', '+', '-', '\\', '|', '<', '>', '~', '`'
+ };
+
+        /// <summary>
+        /// Whitespace characters used as token separators during tokenization.
+        /// Includes: space, tab, newline, and carriage return.
+        /// </summary>
+ public static readonly char[] TokenSeparators =
+ {
+ ' ', '\t', '\n', '\r'
+ };
+ }
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs
new file mode 100644
index 000000000..dc18c73d7
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Controllers/FuzzySharpController.cs
@@ -0,0 +1,61 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Abstraction.FuzzSharp.Arguments;
+using BotSharp.Abstraction.FuzzSharp.Models;
+using Microsoft.AspNetCore.Http;
+using Microsoft.AspNetCore.Mvc;
+using Microsoft.Extensions.Logging;
+
+namespace BotSharp.Plugin.FuzzySharp.Controllers
+{
+ [ApiController]
+ public class FuzzySharpController : ControllerBase
+ {
+ private readonly ITextAnalysisService _textAnalysisService;
+        private readonly ILogger<FuzzySharpController> _logger;
+
+ public FuzzySharpController(
+ ITextAnalysisService textAnalysisService,
+            ILogger<FuzzySharpController> logger)
+ {
+ _textAnalysisService = textAnalysisService;
+ _logger = logger;
+ }
+
+        /// <summary>
+        /// Analyze text for typos and entities using domain-specific vocabulary.
+        ///
+        /// Returns:
+        /// - `original`: Original input text
+        /// - `tokens`: Tokenized text (only included if `include_tokens=true`)
+        /// - `flagged`: List of flagged items (each with `match_type`):
+        ///   - `domain_term_mapping` - Business abbreviations (confidence=1.0)
+        ///   - `exact_match` - Exact vocabulary matches (confidence=1.0)
+        ///   - `typo_correction` - Spelling corrections (confidence less than 1.0)
+        /// - `processing_time_ms`: Processing time in milliseconds
+        /// </summary>
+        /// <param name="request">Text analysis request</param>
+        /// <returns>Text analysis response</returns>
+ [HttpPost("fuzzy-sharp/analyze-text")]
+ [ProducesResponseType(typeof(TextAnalysisResponse), StatusCodes.Status200OK)]
+ [ProducesResponseType(StatusCodes.Status400BadRequest)]
+ [ProducesResponseType(StatusCodes.Status500InternalServerError)]
+        public async Task<IActionResult> AnalyzeText([FromBody] TextAnalysisRequest request)
+ {
+ try
+ {
+ if (string.IsNullOrWhiteSpace(request.Text))
+ {
+ return BadRequest(new { error = "Text is required" });
+ }
+
+ var result = await _textAnalysisService.AnalyzeTextAsync(request);
+ return Ok(result);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Error analyzing text");
+ return StatusCode(500, new { error = $"Error analyzing text: {ex.Message}" });
+ }
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs
new file mode 100644
index 000000000..79fbd9894
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Arguments/TextAnalysisRequest.cs
@@ -0,0 +1,53 @@
+using System.ComponentModel.DataAnnotations;
+using System.Text.Json.Serialization;
+
+namespace BotSharp.Abstraction.FuzzSharp.Arguments;
+
+public class TextAnalysisRequest
+{
+ ///
+ /// Text to analyze
+ ///
+ [Required]
+ [JsonPropertyName("text")]
+ public string Text { get; set; } = string.Empty;
+
+ ///
+ /// Folder path containing CSV files (will read all .csv files from the folder or its 'output' subfolder)
+ ///
+ [JsonPropertyName("vocabulary_folder_name")]
+ public string? VocabularyFolderName { get; set; }
+
+ ///
+ /// Domain term mapping CSV file
+ ///
+ [JsonPropertyName("domain_term_mapping_file")]
+ public string? DomainTermMappingFile { get; set; }
+
+ ///
+ /// Min score for suggestions (0.0-1.0)
+ ///
+ [JsonPropertyName("cutoff")]
+ [Range(0.0, 1.0)]
+ public double Cutoff { get; set; } = 0.82;
+
+ ///
+ /// Max candidates per domain (1-20)
+ ///
+ [JsonPropertyName("topk")]
+ [Range(1, 20)]
+ public int TopK { get; set; } = 5;
+
+ ///
+ /// Max n-gram size (1-10)
+ ///
+ [JsonPropertyName("max_ngram")]
+ [Range(1, 10)]
+ public int MaxNgram { get; set; } = 5;
+
+ ///
+ /// Include tokens field in response (default: False)
+ ///
+ [JsonPropertyName("include_tokens")]
+ public bool IncludeTokens { get; set; } = false;
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs
new file mode 100644
index 000000000..c2d91b0e4
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/INgramProcessor.cs
@@ -0,0 +1,27 @@
+using BotSharp.Abstraction.FuzzSharp.Models;
+
+namespace BotSharp.Abstraction.FuzzSharp
+{
+ public interface INgramProcessor
+ {
+ ///
+ /// Process tokens and generate all possible n-gram match results
+ ///
+ /// List of tokens to process
+ /// Vocabulary (domain type -> vocabulary set)
+ /// Domain term mapping
+ /// Lookup table (lowercase vocabulary -> (canonical form, domain type list))
+ /// Maximum n-gram length
+ /// Minimum confidence threshold for fuzzy matching
+ /// Maximum number of matches to return
+ /// List of flagged items
+        List<FlaggedItem> ProcessNgrams(
+            List<string> tokens,
+            Dictionary<string, HashSet<string>> vocabulary,
+            Dictionary<string, (string DbPath, string CanonicalForm)> domainTermMapping,
+            Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> lookup,
+ int maxNgram,
+ double cutoff,
+ int topK);
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs
new file mode 100644
index 000000000..b406f9348
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/IResultProcessor.cs
@@ -0,0 +1,18 @@
+using BotSharp.Abstraction.FuzzSharp.Models;
+
+namespace BotSharp.Abstraction.FuzzSharp
+{
+ ///
+ /// Result processor interface
+ /// Responsible for processing match results, including deduplication and sorting
+ ///
+ public interface IResultProcessor
+ {
+ ///
+ /// Process a list of flagged items, removing overlapping duplicates and sorting
+ ///
+ /// List of flagged items to process
+ /// Processed list of flagged items (deduplicated and sorted)
+        List<FlaggedItem> ProcessResults(List<FlaggedItem> flagged);
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITextAnalysisService.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITextAnalysisService.cs
new file mode 100644
index 000000000..4add4f62b
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITextAnalysisService.cs
@@ -0,0 +1,13 @@
+using BotSharp.Abstraction.FuzzSharp.Arguments;
+using BotSharp.Abstraction.FuzzSharp.Models;
+
+namespace BotSharp.Abstraction.FuzzSharp
+{
+ public interface ITextAnalysisService
+ {
+ ///
+ /// Analyze text for typos and entities using domain-specific vocabulary
+ ///
+ Task AnalyzeTextAsync(TextAnalysisRequest request);
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs
new file mode 100644
index 000000000..5e0b04ac5
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/ITokenMatcher.cs
@@ -0,0 +1,40 @@
+namespace BotSharp.Abstraction.FuzzSharp
+{
+ public interface ITokenMatcher
+ {
+ ///
+ /// Try to match a content span and return a match result
+ ///
+ /// The matching context containing all necessary information
+ /// Match result if found, null otherwise
+ MatchResult? TryMatch(MatchContext context);
+
+ ///
+ /// Priority of this matcher (higher priority matchers are tried first)
+ ///
+ int Priority { get; }
+ }
+
+ ///
+ /// Context information for token matching
+ ///
+ public record MatchContext(
+ string ContentSpan,
+ string ContentLow,
+ int StartIndex,
+ int NgramLength,
+        Dictionary<string, HashSet<string>> Vocabulary,
+        Dictionary<string, (string DbPath, string CanonicalForm)> DomainTermMapping,
+        Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> Lookup,
+ double Cutoff,
+ int TopK);
+
+ ///
+ /// Result of a token match
+ ///
+ public record MatchResult(
+ string CanonicalForm,
+        List<string> DomainTypes,
+ string MatchType,
+ double Confidence);
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs
new file mode 100644
index 000000000..8dc547d48
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/FlaggedItem.cs
@@ -0,0 +1,51 @@
+using System.Text.Json.Serialization;
+
+namespace BotSharp.Abstraction.FuzzSharp.Models
+{
+ public class FlaggedItem
+ {
+ ///
+ /// Token index in the original text
+ ///
+ [JsonPropertyName("index")]
+ public int Index { get; set; }
+
+ ///
+ /// Original token text
+ ///
+ [JsonPropertyName("token")]
+ public string Token { get; set; } = string.Empty;
+
+ ///
+ /// Domain types where this token was found (e.g., ['client_Profile.Name', 'data_ServiceType.Name'])
+ ///
+ [JsonPropertyName("domain_types")]
+        public List<string> DomainTypes { get; set; } = new();
+
+ ///
+ /// Type of match: 'domain_term_mapping' (business abbreviations, confidence=1.0) |
+ /// 'exact_match' (vocabulary matches, confidence=1.0) |
+ /// 'typo_correction' (spelling corrections, confidence less than 1.0)
+ ///
+ [JsonPropertyName("match_type")]
+ public string MatchType { get; set; } = string.Empty;
+
+ ///
+ /// Canonical form or suggested correction
+ ///
+ [JsonPropertyName("canonical_form")]
+ public string CanonicalForm { get; set; } = string.Empty;
+
+ ///
+ /// Confidence score (0.0-1.0, where 1.0 is exact match)
+ ///
+ [JsonPropertyName("confidence")]
+ public double Confidence { get; set; }
+
+ ///
+ /// N-gram length (number of tokens in this match). Internal field, not included in JSON output.
+ ///
+ [JsonIgnore]
+ public int NgramLength { get; set; }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs
new file mode 100644
index 000000000..131a53b49
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzSharp/Models/TextAnalysisResponse.cs
@@ -0,0 +1,31 @@
+using System.Text.Json.Serialization;
+
+namespace BotSharp.Abstraction.FuzzSharp.Models;
+
+public class TextAnalysisResponse
+{
+ ///
+ /// Original text
+ ///
+ [JsonPropertyName("original")]
+ public string Original { get; set; } = string.Empty;
+
+ ///
+ /// Tokenized text (only included if include_tokens=true)
+ ///
+ [JsonPropertyName("tokens")]
+ [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    public List<string>? Tokens { get; set; }
+
+ ///
+ /// Flagged items (filter by 'match_type' field: 'domain_term_mapping', 'exact_match', or 'typo_correction')
+ ///
+ [JsonPropertyName("flagged")]
+    public List<FlaggedItem> Flagged { get; set; } = new();
+
+ ///
+ /// Processing time in milliseconds
+ ///
+ [JsonPropertyName("processing_time_ms")]
+ public double ProcessingTimeMs { get; set; }
+}
\ No newline at end of file
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs
new file mode 100644
index 000000000..412ddfa9c
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs
@@ -0,0 +1,29 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Abstraction.Plugins;
+using BotSharp.Plugin.FuzzySharp.Services;
+using BotSharp.Plugin.FuzzySharp.Services.Matching;
+using BotSharp.Plugin.FuzzySharp.Services.Processors;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+
+namespace BotSharp.Plugin.FuzzySharp;
+
+public class FuzzySharpPlugin : IBotSharpPlugin
+{
+ public string Id => "379e6f7b-c58c-458b-b8cd-0374e5830711";
+ public string Name => "Fuzzy Sharp";
+ public string Description => "Analyze text for typos and entities using domain-specific vocabulary.";
+ public string IconUrl => "https://cdn-icons-png.flaticon.com/512/9592/9592995.png";
+
+ public void RegisterDI(IServiceCollection services, IConfiguration config)
+ {
+ services.AddScoped();
+ services.AddScoped();
+ services.AddScoped();
+ services.AddScoped();
+ services.AddScoped();
+ services.AddScoped();
+ services.AddScoped();
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs
new file mode 100644
index 000000000..eb6d4243b
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/CsvPhraseCollectionLoader.cs
@@ -0,0 +1,188 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Core.Infrastructures;
+using CsvHelper;
+using CsvHelper.Configuration;
+using Microsoft.Extensions.Logging;
+using System.Globalization;
+using System.IO;
+
+namespace BotSharp.Plugin.FuzzySharp.Services;
+
+public class CsvPhraseCollectionLoader : IPhraseCollection
+{
+    private readonly ILogger<CsvPhraseCollectionLoader> _logger;
+
+    public CsvPhraseCollectionLoader(ILogger<CsvPhraseCollectionLoader> logger)
+ {
+ _logger = logger;
+ }
+
+ [SharpCache(60)]
+    public async Task<Dictionary<string, HashSet<string>>> LoadVocabularyAsync()
+ {
+ string foldername = "";
+        var vocabulary = new Dictionary<string, HashSet<string>>();
+
+ if (string.IsNullOrEmpty(foldername))
+ {
+ return vocabulary;
+ }
+
+ // Load CSV files from the folder
+ var csvFileDict = await LoadCsvFilesFromFolderAsync(foldername);
+ if (csvFileDict.Count == 0)
+ {
+ return vocabulary;
+ }
+
+ // Load each CSV file
+ foreach (var (domainType, filePath) in csvFileDict)
+ {
+ try
+ {
+ var terms = await LoadCsvFileAsync(filePath);
+ vocabulary[domainType] = terms;
+ _logger.LogInformation($"Loaded {terms.Count} terms for domain type '{domainType}' from {filePath}");
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, $"Error loading CSV file for domain type '{domainType}': {filePath}");
+ }
+ }
+
+ return vocabulary;
+ }
+
+ [SharpCache(60)]
+    public async Task<Dictionary<string, (string DbPath, string CanonicalForm)>> LoadDomainTermMappingAsync()
+ {
+ string filename = "";
+        var result = new Dictionary<string, (string DbPath, string CanonicalForm)>();
+ if (string.IsNullOrWhiteSpace(filename))
+ {
+ return result;
+ }
+
+ var searchFolder = Path.Combine(AppContext.BaseDirectory, "data", "plugins", "fuzzySharp");
+ var filePath = Path.Combine(searchFolder, filename);
+
+ if (string.IsNullOrEmpty(filePath) || !File.Exists(filePath))
+ {
+ return result;
+ }
+
+ try
+ {
+ using var reader = new StreamReader(filePath);
+ using var csv = new CsvReader(reader, CreateCsvConfig());
+
+ await csv.ReadAsync();
+ csv.ReadHeader();
+
+ if (!HasRequiredColumns(csv))
+ {
+ _logger.LogWarning("Domain term mapping file missing required columns: {FilePath}", filePath);
+ return result;
+ }
+
+ while (await csv.ReadAsync())
+ {
+ var term = csv.GetField("term") ?? string.Empty;
+ var dbPath = csv.GetField("dbPath") ?? string.Empty;
+ var canonicalForm = csv.GetField("canonical_form") ?? string.Empty;
+
+ if (term.Length == 0 || dbPath.Length == 0 || canonicalForm.Length == 0)
+ {
+ _logger.LogWarning(
+ "Missing column(s) in CSV at row {Row}: term={Term}, dbPath={DbPath}, canonical_form={CanonicalForm}",
+ csv.Parser.RawRow,
+ term ?? "",
+ dbPath ?? "",
+ canonicalForm ?? "");
+ continue;
+ }
+
+ var key = term.ToLowerInvariant();
+ result[key] = (dbPath, canonicalForm);
+ }
+
+ _logger.LogInformation("Loaded domain term mapping from {FilePath}: {Count} terms", filePath, result.Count);
+ }
+ catch (Exception ex)
+ {
+ _logger.LogError(ex, "Error loading domain term mapping file: {FilePath}", filePath);
+ }
+
+ return result;
+ }
+
+    private async Task<HashSet<string>> LoadCsvFileAsync(string filePath)
+ {
+        var terms = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
+
+ if (!File.Exists(filePath))
+ {
+ _logger.LogWarning($"CSV file does not exist: {filePath}");
+ return terms;
+ }
+
+ using var reader = new StreamReader(filePath);
+ using var csv = new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
+ {
+ HasHeaderRecord = false // No header in the CSV files
+ });
+
+ while (await csv.ReadAsync())
+ {
+ // Read the first column (assuming it contains the terms)
+ var term = csv.GetField(0);
+ if (!string.IsNullOrWhiteSpace(term))
+ {
+ terms.Add(term.Trim());
+ }
+ }
+
+ _logger.LogInformation($"Loaded {terms.Count} terms from {Path.GetFileName(filePath)}");
+ return terms;
+ }
+
+    private async Task<Dictionary<string, string>> LoadCsvFilesFromFolderAsync(string folderName)
+ {
+        var csvFileDict = new Dictionary<string, string>();
+ var searchFolder = Path.Combine(AppContext.BaseDirectory, "data", "plugins", "fuzzySharp", folderName);
+ if (!Directory.Exists(searchFolder))
+ {
+ _logger.LogWarning($"Folder does not exist: {searchFolder}");
+ return csvFileDict;
+ }
+
+ var csvFiles = Directory.GetFiles(searchFolder, "*.csv");
+ foreach (var file in csvFiles)
+ {
+ var fileName = Path.GetFileNameWithoutExtension(file);
+ csvFileDict[fileName] = file;
+ }
+
+ _logger.LogInformation($"Loaded {csvFileDict.Count} CSV files from {searchFolder}");
+ return await Task.FromResult(csvFileDict);
+ }
+
+ private static CsvConfiguration CreateCsvConfig()
+ {
+ return new CsvConfiguration(CultureInfo.InvariantCulture)
+ {
+ HasHeaderRecord = true,
+ DetectColumnCountChanges = true,
+ MissingFieldFound = null
+ };
+ }
+
+ private static bool HasRequiredColumns(CsvReader csv)
+ {
+ return csv.HeaderRecord is { Length: > 0 } headers
+ && headers.Contains("term")
+ && headers.Contains("dbPath")
+ && headers.Contains("canonical_form");
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/DomainTermMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/DomainTermMatcher.cs
new file mode 100644
index 000000000..e8813013d
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/DomainTermMatcher.cs
@@ -0,0 +1,24 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Matching
+{
+ public class DomainTermMatcher : ITokenMatcher
+ {
+ public int Priority => 3; // Highest priority
+
+ public MatchResult? TryMatch(MatchContext context)
+ {
+ if (context.DomainTermMapping.TryGetValue(context.ContentLow, out var match))
+ {
+ return new MatchResult(
+ CanonicalForm: match.CanonicalForm,
+                    DomainTypes: new List<string> { match.DbPath },
+ MatchType: MatchReason.DomainTermMapping,
+ Confidence: 1.0);
+ }
+
+ return null;
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs
new file mode 100644
index 000000000..f404f47b8
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/ExactMatcher.cs
@@ -0,0 +1,24 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Matching
+{
+ public class ExactMatcher : ITokenMatcher
+ {
+ public int Priority => 2; // Second highest priority
+
+ public MatchResult? TryMatch(MatchContext context)
+ {
+ if (context.Lookup.TryGetValue(context.ContentLow, out var match))
+ {
+ return new MatchResult(
+ CanonicalForm: match.CanonicalForm,
+ DomainTypes: match.DomainTypes,
+ MatchType: MatchReason.ExactMatch,
+ Confidence: 1.0);
+ }
+
+ return null;
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs
new file mode 100644
index 000000000..c6b3ba477
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Matching/FuzzyMatcher.cs
@@ -0,0 +1,82 @@
+using BotSharp.Abstraction.FuzzSharp;
+using System.Text.RegularExpressions;
+using FuzzySharp;
+using FuzzySharp.SimilarityRatio;
+using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Matching
+{
+ public class FuzzyMatcher : ITokenMatcher
+ {
+ public int Priority => 1; // Lowest priority
+
+ public MatchResult? TryMatch(MatchContext context)
+ {
+ var match = CheckTypoCorrection(context.ContentSpan, context.Lookup, context.Cutoff);
+ if (match == null)
+ {
+ return null;
+ }
+
+ var (canonicalForm, domainTypes, confidence) = match.Value;
+ return new MatchResult(
+ CanonicalForm: canonicalForm,
+ DomainTypes: domainTypes,
+ MatchType: MatchReason.TypoCorrection,
+ Confidence: confidence);
+ }
+
+ ///
+ /// Check typo correction using fuzzy matching
+ ///
+        private (string CanonicalForm, List<string> DomainTypes, double Confidence)? CheckTypoCorrection(
+ string contentSpan,
+            Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> lookup,
+ double cutoff)
+ {
+ // Convert cutoff to 0-100 scale for FuzzySharp
+ var scoreCutoff = (int)(cutoff * 100);
+
+ // Get all candidates from lookup
+ var candidates = lookup.Keys.ToList();
+
+ // Find best match using ExtractOne
+ var scorer = ScorerCache.Get();
+ var result = Process.ExtractOne(
+ contentSpan,
+ candidates,
+ candidate => Normalize(candidate), // Preprocessor function
+ scorer,
+ scoreCutoff // Score cutoff
+ );
+
+ if (result == null)
+ {
+ return null;
+ }
+
+ // Get the canonical form and domain types from lookup
+ var match = lookup[result.Value];
+ return (match.CanonicalForm, match.DomainTypes, Math.Round(result.Score / 100.0, 3));
+ }
+
+ ///
+ /// Normalize text for fuzzy matching comparison
+ /// - Replaces all non-word characters (except apostrophes) with spaces
+ /// - Converts to lowercase
+ /// - Collapses multiple spaces into single space
+ /// - Trims leading/trailing whitespace
+ /// Example: "Test-Value (123)" → "test value 123"
+ ///
+ /// Text to normalize
+ /// Normalized text suitable for fuzzy matching
+ private string Normalize(string text)
+ {
+ // Replace non-word characters (except apostrophes) with spaces
+ var normalized = Regex.Replace(text, @"[^\w']+", " ", RegexOptions.IgnoreCase);
+ // Convert to lowercase, collapse multiple spaces, and trim
+ return Regex.Replace(normalized.ToLowerInvariant(), @"\s+", " ").Trim();
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs
new file mode 100644
index 000000000..d28829a16
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/NgramProcessor.cs
@@ -0,0 +1,134 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Abstraction.FuzzSharp.Models;
+using BotSharp.Plugin.FuzzySharp.Constants;
+using BotSharp.Plugin.FuzzySharp.Utils;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Processors
+{
+ public class NgramProcessor : INgramProcessor
+ {
+        private readonly List<ITokenMatcher> _matchers;
+
+        public NgramProcessor(IEnumerable<ITokenMatcher> matchers)
+ {
+ // Sort matchers by priority (highest first)
+ _matchers = matchers.OrderByDescending(m => m.Priority).ToList();
+ }
+
+        public List<FlaggedItem> ProcessNgrams(
+            List<string> tokens,
+            Dictionary<string, HashSet<string>> vocabulary,
+            Dictionary<string, (string DbPath, string CanonicalForm)> domainTermMapping,
+            Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> lookup,
+ int maxNgram,
+ double cutoff,
+ int topK)
+ {
+            var flagged = new List<FlaggedItem>();
+
+ // Process n-grams from largest to smallest
+ for (int n = maxNgram; n >= 1; n--)
+ {
+ for (int i = 0; i <= tokens.Count - n; i++)
+ {
+ var item = ProcessSingleNgram(
+ tokens,
+ i,
+ n,
+ vocabulary,
+ domainTermMapping,
+ lookup,
+ cutoff,
+ topK);
+
+ if (item != null)
+ {
+ flagged.Add(item);
+ }
+ }
+ }
+
+ return flagged;
+ }
+
+ ///
+ /// Process a single n-gram at the specified position
+ ///
+ private FlaggedItem? ProcessSingleNgram(
+            List<string> tokens,
+            int startIdx,
+            int n,
+            Dictionary<string, HashSet<string>> vocabulary,
+            Dictionary<string, (string DbPath, string CanonicalForm)> domainTermMapping,
+            Dictionary<string, (string CanonicalForm, List<string> DomainTypes)> lookup,
+ double cutoff,
+ int topK)
+ {
+ // Extract content span
+ var (contentSpan, spanTokens, contentIndices) = ExtractContentSpan(tokens, startIdx, n);
+ if (string.IsNullOrWhiteSpace(contentSpan))
+ {
+ return null;
+ }
+
+ var contentLow = contentSpan.ToLowerInvariant();
+
+ // Try matching in priority order using matchers
+ var context = new MatchContext(
+ contentSpan,
+ contentLow,
+ startIdx,
+ n,
+ vocabulary,
+ domainTermMapping,
+ lookup,
+ cutoff,
+ topK);
+
+ foreach (var matcher in _matchers)
+ {
+ var matchResult = matcher.TryMatch(context);
+ if (matchResult != null)
+ {
+ return CreateFlaggedItem(matchResult, startIdx, contentSpan, n);
+ }
+ }
+
+ return null;
+ }
+
+ ///
+ /// Create a FlaggedItem from a MatchResult
+ ///
+ private FlaggedItem CreateFlaggedItem(
+ MatchResult matchResult,
+ int startIndex,
+ string contentSpan,
+ int ngramLength)
+ {
+ return new FlaggedItem
+ {
+ Index = startIndex,
+ Token = contentSpan,
+ DomainTypes = matchResult.DomainTypes,
+ MatchType = matchResult.MatchType,
+ CanonicalForm = matchResult.CanonicalForm,
+ Confidence = matchResult.Confidence,
+ NgramLength = ngramLength
+ };
+ }
+
+ ///
+ /// Extract content span
+ ///
+        private (string ContentSpan, List<string> Tokens, List<int> ContentIndices) ExtractContentSpan(
+            List<string> tokens,
+ int startIdx,
+ int n)
+ {
+ var span = tokens.Skip(startIdx).Take(n).ToList();
+ var indices = Enumerable.Range(startIdx, n).ToList();
+ return (string.Join(" ", span), span, indices);
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs
new file mode 100644
index 000000000..2238b6153
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/Processors/ResultProcessor.cs
@@ -0,0 +1,103 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Abstraction.FuzzSharp.Models;
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Services.Processors
+{
+ /// <summary>
+ /// Post-processes flagged items: removes overlapping duplicate detections,
+ /// then sorts the survivors for presentation.
+ /// </summary>
+ public class ResultProcessor : IResultProcessor
+ {
+ /// <summary>
+ /// Deduplicate overlapping detections, then sort by confidence (descending)
+ /// and match type (alphabetically).
+ /// </summary>
+ public List ProcessResults(List flagged)
+ {
+ // Remove overlapping duplicates
+ var deduped = RemoveOverlappingDuplicates(flagged);
+
+ // Sort by confidence (descending), then match_type (alphabetically)
+ // This matches Python's _sort_and_format_results function
+ return deduped
+ .OrderByDescending(f => f.Confidence)
+ .ThenBy(f => f.MatchType)
+ .ToList();
+ }
+
+ /// <summary>
+ /// Remove overlapping detections with the same canonical form.
+ /// When multiple detections overlap and have the same canonical_form,
+ /// keep only the best one based on:
+ /// 1. Prefer domain_term_mapping over exact_match over typo_correction (matches matcher priority)
+ /// 2. Highest confidence
+ /// 3. Shortest n-gram length
+ /// </summary>
+ // NOTE(review): grouping is anchored at the first unvisited item, so two items
+ // that only overlap transitively (via a third) may land in separate groups —
+ // confirm this mirrors the Python implementation's behavior.
+ private List RemoveOverlappingDuplicates(List flagged)
+ {
+ var deduped = new List();
+ var skipIndices = new HashSet();
+
+ for (int i = 0; i < flagged.Count; i++)
+ {
+ if (skipIndices.Contains(i))
+ {
+ continue;
+ }
+
+ var item = flagged[i];
+ // Half-open token range [Index, Index + NgramLength).
+ var itemRange = (item.Index, item.Index + item.NgramLength);
+
+ // Find all overlapping items with same canonical_form (regardless of match_type)
+ var overlappingGroup = new List { item };
+ for (int j = i + 1; j < flagged.Count; j++)
+ {
+ if (skipIndices.Contains(j))
+ {
+ continue;
+ }
+
+ var other = flagged[j];
+ if (item.CanonicalForm == other.CanonicalForm)
+ {
+ var otherRange = (other.Index, other.Index + other.NgramLength);
+ if (RangesOverlap(itemRange, otherRange))
+ {
+ overlappingGroup.Add(other);
+ skipIndices.Add(j);
+ }
+ }
+ }
+
+ // Keep the best item from the overlapping group
+ // Priority: domain_term_mapping (3) > exact_match (2) > typo_correction (1)
+ // Then highest confidence, then shortest ngram
+ var bestItem = overlappingGroup
+ .OrderByDescending(x => GetMatchTypePriority(x.MatchType))
+ .ThenByDescending(x => x.Confidence)
+ .ThenBy(x => x.NgramLength)
+ .First();
+ deduped.Add(bestItem);
+ }
+
+ return deduped;
+ }
+
+ /// <summary>
+ /// Get priority value for match type (higher is better)
+ /// Matches the priority order in matchers: domain > exact > fuzzy
+ /// </summary>
+ private int GetMatchTypePriority(string matchType)
+ {
+ return matchType switch
+ {
+ MatchReason.DomainTermMapping => 3, // Highest priority
+ MatchReason.ExactMatch => 2, // Second priority
+ MatchReason.TypoCorrection => 1, // Lowest priority
+ _ => 0 // Unknown types get lowest priority
+ };
+ }
+
+ /// <summary>
+ /// Check if two token ranges overlap.
+ /// </summary>
+ // Ranges are half-open [start, end): they overlap iff each starts before the other ends.
+ private bool RangesOverlap((int start, int end) range1, (int start, int end) range2)
+ {
+ return range1.start < range2.end && range2.start < range1.end;
+ }
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/TextAnalysisService.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/TextAnalysisService.cs
new file mode 100644
index 000000000..969ff1a65
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/TextAnalysisService.cs
@@ -0,0 +1,173 @@
+using BotSharp.Abstraction.FuzzSharp;
+using BotSharp.Abstraction.FuzzSharp.Arguments;
+using BotSharp.Abstraction.FuzzSharp.Models;
+using BotSharp.Abstraction.Knowledges;
+using BotSharp.Plugin.FuzzySharp.Utils;
+using Microsoft.Extensions.Logging;
+using System.Diagnostics;
+
+namespace BotSharp.Plugin.FuzzySharp.Services;
+
+/// <summary>
+/// Orchestrates text analysis: tokenizes input, merges vocabulary and term
+/// mappings from all phrase loaders, runs the n-gram processor, and
+/// deduplicates/sorts the flagged results.
+/// </summary>
+// NOTE(review): generic type arguments appear stripped throughout this patch
+// (e.g. "IEnumerable _phraseLoaderServices") — confirm against the original file.
+public class TextAnalysisService : ITextAnalysisService
+{
+ private readonly ILogger _logger;
+ private readonly IEnumerable _phraseLoaderServices;
+ private readonly INgramProcessor _ngramProcessor;
+ private readonly IResultProcessor _resultProcessor;
+
+ public TextAnalysisService(
+ ILogger logger,
+ IEnumerable phraseLoaderServices,
+ INgramProcessor ngramProcessor,
+ IResultProcessor resultProcessor)
+ {
+ _logger = logger;
+ _phraseLoaderServices = phraseLoaderServices;
+ _ngramProcessor = ngramProcessor;
+ _resultProcessor = resultProcessor;
+ }
+
+ /// <summary>
+ /// Analyze text for typos and entities using domain-specific vocabulary
+ /// </summary>
+ /// <param name="request">Analysis request carrying the text and tuning options (MaxNgram, Cutoff, TopK, IncludeTokens).</param>
+ /// <returns>A response with the original text, flagged items, processing time, and (optionally) the token list.</returns>
+ /// <exception cref="Exception">Rethrows any failure after logging it with the elapsed time.</exception>
+ public async Task AnalyzeTextAsync(TextAnalysisRequest request)
+ {
+ var stopwatch = Stopwatch.StartNew();
+ try
+ {
+ // Tokenize the text
+ var tokens = TextTokenizer.Tokenize(request.Text);
+
+ // Load vocabulary
+ var vocabulary = await LoadAllVocabularyAsync();
+
+ // Load domain term mapping
+ var domainTermMapping = await LoadAllDomainTermMappingAsync();
+
+ // Analyze text
+ var flagged = AnalyzeTokens(tokens, vocabulary, domainTermMapping, request);
+
+ stopwatch.Stop();
+
+ var response = new TextAnalysisResponse
+ {
+ Original = request.Text,
+ Flagged = flagged,
+ ProcessingTimeMs = Math.Round(stopwatch.Elapsed.TotalMilliseconds, 2)
+ };
+
+ // Tokens are only echoed back when the caller asks for them.
+ if (request.IncludeTokens)
+ {
+ response.Tokens = tokens;
+ }
+
+ // NOTE(review): interpolated strings defeat structured logging (CA2254);
+ // consider message templates with named placeholders.
+ _logger.LogInformation(
+ $"Text analysis completed in {response.ProcessingTimeMs}ms | " +
+ $"Text length: {request.Text.Length} chars | " +
+ $"Flagged items: {flagged.Count}");
+
+ return response;
+ }
+ catch (Exception ex)
+ {
+ stopwatch.Stop();
+ _logger.LogError(ex, $"Error analyzing text after {stopwatch.Elapsed.TotalMilliseconds}ms");
+ throw;
+ }
+ }
+
+ /// <summary>
+ /// Load and merge vocabularies from every phrase loader.
+ /// Term sets that share a domain key are unioned together.
+ /// </summary>
+ public async Task>> LoadAllVocabularyAsync()
+ {
+ var results = await Task.WhenAll(_phraseLoaderServices.Select(c => c.LoadVocabularyAsync()));
+ var merged = new Dictionary>();
+
+ foreach (var dict in results)
+ {
+ foreach (var kvp in dict)
+ {
+ if (!merged.TryGetValue(kvp.Key, out var set))
+ merged[kvp.Key] = new HashSet(kvp.Value);
+ else
+ set.UnionWith(kvp.Value);
+ }
+ }
+
+ return merged;
+ }
+
+ /// <summary>
+ /// Load and merge domain term mappings from every phrase loader.
+ /// On key collision the later loader's entry wins.
+ /// </summary>
+ public async Task> LoadAllDomainTermMappingAsync()
+ {
+ var results = await Task.WhenAll(_phraseLoaderServices.Select(c => c.LoadDomainTermMappingAsync()));
+ var merged = new Dictionary();
+
+ foreach (var dict in results)
+ {
+ foreach (var kvp in dict)
+ merged[kvp.Key] = kvp.Value; // later entries override earlier ones
+ }
+
+ return merged;
+ }
+
+ /// <summary>
+ /// Analyze tokens for typos and entities
+ /// </summary>
+ /// <param name="tokens">Tokenized input text.</param>
+ /// <param name="vocabulary">Merged domain vocabulary.</param>
+ /// <param name="domainTermMapping">Merged term-to-canonical-form mapping.</param>
+ /// <param name="request">Original request, supplying MaxNgram, Cutoff and TopK.</param>
+ /// <returns>Deduplicated, sorted flagged items.</returns>
+ private List AnalyzeTokens(
+ List tokens,
+ Dictionary> vocabulary,
+ Dictionary domainTermMapping,
+ TextAnalysisRequest request)
+ {
+ // Build lookup table for O(1) exact match lookups (matching Python's build_lookup)
+ var lookup = BuildLookup(vocabulary);
+
+ // Process n-grams and find matches
+ var flagged = _ngramProcessor.ProcessNgrams(
+ tokens,
+ vocabulary,
+ domainTermMapping,
+ lookup,
+ request.MaxNgram,
+ request.Cutoff,
+ request.TopK);
+
+ // Process results: deduplicate and sort
+ return _resultProcessor.ProcessResults(flagged);
+ }
+
+ /// <summary>
+ /// Build a lookup dictionary mapping lowercase terms to their canonical form and domain types.
+ /// This is a performance optimization - instead of iterating through all domains for each lookup,
+ /// we build a flat dictionary once at the start.
+ ///
+ /// Matches Python's build_lookup() function.
+ /// </summary>
+ private Dictionary DomainTypes)> BuildLookup(
+ Dictionary> vocabulary)
+ {
+ var lookup = new Dictionary DomainTypes)>();
+
+ foreach (var (domainType, terms) in vocabulary)
+ {
+ foreach (var term in terms)
+ {
+ var key = term.ToLowerInvariant();
+ if (lookup.TryGetValue(key, out var existing))
+ {
+ // Term already exists - add this domain type to the list if not already there
+ // (the tuple is a value copy, but DomainTypes is a reference type, so
+ // Add mutates the list held by the stored entry)
+ if (!existing.DomainTypes.Contains(domainType))
+ {
+ existing.DomainTypes.Add(domainType);
+ }
+ }
+ else
+ {
+ // New term - create entry with single type in list
+ lookup[key] = (term, new List { domainType });
+ }
+ }
+ }
+
+ return lookup;
+ }
+}
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs
new file mode 100644
index 000000000..568fe81d5
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Using.cs
@@ -0,0 +1,10 @@
+global using System;
+global using System.Collections.Generic;
+global using System.Linq;
+global using System.Net.Http;
+global using System.Net.Mime;
+global using System.Text;
+global using System.Text.Json;
+global using System.Threading;
+global using System.Threading.Tasks;
+
diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs
new file mode 100644
index 000000000..2ccb6ba2f
--- /dev/null
+++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Utils/TextTokenizer.cs
@@ -0,0 +1,64 @@
+using BotSharp.Plugin.FuzzySharp.Constants;
+
+namespace BotSharp.Plugin.FuzzySharp.Utils
+{
+ /// <summary>
+ /// Stateless tokenizer: separator-aware preprocessing followed by simple
+ /// whitespace splitting.
+ /// </summary>
+ public static class TextTokenizer
+ {
+ /// <summary>
+ /// Preprocess text: add spaces before and after characters that need to be separated
+ /// This allows subsequent simple whitespace tokenization to correctly separate these characters
+ /// Example: "(IH)" -> " ( IH ) " -> ["(", "IH", ")"]
+ /// </summary>
+ /// <param name="text">Text to preprocess</param>
+ /// <returns>Preprocessed text</returns>
+ public static string PreprocessText(string text)
+ {
+ // Null/empty/whitespace input is returned unchanged.
+ if (string.IsNullOrWhiteSpace(text))
+ {
+ return text;
+ }
+
+ // Pre-size for the worst case of many separator characters.
+ var result = new StringBuilder(text.Length * 2);
+
+ foreach (var ch in text)
+ {
+ // If it's a character that needs to be separated, add spaces before and after
+ if (TextConstants.SeparatorChars.Contains(ch))
+ {
+ result.Append(' ');
+ result.Append(ch);
+ result.Append(' ');
+ }
+ else
+ {
+ result.Append(ch);
+ }
+ }
+
+ return result.ToString();
+ }
+
+ /// <summary>
+ /// Simple whitespace tokenization
+ /// Should be called after preprocessing text with PreprocessText
+ /// </summary>
+ /// <param name="text">Text to tokenize</param>
+ /// <returns>List of tokens</returns>
+ public static List SimpleTokenize(string text)
+ {
+ // RemoveEmptyEntries drops the extra gaps introduced by PreprocessText.
+ return text.Split(TextConstants.TokenSeparators, StringSplitOptions.RemoveEmptyEntries).ToList();
+ }
+
+ /// <summary>
+ /// Complete tokenization flow: preprocessing + tokenization
+ /// This is the recommended usage
+ /// </summary>
+ /// <param name="text">Text to tokenize</param>
+ /// <returns>List of tokens</returns>
+ public static List Tokenize(string text)
+ {
+ var preprocessed = PreprocessText(text);
+ return SimpleTokenize(preprocessed);
+ }
+ }
+}
diff --git a/src/WebStarter/WebStarter.csproj b/src/WebStarter/WebStarter.csproj
index 5a7c6eb7b..082ac578e 100644
--- a/src/WebStarter/WebStarter.csproj
+++ b/src/WebStarter/WebStarter.csproj
@@ -37,6 +37,7 @@
+    <ProjectReference Include="..\Plugins\BotSharp.Plugin.FuzzySharp\BotSharp.Plugin.FuzzySharp.csproj" />
diff --git a/src/WebStarter/appsettings.json b/src/WebStarter/appsettings.json
index 57dd1c50d..2ca33f390 100644
--- a/src/WebStarter/appsettings.json
+++ b/src/WebStarter/appsettings.json
@@ -846,7 +846,8 @@
"BotSharp.Plugin.ExcelHandler",
"BotSharp.Plugin.SqlDriver",
"BotSharp.Plugin.TencentCos",
- "BotSharp.Plugin.PythonInterpreter"
+ "BotSharp.Plugin.PythonInterpreter",
+ "BotSharp.Plugin.FuzzySharp"
]
}
}