diff --git a/src/Infrastructure/BotSharp.Abstraction/Entity/IEntityDataLoader.cs b/src/Infrastructure/BotSharp.Abstraction/Entity/IEntityDataLoader.cs index caf8fe227..8e81aeb7a 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Entity/IEntityDataLoader.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Entity/IEntityDataLoader.cs @@ -1,3 +1,5 @@ +using BotSharp.Abstraction.Entity.Models; + namespace BotSharp.Abstraction.Entity; public interface IEntityDataLoader @@ -15,4 +17,18 @@ public interface IEntityDataLoader /// /// Task> LoadSynonymMappingAsync(); + + /// + /// Context-aware vocabulary load. Default implementation delegates to the + /// parameterless version for loaders that don't need runtime parameters. + /// + Task>> LoadVocabularyAsync(EntityDataLoadContext ctx) + => LoadVocabularyAsync(); + + /// + /// Context-aware synonym load. Default implementation delegates to the + /// parameterless version for loaders that don't need runtime parameters. + /// + Task> LoadSynonymMappingAsync(EntityDataLoadContext ctx) + => LoadSynonymMappingAsync(); } diff --git a/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityAnalysisOptions.cs b/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityAnalysisOptions.cs index 53f3e4b23..155bd8cb0 100644 --- a/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityAnalysisOptions.cs +++ b/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityAnalysisOptions.cs @@ -9,6 +9,14 @@ public class EntityAnalysisOptions [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public IEnumerable? DataProviders { get; set; } + /// + /// Free-form parameters forwarded to implementations. + /// Each loader documents the keys it recognizes (e.g. "graphId" for graph-backed loaders). + /// + [JsonPropertyName("loader_parameters")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public IDictionary? LoaderParameters { get; set; } + /// /// Maximum n-gram size /// diff --git a/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityDataLoadContext.cs b/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityDataLoadContext.cs new file mode 100644 index 000000000..d9ee699c7 --- /dev/null +++ b/src/Infrastructure/BotSharp.Abstraction/Entity/Models/EntityDataLoadContext.cs @@ -0,0 +1,16 @@ +namespace BotSharp.Abstraction.Entity.Models; + +/// +/// Loader-facing context carrying free-form parameters from the caller +/// (e.g. via ). +/// Each implementation defines which keys it +/// recognizes (document them on the concrete loader's XML doc). +/// +public class EntityDataLoadContext +{ + /// + /// Case-insensitive key/value bag (e.g. "graphId", "tenantId"). + /// + public IDictionary Parameters { get; init; } + = new Dictionary(StringComparer.OrdinalIgnoreCase); +} diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs index ea3bc048f..ca9b2f545 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/FuzzySharpPlugin.cs @@ -22,6 +22,8 @@ public void RegisterDI(IServiceCollection services, IConfiguration config) services.AddScoped(); services.AddScoped(); services.AddScoped(); + services.AddScoped(); + services.AddScoped(sp => sp.GetRequiredService()); services.AddScoped(); services.AddScoped(); diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/DataLoaders/MembaseNERDataLoader.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/DataLoaders/MembaseNERDataLoader.cs new file mode 100644 index 000000000..42a8fa458 --- /dev/null +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/DataLoaders/MembaseNERDataLoader.cs @@ -0,0 +1,237 @@ +using BotSharp.Abstraction.Graph; +using BotSharp.Abstraction.Graph.Options; +using BotSharp.Abstraction.Infrastructures; +using Microsoft.Extensions.Logging; +using System.Text.RegularExpressions; + +namespace BotSharp.Plugin.FuzzySharp.Services.DataLoaders; + +/// +/// Loads NER vocabulary / synonyms from a graph database (e.g. Membase) per-call. +/// Required context parameter: +/// - "graphId": target graph identifier (non-empty string). +/// Vocabulary schemas are configured per-tenant under +/// FuzzySharp:Membase:Tenants:<alias>:Schema (committed in appsettings), +/// with the tenant's environment-specific GraphId supplied via user-secrets +/// or appsettings.{Environment}.json. The loader resolves the incoming graphId to a +/// tenant via the merged configuration. Each label yields +/// MATCH (n:Label) RETURN n.Property AS text. Synonyms still read the flat +/// (:Synonym {table, column, term, canonical_form}) schema. +/// Exposes InvalidateCacheAsync(graphId) so write paths can force a refresh. +/// +public class MembaseNERDataLoader : IEntityDataLoader +{ + private const string GraphIdKey = "graphId"; + private const string CacheKeyPrefix = "fuzzysharp:ner"; + private const int CacheMinutes = 60; + private const string GraphDbProvider = "membase"; + + private const string SynonymCypher = + "MATCH (n:Synonym) RETURN n.table AS table, n.column AS column, n.term AS term, n.canonical_form AS canonical_form"; + + private static readonly Regex IdentifierRegex = new("^[A-Za-z_][A-Za-z0-9_]*$", RegexOptions.Compiled); + + private readonly ILogger _logger; + private readonly IEnumerable _graphDbs; + private readonly ICacheService _cache; + private readonly FuzzySharpSettings _settings; + + public MembaseNERDataLoader( + ILogger logger, + IEnumerable graphDbs, + ICacheService cache, + FuzzySharpSettings settings) + { + _logger = logger; + _graphDbs = graphDbs; + _cache = cache; + _settings = settings; + } + + public string Provider => "fuzzy-sharp-membase"; + + private static string VocabKey(string graphId) => $"{CacheKeyPrefix}:vocab:{graphId}"; + private static string SynonymKey(string graphId) => $"{CacheKeyPrefix}:synonym:{graphId}"; + + // The parameterless overloads don't make sense for a graph-backed loader. + // Caller must supply a graphId via EntityDataLoadContext. + public Task>> LoadVocabularyAsync() + => Task.FromResult(new Dictionary>()); + + public Task> LoadSynonymMappingAsync() + => Task.FromResult(new Dictionary()); + + public Task>> LoadVocabularyAsync(EntityDataLoadContext ctx) + { + if (!TryGetGraphId(ctx, out var graphId)) + { + return Task.FromResult(new Dictionary>()); + } + return LoadVocabularyByGraphIdAsync(graphId); + } + + public Task> LoadSynonymMappingAsync(EntityDataLoadContext ctx) + { + if (!TryGetGraphId(ctx, out var graphId)) + { + return Task.FromResult(new Dictionary()); + } + return LoadSynonymMappingByGraphIdAsync(graphId); + } + + private async Task>> LoadVocabularyByGraphIdAsync(string graphId) + { + var key = VocabKey(graphId); + var cached = await _cache.GetAsync>>(key); + if (cached != null) return cached; + + var result = new Dictionary>(); + + var sources = _settings.Membase?.VocabularySources; + if (sources == null || !sources.TryGetValue(graphId, out var labelMap) || labelMap == null || labelMap.Count == 0) + { + _logger.LogWarning($"Skip {Provider}: no vocabulary sources configured for graphId='{graphId}' under FuzzySharp:Membase:VocabularySources."); + return result; + } + + var graphDb = ResolveGraphDb(); + if (graphDb == null) return result; + + foreach (var (label, fields) in labelMap) + { + if (fields == null || fields.Length == 0) continue; + + if (!IdentifierRegex.IsMatch(label)) + { + _logger.LogWarning($"Skip vocabulary label '{label}' in {Provider}: invalid identifier."); + continue; + } + + // Build aliased projections: n.prop0 AS f0, n.prop1 AS f1, ... + // Carry SqlSource alongside so we can key the result dict by SQL "table.column". + var validFields = new List<(string Alias, string GraphProperty, string SqlSource)>(fields.Length); + for (var i = 0; i < fields.Length; i++) + { + var graphProperty = fields[i].GraphProperty; + var sqlSource = fields[i].SqlSource; + if (string.IsNullOrWhiteSpace(graphProperty) || !IdentifierRegex.IsMatch(graphProperty)) + { + _logger.LogWarning($"Skip vocabulary field '{label}.{graphProperty}' in {Provider}: invalid identifier."); + continue; + } + if (string.IsNullOrWhiteSpace(sqlSource)) + { + _logger.LogWarning($"Skip vocabulary field '{label}.{graphProperty}' in {Provider}: empty SqlSource."); + continue; + } + validFields.Add(($"f{i}", graphProperty, sqlSource)); + } + if (validFields.Count == 0) continue; + + var projection = string.Join(", ", validFields.Select(f => $"n.{f.GraphProperty} AS {f.Alias}")); + var cypher = $"MATCH (n:{label}) RETURN {projection}"; + + try + { + var queryResult = await graphDb.ExecuteQueryAsync(cypher, new GraphQueryExecuteOptions + { + GraphId = graphId + }); + + foreach (var (alias, _, sqlSource) in validFields) + { + if (!result.TryGetValue(sqlSource, out var set)) + { + set = new HashSet(StringComparer.OrdinalIgnoreCase); + result[sqlSource] = set; + } + + foreach (var row in queryResult?.Values ?? []) + { + var text = row.TryGetValue(alias, out var tx) ? tx?.ToString() : null; + if (string.IsNullOrWhiteSpace(text)) continue; + set.Add(text!.Trim()); + } + } + } + catch (Exception ex) + { + _logger.LogError(ex, $"Error loading vocabulary label '{label}' from {Provider} graphId={graphId}"); + } + } + + _logger.LogInformation($"Loaded vocabulary from {Provider} graphId={graphId}: {result.Sum(x => x.Value.Count)} terms across {result.Count} sources"); + await _cache.SetAsync(key, result, TimeSpan.FromMinutes(CacheMinutes)); + + return result; + } + + private async Task> LoadSynonymMappingByGraphIdAsync(string graphId) + { + var key = SynonymKey(graphId); + var cached = await _cache.GetAsync>(key); + if (cached != null) return cached; + + var result = new Dictionary(); + var graphDb = ResolveGraphDb(); + if (graphDb == null) return result; + + try + { + var queryResult = await graphDb.ExecuteQueryAsync(SynonymCypher, new GraphQueryExecuteOptions + { + GraphId = graphId + }); + + foreach (var row in queryResult?.Values ?? []) + { + var term = row.TryGetValue("term", out var t) ? t?.ToString() : null; + var table = row.TryGetValue("table", out var tb) ? tb?.ToString() : null; + var column = row.TryGetValue("column", out var co) ? co?.ToString() : null; + var canonical = row.TryGetValue("canonical_form", out var c) ? c?.ToString() : null; + if (string.IsNullOrWhiteSpace(term) || string.IsNullOrWhiteSpace(table) || string.IsNullOrWhiteSpace(column) || string.IsNullOrWhiteSpace(canonical)) continue; + + var dbPath = $"{table!.Trim()}.{column!.Trim()}"; + result[term!.Trim().ToLowerInvariant()] = (dbPath, canonical!); + } + + _logger.LogInformation($"Loaded synonym mapping from {Provider} graphId={graphId}: {result.Count} terms"); + await _cache.SetAsync(key, result, TimeSpan.FromMinutes(CacheMinutes)); + } + catch (Exception ex) + { + _logger.LogError(ex, $"Error loading synonym mapping from {Provider} graphId={graphId}"); + } + + return result; + } + + public async Task InvalidateCacheAsync(string graphId) + { + if (string.IsNullOrWhiteSpace(graphId)) return; + await _cache.RemoveAsync(VocabKey(graphId)); + await _cache.RemoveAsync(SynonymKey(graphId)); + } + + private IGraphDb? ResolveGraphDb() + { + var graphDb = _graphDbs.FirstOrDefault(x => string.Equals(x.Provider, GraphDbProvider, StringComparison.OrdinalIgnoreCase)); + if (graphDb == null) + { + _logger.LogWarning($"No IGraphDb registered with provider '{GraphDbProvider}'. Skip {Provider}."); + } + return graphDb; + } + + private bool TryGetGraphId(EntityDataLoadContext ctx, out string graphId) + { + if (ctx.Parameters.TryGetValue(GraphIdKey, out var value) && !string.IsNullOrWhiteSpace(value)) + { + graphId = value; + return true; + } + graphId = string.Empty; + _logger.LogWarning($"Skip {Provider}: '{GraphIdKey}' not provided in EntityDataLoadContext."); + return false; + } +} diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpEntityAnalyzer.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpEntityAnalyzer.cs index f17afafe4..6ae7930ba 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpEntityAnalyzer.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Services/FuzzySharpEntityAnalyzer.cs @@ -66,11 +66,14 @@ private async Task AnalyzeTextAsync(string text, EntityAn // Tokenize the text var tokens = TokenHelper.Tokenize(text); + // Build loader context once and reuse for both calls + var loaderCtx = BuildLoaderContext(options); + // Load vocabulary - var vocabulary = await LoadAllVocabularyAsync(options?.DataProviders); + var vocabulary = await LoadAllVocabularyAsync(options?.DataProviders, loaderCtx); // Load synonym mapping - var synonymMapping = await LoadAllSynonymMappingAsync(options?.DataProviders); + var synonymMapping = await LoadAllSynonymMappingAsync(options?.DataProviders, loaderCtx); // Analyze text var flaggedItems = AnalyzeTokens(tokens, vocabulary, synonymMapping, options); @@ -99,10 +102,11 @@ private async Task AnalyzeTextAsync(string text, EntityAn } } - public async Task>> LoadAllVocabularyAsync(IEnumerable? dataProviders = null) + public async Task>> LoadAllVocabularyAsync(IEnumerable? dataProviders = null, EntityDataLoadContext? ctx = null) { + ctx ??= new EntityDataLoadContext(); var dataLoaders = _tokenDataLoaders.Where(x => dataProviders == null || dataProviders.Contains(x.Provider)); - var results = await Task.WhenAll(dataLoaders.Select(c => c.LoadVocabularyAsync())); + var results = await Task.WhenAll(dataLoaders.Select(c => c.LoadVocabularyAsync(ctx))); var merged = new Dictionary>(); foreach (var dict in results) @@ -123,10 +127,11 @@ public async Task>> LoadAllVocabularyAsync(IE return merged; } - public async Task> LoadAllSynonymMappingAsync(IEnumerable? dataProviders = null) + public async Task> LoadAllSynonymMappingAsync(IEnumerable? dataProviders = null, EntityDataLoadContext? ctx = null) { + ctx ??= new EntityDataLoadContext(); var dataLoaders = _tokenDataLoaders.Where(x => dataProviders == null || dataProviders.Contains(x.Provider)); - var results = await Task.WhenAll(dataLoaders.Select(c => c.LoadSynonymMappingAsync())); + var results = await Task.WhenAll(dataLoaders.Select(c => c.LoadSynonymMappingAsync(ctx))); var merged = new Dictionary(); foreach (var dict in results) @@ -140,6 +145,22 @@ public async Task>> LoadAllVocabularyAsync(IE return merged; } + private static EntityDataLoadContext BuildLoaderContext(EntityAnalysisOptions? options) + { + var ctx = new EntityDataLoadContext(); + if (options?.LoaderParameters is { } src) + { + foreach (var kvp in src) + { + if (!string.IsNullOrEmpty(kvp.Key)) + { + ctx.Parameters[kvp.Key] = kvp.Value; + } + } + } + return ctx; + } + /// /// Analyze tokens for typos and entities /// diff --git a/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs b/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs index 2b75fabef..ac7b040f4 100644 --- a/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs +++ b/src/Plugins/BotSharp.Plugin.FuzzySharp/Settings/FuzzySharpSettings.cs @@ -3,6 +3,7 @@ namespace BotSharp.Plugin.FuzzySharp.Settings; public class FuzzySharpSettings { public TokenDataSettings Data { get; set; } + public MembaseNERSettings Membase { get; set; } = new(); } public class TokenDataSettings @@ -17,4 +18,22 @@ public class TokenFileSetting { public string Folder { get; set; } public string[] FileNames { get; set; } +} + +public class MembaseNERSettings +{ + // Per-tenant vocabulary sources. Outer key = graphId; inner key = graph node Label + // (used in "MATCH (n:Label) ..."); value = projection list for that label. + public Dictionary> VocabularySources { get; set; } + = new(StringComparer.OrdinalIgnoreCase); +} + +public class VocabularyFieldSetting +{ + // Property name on the graph node, used to build "RETURN n.GraphProperty". + public string GraphProperty { get; set; } = string.Empty; + + // "table.column" string surfaced to downstream consumers + // (becomes FlaggedTokenItem.Sources / EntityAnalysisResult.Data["sources"]). + public string SqlSource { get; set; } = string.Empty; } \ No newline at end of file diff --git a/src/Plugins/BotSharp.Plugin.Membase/Handlers/MembaseAuthHandler.cs b/src/Plugins/BotSharp.Plugin.Membase/Handlers/MembaseAuthHandler.cs index c7b511f59..163b955de 100644 --- a/src/Plugins/BotSharp.Plugin.Membase/Handlers/MembaseAuthHandler.cs +++ b/src/Plugins/BotSharp.Plugin.Membase/Handlers/MembaseAuthHandler.cs @@ -1,3 +1,5 @@ +using BotSharp.Abstraction.Conversations; +using Microsoft.AspNetCore.Http; using Microsoft.Extensions.Logging; using System.Net.Http; using System.Threading; @@ -6,20 +8,26 @@ namespace BotSharp.Plugin.Membase.Handlers; public class MembaseAuthHandler : DelegatingHandler { + private const string TokenStateKey = "membase_access_token"; + private readonly MembaseSettings _settings; + private readonly IHttpContextAccessor _httpContextAccessor; private readonly ILogger _logger; public MembaseAuthHandler( ILogger logger, - MembaseSettings settings) + MembaseSettings settings, + IHttpContextAccessor httpContextAccessor) { _logger = logger; _settings = settings; + _httpContextAccessor = httpContextAccessor; } protected override async Task SendAsync(HttpRequestMessage requestMessage, CancellationToken cancellationToken) { - requestMessage.Headers.TryAddWithoutValidation("Authorization", $"Bearer {_settings.ApiKey}"); + var token = ResolveToken(); + requestMessage.Headers.TryAddWithoutValidation("Authorization", $"Bearer {token}"); var response = await base.SendAsync(requestMessage, cancellationToken).ConfigureAwait(false); #if DEBUG @@ -29,4 +37,20 @@ protected override async Task SendAsync(HttpRequestMessage return response; } + + private string ResolveToken() + { + var requestServices = _httpContextAccessor.HttpContext?.RequestServices; + if (requestServices != null) + { + var state = requestServices.GetService(); + var stateToken = state?.GetState(TokenStateKey); + if (!string.IsNullOrWhiteSpace(stateToken)) + { + return stateToken; + } + } + + return _settings.ApiKey; + } } diff --git a/src/Plugins/BotSharp.Plugin.Membase/MembasePlugin.cs b/src/Plugins/BotSharp.Plugin.Membase/MembasePlugin.cs index cb828a3b2..dcd0ff091 100644 --- a/src/Plugins/BotSharp.Plugin.Membase/MembasePlugin.cs +++ b/src/Plugins/BotSharp.Plugin.Membase/MembasePlugin.cs @@ -24,6 +24,7 @@ public void RegisterDI(IServiceCollection services, IConfiguration config) config.Bind("Membase", settings); services.AddSingleton(sp => settings); + services.AddHttpContextAccessor(); services.AddTransient(); services.AddRefitClient(new RefitSettings {