/
BlobStorageMemorySource.cs
85 lines (71 loc) · 3.41 KB
/
BlobStorageMemorySource.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
using Azure.Storage.Blobs;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Newtonsoft.Json;
using VectorSearchAiAssistant.Service.Interfaces;
namespace VectorSearchAiAssistant.Service.MemorySource
{
/// <summary>
/// <see cref="IMemorySource"/> implementation that loads text memories from blobs.
/// Which containers and files to read is described by a JSON configuration blob
/// whose location is given by <see cref="BlobStorageMemorySourceSettings"/>.
/// </summary>
public class BlobStorageMemorySource : IMemorySource
{
    private readonly BlobStorageMemorySourceSettings _settings;
    private readonly ITextSplitterService _textSplitterService;
    private readonly ILogger _logger;

    // Loaded on first use by EnsureConfig; stays null until then.
    private BlobStorageMemorySourceConfig _config;

    private readonly BlobServiceClient _blobServiceClient;

    // Container clients already requested, keyed by container name.
    private readonly Dictionary<string, BlobContainerClient> _containerClients;

    /// <summary>
    /// Creates the memory source and the blob service client it reads from.
    /// </summary>
    /// <param name="settings">Options wrapper; its <c>Value</c> supplies the storage connection and the configuration blob location.</param>
    /// <param name="textSplitterService">Service used to split file content into chunks.</param>
    /// <param name="logger">Logger for this component.</param>
    public BlobStorageMemorySource(
        IOptions<BlobStorageMemorySourceSettings> settings,
        ITextSplitterService textSplitterService,
        ILogger<BlobStorageMemorySource> logger)
    {
        _settings = settings.Value;
        _textSplitterService = textSplitterService;
        _logger = logger;

        _blobServiceClient = new BlobServiceClient(_settings.ConfigBlobStorageConnection);
        _containerClients = new Dictionary<string, BlobContainerClient>();
    }

    /// <summary>
    /// Reads every text file listed in the configuration and returns its content,
    /// split into chunks for the files that request it.
    /// </summary>
    /// <returns>
    /// A flat list of strings: the text chunks of each file flagged with <c>SplitIntoChunks</c>,
    /// or the whole file content as a single entry otherwise.
    /// </returns>
    /// <exception cref="System.InvalidOperationException">The configuration blob deserializes to <c>null</c>.</exception>
    public async Task<List<string>> GetMemories()
    {
        await EnsureConfig();

        // Task.WhenAll enumerates the query, which starts one read per configured file;
        // the results come back in the same order as the tasks.
        var readTasks = _config.TextFileMemorySources
            .SelectMany(tfms => tfms.TextFiles.Select(tf => ReadTextFileContent(tfms.ContainerName, tf)));
        var filesContent = await Task.WhenAll(readTasks);

        return filesContent
            .SelectMany(txt => txt.SplitIntoChunks
                ? _textSplitterService.SplitPlainText(txt.Content).TextChunks
                : new List<string> { txt.Content })
            .ToList();
    }

    /// <summary>
    /// Loads and deserializes the configuration blob the first time it is needed.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">The configuration blob deserializes to <c>null</c>.</exception>
    private async Task EnsureConfig()
    {
        if (_config != null)
        {
            return;
        }

        var configContent = await ReadConfigContent(_settings.ConfigBlobStorageContainer, _settings.ConfigFilePath);

        // DeserializeObject yields null for empty or literal-null JSON; fail here with a
        // clear message instead of a NullReferenceException later in GetMemories.
        _config = JsonConvert.DeserializeObject<BlobStorageMemorySourceConfig>(configContent)
            ?? throw new System.InvalidOperationException(
                $"The memory source configuration blob '{_settings.ConfigFilePath}' in container " +
                $"'{_settings.ConfigBlobStorageContainer}' is empty or does not contain a valid configuration.");
    }

    /// <summary>
    /// Returns the client for <paramref name="containerName"/>, creating and caching it on first request.
    /// </summary>
    /// <param name="containerName">Name of the blob container.</param>
    private BlobContainerClient GetBlobContainerClient(string containerName)
    {
        if (!_containerClients.TryGetValue(containerName, out var containerClient))
        {
            containerClient = _blobServiceClient.GetBlobContainerClient(containerName);
            _containerClients.Add(containerName, containerClient);
        }

        return containerClient;
    }

    /// <summary>
    /// Reads the configuration blob as text.
    /// </summary>
    /// <param name="containerName">Container holding the configuration blob.</param>
    /// <param name="filePath">Blob name of the configuration file within the container.</param>
    /// <returns>The full text content of the blob.</returns>
    private async Task<string> ReadConfigContent(string containerName, string filePath)
    {
        return await ReadBlobContent(containerName, filePath);
    }

    /// <summary>
    /// Reads one configured text file and pairs its content with its chunking flag.
    /// </summary>
    /// <param name="containerName">Container holding the file.</param>
    /// <param name="file">Configuration entry providing the blob name and the <c>SplitIntoChunks</c> flag.</param>
    /// <returns>The file's text content and whether it should be split into chunks.</returns>
    private async Task<(string Content, bool SplitIntoChunks)> ReadTextFileContent(string containerName, TextFileMemorySourceFile file)
    {
        var content = await ReadBlobContent(containerName, file.FileName);
        return (content, file.SplitIntoChunks);
    }

    /// <summary>
    /// Opens the named blob and reads it to the end as text.
    /// </summary>
    /// <param name="containerName">Container holding the blob.</param>
    /// <param name="blobName">Name of the blob within the container.</param>
    /// <returns>The full text content of the blob.</returns>
    private async Task<string> ReadBlobContent(string containerName, string blobName)
    {
        // The container-client lookup runs before the first await, i.e. synchronously
        // on the caller, exactly as in the two methods this helper serves.
        var blobClient = GetBlobContainerClient(containerName).GetBlobClient(blobName);

        // Disposing the reader also closes the blob stream it wraps.
        using var reader = new StreamReader(await blobClient.OpenReadAsync());
        return await reader.ReadToEndAsync();
    }
}
}