-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathBulkInsert.cs
More file actions
103 lines (86 loc) · 3.78 KB
/
BulkInsert.cs
File metadata and controls
103 lines (86 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
using System.Threading.Tasks;
using Microsoft.Azure.WebJobs;
using System.Collections.Generic;
using Azure.Data.Tables;
using System.Linq;
using System;
using Azure.Storage.Blobs;
using System.IO;
using System.Globalization;
namespace AdaTheDev.TableStorageBulkInsert
{
/// <summary>
/// Queue-triggered Azure Function that bulk-loads a CSV blob into Azure Table Storage
/// and records throughput statistics for the run.
/// </summary>
public class BulkInsert
{
    private readonly Settings _settings;

    public BulkInsert(Settings settings)
    {
        _settings = settings;
    }

    /// <summary>
    /// Entry point. Reads the CSV blob named in <paramref name="message"/>, upserts its rows
    /// into a per-test table (<c>TestTable{TestNumber}</c>) in parallel batches of
    /// <c>message.TaskParallelism</c>, then writes a stats row to <c>StatsTable</c>.
    /// </summary>
    [FunctionName("BulkInsert")]
    public async Task Run([QueueTrigger("bulkinsert-queue", Connection = "AzureWebJobsStorage")] BulkInsertMessage message)
    {
        // Read the whole file into a list of lists. Each inner list contains up to
        // message.TaskParallelism entities that are inserted in parallel.
        var allEntitiesBatched = await GetBatchedEntitiesAsync(message.BlobName, message.TaskParallelism);

        // Create destination table for this test run.
        var tableName = $"TestTable{message.TestNumber}";
        var tableClient = new TableClient(_settings.AzureStorageConnectionString, tableName);
        await tableClient.CreateIfNotExistsAsync();

        var recordsProcessed = 0;

        // Load the data, timing the whole insert phase.
        var startTime = DateTime.UtcNow;
        foreach (var batch in allEntitiesBatched)
        {
            // One upsert per entity, awaited as a group — parallelism == batch size.
            var tasks = batch.Select(x => tableClient.UpsertEntityAsync(x));
            await Task.WhenAll(tasks);
            recordsProcessed += batch.Count;
        }
        var endTime = DateTime.UtcNow;

        var durationSeconds = endTime.Subtract(startTime).TotalSeconds;

        // Log stats out. RowKey is a UTC timestamp so repeated runs of the same
        // test case produce distinct rows under the same partition.
        var statsTable = new TableClient(_settings.AzureStorageConnectionString, "StatsTable");
        await statsTable.CreateIfNotExistsAsync();
        var statsEntity = new StatsEntity
        {
            PartitionKey = $"TESTCASE-{message.TestNumber}",
            RowKey = DateTime.UtcNow.ToString("yyyyMMddTHHmmssfff"),
            BlobName = message.BlobName,
            StartTime = startTime,
            EndTime = endTime,
            ScaleOutMaxInstances = message.ScaleOutMaxInstances,
            TaskParallelism = message.TaskParallelism,
            RecordsProcessed = recordsProcessed,
            DurationSeconds = durationSeconds,
            // Guard the rate against a zero-duration (e.g. empty-file) run.
            RecordsPerSecond = durationSeconds > 0 ? recordsProcessed / durationSeconds : 0
        };
        await statsTable.AddEntityAsync(statsEntity);
    }

    /// <summary>
    /// Downloads the named CSV blob from the "data" container and parses it into
    /// batches of at most <paramref name="batchSize"/> <see cref="TestEntity"/> rows.
    /// Column 0 is used as PartitionKey/SomeValue; column 1 is parsed as SomeNumber.
    /// </summary>
    private async Task<List<List<TestEntity>>> GetBatchedEntitiesAsync(string blobName, int batchSize)
    {
        var data = new List<List<TestEntity>>();
        var blobClient = new BlobClient(_settings.AzureStorageConnectionString, "data", blobName);

        // NOTE(review): buffers the whole blob in memory — fine for test-sized files,
        // consider OpenReadAsync streaming for very large inputs.
        using var stream = new MemoryStream();
        await blobClient.DownloadToAsync(stream);
        stream.Position = 0;

        using var streamReader = new StreamReader(stream);
        using var csvReader = new CsvHelper.CsvReader(streamReader, CultureInfo.InvariantCulture);

        var batch = new List<TestEntity>(batchSize);
        while (csvReader.Read())
        {
            batch.Add(new TestEntity
            {
                PartitionKey = csvReader[0],
                RowKey = "DATA",
                SomeValue = csvReader[0],
                // InvariantCulture: CSV data is machine-generated, not user-locale text.
                SomeNumber = int.Parse(csvReader[1], CultureInfo.InvariantCulture)
            });

            if (batch.Count == batchSize)
            {
                // Hand off the full batch and start a fresh list (no copy needed).
                data.Add(batch);
                batch = new List<TestEntity>(batchSize);
            }
        }

        // BUG FIX: flush the trailing partial batch. Previously, any records beyond the
        // last full batch (record count % batchSize != 0) were silently dropped.
        if (batch.Count > 0)
        {
            data.Add(batch);
        }

        return data;
    }
}
}