Skip to content

Commit

Permalink
refactor: #1294: Use IEnumerable when bulk inserting to reduce memory usage.
Browse files Browse the repository at this point in the history
  • Loading branch information
zabeen committed May 3, 2024
1 parent 98355b8 commit 0b9ffc7
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 55 deletions.
10 changes: 7 additions & 3 deletions Atlas.Common/Sql/BulkInsert/BulkInsertRepository.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Linq;
using System.Reflection;
using System.Threading.Tasks;
using Atlas.Common.Utils.Extensions;
using Microsoft.Data.SqlClient;

namespace Atlas.Common.Sql.BulkInsert
Expand All @@ -13,7 +14,7 @@ public interface IBulkInsertRepository<in TEntity> where TEntity : IBulkInsertMo
/// <summary>
/// Note, `Id` and properties annotated with <see cref="BulkInsertIgnoreAttribute"/> will not be included in the bulk insert.
/// </summary>
Task BulkInsert(IReadOnlyCollection<TEntity> entities);
Task BulkInsert(IEnumerable<TEntity> entities);
}

public abstract class BulkInsertRepository<TEntity> : IBulkInsertRepository<TEntity> where TEntity : IBulkInsertModel
Expand All @@ -29,14 +30,17 @@ protected BulkInsertRepository(string connectionString, string bulkInsertTableNa
this.bulkInsertTableName = bulkInsertTableName;
}

public async Task BulkInsert(IReadOnlyCollection<TEntity> entities)
public async Task BulkInsert(IEnumerable<TEntity> entities)
{
if (!entities.Any())
// ReSharper disable once PossibleMultipleEnumeration - `IsNullOrEmpty` does not enumerate the collection
if (entities.IsNullOrEmpty())
{
return;
}

var columnNames = GetColumnNames();

// ReSharper disable once PossibleMultipleEnumeration
var dataTable = BuildDataTable(entities, columnNames);

using (var sqlBulk = BuildSqlBulkCopy(columnNames))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Atlas.Client.Models.Search.Results.MatchPrediction;
using System.Collections.Generic;
using Atlas.Client.Models.Search.Results.MatchPrediction;
using Atlas.Common.Public.Models.MatchPrediction;

namespace Atlas.Debug.Client.Models.MatchPrediction
Expand All @@ -10,9 +11,9 @@ public class GenotypeMatcherResponse
public SubjectResult DonorInfo { get; set; }

/// <summary>
/// Patient-donor genotype pairs (represented as a single, formatted string) and their match counts.
/// Patient-donor genotype pairs and their match counts.
/// </summary>
public string MatchedGenotypePairs { get; set; }
public IEnumerable<string> MatchedGenotypePairs { get; set; }
}

public class SubjectResult
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
public interface IProcessedResultsRepository<in TDbModel>
{
Task DeleteResults(int searchRequestRecordId);
Task BulkInsert(IReadOnlyCollection<TDbModel> results);
Task BulkInsert(IEnumerable<TDbModel> results);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ internal class MatchingGenotypesProcessor : IMatchingGenotypesProcessor
var patientSummaryId = await GetOrAddImputationSummary(patientInfo.ExternalId, response.Result!.PatientInfo);
var donorSummaryId = await GetOrAddImputationSummary(donorInfo.ExternalId, response.Result!.DonorInfo);
var genotypes = SplitIntoMatchingGenotypes(
response.Result!.MatchedGenotypePairs, patientSummaryId, donorSummaryId).ToList();
response.Result!.MatchedGenotypePairs, patientSummaryId, donorSummaryId);
await matchingRepo.BulkInsert(genotypes);
return true;
}
Expand Down Expand Up @@ -105,57 +105,50 @@ internal class MatchingGenotypesProcessor : IMatchingGenotypesProcessor
}

private static IEnumerable<MatchingGenotypes> SplitIntoMatchingGenotypes(
string matchingGenotypesAsString,
IEnumerable<string> matchingGenotypePairs,
int patientSummaryId,
int donorSummaryId)
{
// split genotypesAsString into individual genotypes by splitting by \r\n
// first line contains header so will be skipped
var matchingGenotypeStrings = matchingGenotypesAsString
.Split("\r\n")
return matchingGenotypePairs
.Skip(1)
.Where(s => !string.IsNullOrEmpty(s));

foreach (var matchingGenotypes in matchingGenotypeStrings)
.Select(pair => pair.Split(","))
.Select(pairParts => new MatchingGenotypes
{
var matchingGenotypeParts = matchingGenotypes.Split(",");
yield return new MatchingGenotypes
{
TotalCount = int.Parse(matchingGenotypeParts[0]),
A_Count = int.Parse(matchingGenotypeParts[1]),
B_Count = int.Parse(matchingGenotypeParts[2]),
C_Count = int.Parse(matchingGenotypeParts[3]),
DQB1_Count = int.Parse(matchingGenotypeParts[4]),
DRB1_Count = int.Parse(matchingGenotypeParts[5]),

Patient_A_1 = matchingGenotypeParts[6],
Patient_A_2 = matchingGenotypeParts[7],
Patient_B_1 = matchingGenotypeParts[8],
Patient_B_2 = matchingGenotypeParts[9],
Patient_C_1 = matchingGenotypeParts[10],
Patient_C_2 = matchingGenotypeParts[11],
Patient_DQB1_1 = matchingGenotypeParts[12],
Patient_DQB1_2 = matchingGenotypeParts[13],
Patient_DRB1_1 = matchingGenotypeParts[14],
Patient_DRB1_2 = matchingGenotypeParts[15],
Patient_Likelihood = decimal.Parse(matchingGenotypeParts[16]),

Donor_A_1 = matchingGenotypeParts[17],
Donor_A_2 = matchingGenotypeParts[18],
Donor_B_1 = matchingGenotypeParts[19],
Donor_B_2 = matchingGenotypeParts[20],
Donor_C_1 = matchingGenotypeParts[21],
Donor_C_2 = matchingGenotypeParts[22],
Donor_DQB1_1 = matchingGenotypeParts[23],
Donor_DQB1_2 = matchingGenotypeParts[24],
Donor_DRB1_1 = matchingGenotypeParts[25],
Donor_DRB1_2 = matchingGenotypeParts[26],
Donor_Likelihood = decimal.Parse(matchingGenotypeParts[27]),

Patient_ImputationSummary_Id = patientSummaryId,
Donor_ImputationSummary_Id = donorSummaryId
};
}
TotalCount = int.Parse(pairParts[0]),
A_Count = int.Parse(pairParts[1]),
B_Count = int.Parse(pairParts[2]),
C_Count = int.Parse(pairParts[3]),
DQB1_Count = int.Parse(pairParts[4]),
DRB1_Count = int.Parse(pairParts[5]),
Patient_A_1 = pairParts[6],
Patient_A_2 = pairParts[7],
Patient_B_1 = pairParts[8],
Patient_B_2 = pairParts[9],
Patient_C_1 = pairParts[10],
Patient_C_2 = pairParts[11],
Patient_DQB1_1 = pairParts[12],
Patient_DQB1_2 = pairParts[13],
Patient_DRB1_1 = pairParts[14],
Patient_DRB1_2 = pairParts[15],
Patient_Likelihood = decimal.Parse(pairParts[16]),
Donor_A_1 = pairParts[17],
Donor_A_2 = pairParts[18],
Donor_B_1 = pairParts[19],
Donor_B_2 = pairParts[20],
Donor_C_1 = pairParts[21],
Donor_C_2 = pairParts[22],
Donor_DQB1_1 = pairParts[23],
Donor_DQB1_2 = pairParts[24],
Donor_DRB1_1 = pairParts[25],
Donor_DRB1_2 = pairParts[26],
Donor_Likelihood = decimal.Parse(pairParts[27]),
Patient_ImputationSummary_Id = patientSummaryId,
Donor_ImputationSummary_Id = donorSummaryId
});
}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Net.Http;
using System;
using System.Net.Http;
using System.Threading.Tasks;
using Atlas.Common.Public.Models.GeneticData.PhenotypeInfo;
using Atlas.Common.Public.Models.GeneticData.PhenotypeInfo.TransferModels;
Expand Down Expand Up @@ -33,7 +34,10 @@ public interface IMatchingGenotypesRequester

internal class MatchingGenotypesRequester : AtlasHttpRequester, IMatchingGenotypesRequester
{
private static readonly HttpClient HttpRequestClient = new();
private static readonly HttpClient HttpRequestClient = new HttpClient
{
Timeout = TimeSpan.FromMinutes(10)
};

/// <inheritdoc />
public MatchingGenotypesRequester(IOptions<ValidationHomeworkSettings> settings)
Expand Down

0 comments on commit 0b9ffc7

Please sign in to comment.