Skip to content

Commit

Permalink
Merge pull request #123 from AzureCosmosDB/feature/nested-nulls
Browse files Browse the repository at this point in the history
Extending ignoring null to arrays and children; adding tests and docs
  • Loading branch information
bowencode committed Apr 11, 2024
2 parents 7eb9c90 + e2f1c1a commit d93bd18
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 6 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Cosmos.DataTransfer.Interfaces;
using System.Dynamic;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace Cosmos.DataTransfer.CosmosExtension.UnitTests
Expand Down Expand Up @@ -132,5 +133,170 @@ public void BuildDynamicObjectTree_WithPreservedMixedCaseIds_PassesThroughSource
Assert.IsNotNull(cosmosId);
Assert.IsFalse(string.IsNullOrWhiteSpace(cosmosId));
}

/// <summary>
/// Verifies that building the tree with <c>ignoreNullValues: true</c> leaves no "nullField"
/// entry on the root item, on items inside nested arrays, or on nested child items.
/// </summary>
[TestMethod]
public void BuildDynamicObjectTree_WithIgnoredNulls_ExcludesNullFields()
{
    // Arrange: build the fixture from the innermost pieces outwards.
    var firstGroup = new List<object?>
    {
        new CosmosDictionaryDataItem(new Dictionary<string, object?>
        {
            ["id"] = "sub1-1",
            ["nullField"] = null,
        }),
        new CosmosDictionaryDataItem(new Dictionary<string, object?>
        {
            ["id"] = "sub1-2",
        }),
    };
    var secondGroup = new List<object?>
    {
        new CosmosDictionaryDataItem(new Dictionary<string, object?>
        {
            ["id"] = "sub2-1",
            ["nullField"] = null,
        }),
    };
    var grandchild = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "child2_1-1",
        ["nullField"] = null,
    });
    var item = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "1",
        ["nullField"] = null,
        ["array"] = new List<object?> { firstGroup, secondGroup },
        ["child1"] = new CosmosDictionaryDataItem(new Dictionary<string, object?>
        {
            ["id"] = "child1-1",
        }),
        ["child2"] = new CosmosDictionaryDataItem(new Dictionary<string, object?>
        {
            ["id"] = "child2-1",
            ["nullField"] = null,
            ["child2_1"] = grandchild,
        }),
    });

    // Act
    dynamic tree = item.BuildDynamicObjectTree(ignoreNullValues: true)!;

    // Assert: the root item has no null field.
    Assert.IsFalse(HasProperty(tree, "nullField"));

    // Assert: the array of arrays keeps its shape.
    dynamic outerArray = tree.array;
    Assert.AreEqual(typeof(object[]), outerArray.GetType());
    Assert.AreEqual(2, outerArray.Length);

    // Assert: the first item of each inner array was declared with a null field, which must be gone.
    for (var index = 0; index < 2; index++)
    {
        dynamic innerArray = outerArray[index];
        Assert.AreEqual(typeof(object[]), innerArray.GetType());
        Assert.IsFalse(HasProperty(innerArray[0], "nullField"));
    }

    // Assert: nested child items at both depths have no null field.
    dynamic secondChild = tree.child2;
    Assert.IsFalse(HasProperty(secondChild, "nullField"));
    Assert.IsFalse(HasProperty(secondChild.child2_1, "nullField"));
}

/// <summary>
/// Verifies that building the tree with <c>ignoreNullValues: false</c> keeps every "nullField"
/// entry (with a null value) on the root item, on items inside nested arrays, and on nested
/// child items, and does not add one to an item that never declared it.
/// </summary>
[TestMethod]
public void BuildDynamicObjectTree_WithNulls_RetainsNullFields()
{
    // Arrange: name each nested piece, then assemble the root item.
    var itemWithNull1 = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "sub1-1",
        ["nullField"] = null,
    });
    var itemWithoutNull = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "sub1-2",
    });
    var itemWithNull2 = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "sub2-1",
        ["nullField"] = null,
    });
    var nestedArrays = new List<object?>
    {
        new List<object?> { itemWithNull1, itemWithoutNull },
        new List<object?> { itemWithNull2 },
    };
    var plainChild = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "child1-1",
    });
    var childWithNested = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "child2-1",
        ["nullField"] = null,
        ["child2_1"] = new CosmosDictionaryDataItem(new Dictionary<string, object?>
        {
            ["id"] = "child2_1-1",
            ["nullField"] = null,
        }),
    });
    var item = new CosmosDictionaryDataItem(new Dictionary<string, object?>
    {
        ["id"] = "1",
        ["nullField"] = null,
        ["array"] = nestedArrays,
        ["child1"] = plainChild,
        ["child2"] = childWithNested,
    });

    // Act
    dynamic tree = item.BuildDynamicObjectTree(ignoreNullValues: false)!;

    // Assert: the root item keeps its null field.
    Assert.IsTrue(HasProperty(tree, "nullField"));
    Assert.IsNull(tree.nullField);

    // Assert: the array of arrays keeps its shape.
    dynamic outerArray = tree.array;
    Assert.AreEqual(typeof(object[]), outerArray.GetType());
    Assert.AreEqual(2, outerArray.Length);

    // Assert: first inner array - the null field is retained where declared and absent where not.
    dynamic innerFirst = outerArray[0];
    Assert.AreEqual(typeof(object[]), innerFirst.GetType());
    dynamic declaredWithNull = innerFirst[0];
    Assert.IsTrue(HasProperty(declaredWithNull,"nullField"));
    Assert.IsNull(declaredWithNull.nullField);
    Assert.IsFalse(HasProperty(innerFirst[1], "nullField"));

    // Assert: second inner array - the null field is retained.
    dynamic innerSecond = outerArray[1];
    Assert.AreEqual(typeof(object[]), innerSecond.GetType());
    dynamic onlyElement = innerSecond[0];
    Assert.IsTrue(HasProperty(onlyElement, "nullField"));
    Assert.IsNull(onlyElement.nullField);

    // Assert: nested child items at both depths keep their null fields.
    dynamic secondChild = tree.child2;
    Assert.IsTrue(HasProperty(secondChild, "nullField"));
    Assert.IsNull(secondChild.nullField);
    dynamic grandchild = secondChild.child2_1;
    Assert.IsTrue(HasProperty(grandchild, "nullField"));
    Assert.IsNull(grandchild.nullField);
}

/// <summary>
/// Reports whether <paramref name="obj"/> exposes a member called <paramref name="name"/>.
/// </summary>
/// <param name="obj">The object to inspect.</param>
/// <param name="name">The member name to look for.</param>
/// <returns>
/// For an <see cref="ExpandoObject"/>, true when its dictionary view contains the key (even if
/// the stored value is null); for any other object, true when reflection finds a property with
/// that name on its runtime type.
/// </returns>
public static bool HasProperty(object obj, string name)
{
    // ExpandoObject members are not CLR properties; they are only visible through its
    // dictionary interface, which is explicitly implemented and so needs the interface type.
    if (obj is ExpandoObject expando)
    {
        IDictionary<string, object?> members = expando;
        return members.ContainsKey(name);
    }

    var property = obj.GetType().GetProperty(name);
    return property is not null;
}
}
}
3 changes: 2 additions & 1 deletion Extensions/Cosmos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Or with RBAC:
}
```

Sink requires an additional `PartitionKeyPath` parameter which is used when creating the container if it does not exist. To use hierarchical partition keys, instead use the `PartitionKeyPaths` setting to supply an array of up to 3 paths. It also supports an optional `RecreateContainer` parameter (`false` by default) to delete and then recreate the container to ensure only newly imported data is present. The optional `BatchSize` parameter (100 by default) sets the number of items to accumulate before inserting. `ConnectionMode` can be set to either `Gateway` (default) or `Direct` to control how the client connects to the CosmosDB service. For situations where a container is created as part of the transfer operation `CreatedContainerMaxThroughput` (in RUs) and `UseAutoscaleForCreatedContainer` provide the initial throughput settings which will be in effect when executing the transfer. To instead use shared throughput that has been provisioned at the database level, set the `UseSharedThroughput` parameter to `true`. The optional `WriteMode` parameter specifies the type of data write to use: `InsertStream`, `Insert`, `UpsertStream`, or `Upsert`. The `IsServerlessAccount` parameter specifies whether the target account uses Serverless instead of Provisioned throughput, which affects the way containers are created. Additional parameters allow changing the behavior of the Cosmos client appropriate to your environment. The `PreserveMixedCaseIds` parameter (`false` by default) ignores differently cased `id` fields and writes them through without modification, while generating a separate lowercased `id` field as required by Cosmos.
Sink requires an additional `PartitionKeyPath` parameter, which is used when creating the container if it does not exist. To use hierarchical partition keys, instead use the `PartitionKeyPaths` setting to supply an array of up to 3 paths. It also supports an optional `RecreateContainer` parameter (`false` by default) to delete and then recreate the container to ensure only newly imported data is present. The optional `BatchSize` parameter (100 by default) sets the number of items to accumulate before inserting. `ConnectionMode` can be set to either `Gateway` (default) or `Direct` to control how the client connects to the Cosmos DB service. For situations where a container is created as part of the transfer operation, `CreatedContainerMaxThroughput` (in RUs) and `UseAutoscaleForCreatedContainer` provide the initial throughput settings which will be in effect when executing the transfer. To instead use shared throughput that has been provisioned at the database level, set the `UseSharedThroughput` parameter to `true`. The optional `WriteMode` parameter specifies the type of data write to use: `InsertStream`, `Insert`, `UpsertStream`, or `Upsert`. The `IsServerlessAccount` parameter specifies whether the target account uses Serverless instead of Provisioned throughput, which affects the way containers are created. Additional parameters allow changing the behavior of the Cosmos client appropriate to your environment. The `PreserveMixedCaseIds` parameter (`false` by default) ignores differently cased `id` fields and writes them through without modification, while generating a separate lowercased `id` field as required by Cosmos. The optional `IgnoreNullValues` parameter excludes fields with null values, including those inside nested child objects and arrays, when writing to Cosmos DB.

### Sink

Expand All @@ -63,6 +63,7 @@ Sink requires an additional `PartitionKeyPath` parameter which is used when crea
"UseAutoscaleForCreatedContainer": true,
"WriteMode": "InsertStream",
"PreserveMixedCaseIds": false,
"IgnoreNullValues": false,
"IsServerlessAccount": false,
"UseSharedThroughput": false
}
Expand Down
11 changes: 6 additions & 5 deletions Interfaces/Cosmos.DataTransfer.Interfaces/DataItemExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public static class DataItemExtensions
/// </summary>
/// <param name="source"></param>
/// <param name="requireStringId">If true, adds a new GUID "id" field to any top level items where one is not already present.</param>
/// <param name="ignoreNullValues">If true, excludes fields containing null values from output.</param>
/// <param name="preserveMixedCaseIds">If true, disregards differently cased "id" fields for purposes of required "id" and passes them through.</param>
/// <returns>A dynamic object containing the entire data structure.</returns>
/// <remarks>The returned ExpandoObject can be used directly as an IDictionary.</remarks>
Expand Down Expand Up @@ -70,28 +71,28 @@ public static class DataItemExtensions
}
else if (value is IDataItem child)
{
value = BuildDynamicObjectTree(child);
value = BuildDynamicObjectTree(child, ignoreNullValues: ignoreNullValues);
}
else if (value is IEnumerable<object?> array)
{
value = BuildArray(array);
value = BuildArray(array, ignoreNulls: ignoreNullValues);
}

item.TryAdd(fieldName, value);
}

return item;

static object BuildArray(IEnumerable<object?> array)
static object BuildArray(IEnumerable<object?> array, bool ignoreNulls)
{
return array.Select(dataItem =>
{
switch (dataItem)
{
case IDataItem childObject:
return BuildDynamicObjectTree(childObject);
return BuildDynamicObjectTree(childObject, ignoreNullValues: ignoreNulls);
case IEnumerable<object?> array:
return BuildArray(array);
return BuildArray(array, ignoreNulls);
default:
return dataItem;
}
Expand Down

0 comments on commit d93bd18

Please sign in to comment.