diff --git a/Pipfile b/Pipfile index 8cf9c20..8f87bda 100644 --- a/Pipfile +++ b/Pipfile @@ -14,8 +14,8 @@ pygit2 = "*" types-docker = "*" types-pygit2 = "*" flask = "*" -jsondiff = "*" boto3 = "*" +deepdiff = "*" [dev-packages] black = "*" diff --git a/Pipfile.lock b/Pipfile.lock index f6fa651..1ff4aa9 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "f69aed4c7e9fc70ed7cd5699d9903216597857c291f59d98715b3602e796033e" + "sha256": "4811e3bc6c1ebe8265070fcf866a2b88abf2ed5c77182164fdb978c2758bac23" }, "pipfile-spec": 6, "requires": { @@ -26,20 +26,20 @@ }, "boto3": { "hashes": [ - "sha256:a839ce09a844d92e0039f95851e88da9df80c89ebb4c7818b3e78247fd97a8a7", - "sha256:c9bab807b372d5b076d6aeb1d6513131fa0b74e32d8895128f8568b6521296ea" + "sha256:a9c0955df0b52b43749d81bde159343a40ea2a3537a46049336fe8193871b18e", + "sha256:f4124548bb831e13504e805f2fbbfcee06df42fffea0655862c6eb9b95d6d1be" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.35.46" + "version": "==1.35.53" }, "botocore": { "hashes": [ - "sha256:8bbc9a55cae65a8db7f2e33ff087f4dbfc13fce868e8e3c5273ce9af367a555a", - "sha256:8c0ff5fdd611a28f5752189d171c69690dbc484fa06d74376890bb0543ec3dc1" + "sha256:12869640f2f9fab3152ea312a6906d5bc6ae15522cc74b6367ee1c273269a28b", + "sha256:e610ae076ad1eaed5680d3990493659bbabdffd67b15c61d8373a23e4bc41062" ], "markers": "python_version >= '3.8'", - "version": "==1.35.46" + "version": "==1.35.53" }, "certifi": { "hashes": [ @@ -242,6 +242,15 @@ "markers": "python_version >= '3.7'", "version": "==8.1.7" }, + "deepdiff": { + "hashes": [ + "sha256:245599a4586ab59bb599ca3517a9c42f3318ff600ded5e80a3432693c8ec3c4b", + "sha256:42e99004ce603f9a53934c634a57b04ad5900e0d8ed0abb15e635767489cbc05" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==8.0.1" + }, "docker": { "hashes": [ "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", @@ -451,15 +460,6 @@ "markers": "python_version >= '3.7'", "version": "==1.0.1" }, - "jsondiff": { - "hashes": [ - "sha256:658d162c8a86ba86de26303cd86a7b37e1b2c1ec98b569a60e2ca6180545f7fe", - "sha256:b1f0f7e2421881848b1d556d541ac01a91680cfcc14f51a9b62cdf4da0e56722" - ], - "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==2.2.1" - }, "markupsafe": { "hashes": [ "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", @@ -583,9 +583,17 @@ "sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1", "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648" ], - "markers": "python_version >= '3.10'", + "markers": "python_version >= '3.12'", "version": "==2.1.2" }, + "orderly-set": { + "hashes": [ + "sha256:52a18b86aaf3f5d5a498bbdb27bf3253a4e5c57ab38e5b7a56fa00115cd28448", + "sha256:f7a37c95a38c01cdfe41c3ffb62925a318a2286ea0a41790c057fc802aec54da" + ], + "markers": "python_version >= '3.8'", + "version": "==5.2.2" + }, "pandas": { "hashes": [ "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", @@ -637,46 +645,52 @@ }, "pyarrow": { "hashes": [ - "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", - "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", - "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", - "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", - "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", - "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", - "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", - "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", - "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", - "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", - "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", - "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", - "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", - "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", - "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", - "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", - "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", - "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", - "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", - "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", - "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", - "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", - "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", - "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", - "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", - "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", - "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", - "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", - "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", - "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", - "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", - "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", - "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", - "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", - "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", - "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8" + "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd", + "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f", + "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4", + "sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9", + "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2", + "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea", + "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b", + "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad", + "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310", + "sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954", + "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a", + "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55", + "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79", + "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba", + "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8", + "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce", + "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a", + "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f", + "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2", + "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93", + "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463", + "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319", + "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136", + "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9", + "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2", + "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f", + "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22", + "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5", + "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7", + "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03", + "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420", + "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22", + "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1", + "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122", + "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd", + "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280", + "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d", + "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a", + "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802", + "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8", + "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e", + "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==17.0.0" + "markers": "python_version >= '3.9'", + "version": "==18.0.0" }, "pycparser": { "hashes": [ @@ -737,65 +751,6 @@ ], "version": "==2024.2" }, - "pyyaml": { - "hashes": [ - "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff", - "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", - "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", - "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e", - "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", - "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", - "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", - "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", - "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", - "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", - "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a", - "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", - "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", - "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8", - "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", - "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19", - "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", - "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a", - "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", - "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", - "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", - "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631", - "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d", - "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", - "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", - "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", - "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", - "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", - "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", - "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706", - "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", - "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", - "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", - "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083", - "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", - "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", - "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", - "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f", - "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725", - "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", - "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", - "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", - "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", - "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", - "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5", - "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d", - "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290", - "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", - "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", - "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", - "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", - "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12", - "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4" - ], - "markers": "python_version >= '3.8'", - "version": "==6.0.2" - }, "requests": { "hashes": [ "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", @@ -856,11 +811,11 @@ }, "types-setuptools": { "hashes": [ - "sha256:2e48ff3acd4919471e80d5e3f049cce5c177e108d5d36d2d4cee3fa4d4104258", - "sha256:86ea31b5f6df2c6b8f2dc8ae3f72b213607f62549b6fa2ed5866e5299f968694" + "sha256:2949913a518d5285ce00a3b7d88961c80a6e72ffb8f3da0a3f5650ea533bd45e", + "sha256:6721ac0f1a620321e2ccd87a9a747c4a383dc381f78d894ce37f2455b45fcf1c" ], "markers": "python_version >= '3.8'", - "version": "==75.2.0.20241019" + "version": "==75.2.0.20241025" }, "tzdata": { "hashes": [ @@ -880,11 +835,11 @@ }, "werkzeug": { "hashes": [ - "sha256:02c9eb92b7d6c06f31a782811505d2157837cea66aaede3e217c7c27c039476c", - "sha256:34f2371506b250df4d4f84bfe7b0921e4762525762bbd936614909fe25cd7306" + "sha256:8cd39dfbdfc1e051965f156163e2974e52c210f130810e9ad36858f0fd3edad4", + "sha256:a71124d1ef06008baafa3d266c02f56e1836a5984afd6dd6c9230669d60d9fb5" ], - "markers": "python_version >= '3.8'", - "version": "==3.0.4" + "markers": "python_version >= '3.9'", + "version": "==3.1.1" } }, "develop": { @@ -1212,12 +1167,12 @@ }, "ipython": { "hashes": [ - "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a", - "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35" + "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8", + "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb" ], "index": "pypi", "markers": "python_version >= '3.10'", - "version": "==8.28.0" + "version": "==8.29.0" }, "jedi": { "hashes": [ @@ -1237,42 +1192,42 @@ }, "mypy": { "hashes": [ - "sha256:02dcfe270c6ea13338210908f8cadc8d31af0f04cee8ca996438fe6a97b4ec66", - "sha256:0dcc1e843d58f444fce19da4cce5bd35c282d4bde232acdeca8279523087088a", - "sha256:0e6fe449223fa59fbee351db32283838a8fee8059e0028e9e6494a03802b4004", - "sha256:1230048fec1380faf240be6385e709c8570604d2d27ec6ca7e573e3bc09c3735", - "sha256:186e0c8346efc027ee1f9acf5ca734425fc4f7dc2b60144f0fbe27cc19dc7931", - "sha256:19bf51f87a295e7ab2894f1d8167622b063492d754e69c3c2fed6563268cb42a", - "sha256:20db6eb1ca3d1de8ece00033b12f793f1ea9da767334b7e8c626a4872090cf02", - "sha256:389e307e333879c571029d5b93932cf838b811d3f5395ed1ad05086b52148fb0", - "sha256:3d7d4371829184e22fda4015278fbfdef0327a4b955a483012bd2d423a788801", - "sha256:427878aa54f2e2c5d8db31fa9010c599ed9f994b3b49e64ae9cd9990c40bd635", - "sha256:4ee5932370ccf7ebf83f79d1c157a5929d7ea36313027b0d70a488493dc1b179", - "sha256:5fcde63ea2c9f69d6be859a1e6dd35955e87fa81de95bc240143cf00de1f7f81", - "sha256:673ba1140a478b50e6d265c03391702fa11a5c5aff3f54d69a62a48da32cb811", - "sha256:8135ffec02121a75f75dc97c81af7c14aa4ae0dda277132cfcd6abcd21551bfd", - "sha256:843826966f1d65925e8b50d2b483065c51fc16dc5d72647e0236aae51dc8d77e", - "sha256:94b2048a95a21f7a9ebc9fbd075a4fcd310410d078aa0228dbbad7f71335e042", - "sha256:96af62050971c5241afb4701c15189ea9507db89ad07794a4ee7b4e092dc0627", - "sha256:9fb83a7be97c498176fb7486cafbb81decccaef1ac339d837c377b0ce3743a7f", - "sha256:9fe20f89da41a95e14c34b1ddb09c80262edcc295ad891f22cc4b60013e8f78d", - "sha256:a5a437c9102a6a252d9e3a63edc191a3aed5f2fcb786d614722ee3f4472e33f6", - "sha256:a7b76fa83260824300cc4834a3ab93180db19876bce59af921467fd03e692810", - "sha256:b16fe09f9c741d85a2e3b14a5257a27a4f4886c171d562bc5a5e90d8591906b8", - "sha256:b947097fae68004b8328c55161ac9db7d3566abfef72d9d41b47a021c2fba6b1", - "sha256:ce561a09e3bb9863ab77edf29ae3a50e65685ad74bba1431278185b7e5d5486e", - "sha256:d34167d43613ffb1d6c6cdc0cc043bb106cac0aa5d6a4171f77ab92a3c758bcc", - "sha256:d54d840f6c052929f4a3d2aab2066af0f45a020b085fe0e40d4583db52aab4e4", - "sha256:d90da248f4c2dba6c44ddcfea94bb361e491962f05f41990ff24dbd09969ce20", - "sha256:dc6e2a2195a290a7fd5bac3e60b586d77fc88e986eba7feced8b778c373f9afe", - "sha256:de5b2a8988b4e1269a98beaf0e7cc71b510d050dce80c343b53b4955fff45f19", - "sha256:e10ba7de5c616e44ad21005fa13450cd0de7caaa303a626147d45307492e4f2d", - "sha256:f59f1dfbf497d473201356966e353ef09d4daec48caeacc0254db8ef633a28a5", - "sha256:f5b3936f7a6d0e8280c9bdef94c7ce4847f5cdfc258fbb2c29a8c1711e8bb96d" + "sha256:0246bcb1b5de7f08f2826451abd947bf656945209b140d16ed317f65a17dc7dc", + "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e", + "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f", + "sha256:07de989f89786f62b937851295ed62e51774722e5444a27cecca993fc3f9cd74", + "sha256:100fac22ce82925f676a734af0db922ecfea991e1d7ec0ceb1e115ebe501301a", + "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2", + "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b", + "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73", + "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e", + "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d", + "sha256:3e38b980e5681f28f033f3be86b099a247b13c491f14bb8b1e1e134d23bb599d", + "sha256:4bde84334fbe19bad704b3f5b78c4abd35ff1026f8ba72b29de70dda0916beb6", + "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca", + "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d", + "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5", + "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62", + "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a", + "sha256:7029881ec6ffb8bc233a4fa364736789582c738217b133f1b55967115288a2bc", + "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7", + "sha256:7bcb0bb7f42a978bb323a7c88f1081d1b5dee77ca86f4100735a6f541299d8fb", + "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7", + "sha256:7f5b7deae912cf8b77e990b9280f170381fdfbddf61b4ef80927edd813163732", + "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80", + "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a", + "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc", + "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2", + "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0", + "sha256:a6789be98a2017c912ae6ccb77ea553bbaf13d27605d2ca20a76dfbced631b24", + "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7", + "sha256:bde31fc887c213e223bbfc34328070996061b0833b0a4cfec53745ed61f3519b", + "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372", + "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.12.1" + "version": "==1.13.0" }, "mypy-extensions": { "hashes": [ @@ -1346,7 +1301,7 @@ "sha256:faa88bc527d0f097abdc2c663cddf37c05a1c2f113716601555249805cf573f1", "sha256:fc44e3c68ff00fd991b59092a54350e6e4911152682b4782f68070985aa9e648" ], - "markers": "python_version >= '3.10'", + "markers": "python_version >= '3.12'", "version": "==2.1.2" }, "packaging": { @@ -1439,55 +1394,61 @@ }, "pyarrow": { "hashes": [ - "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a", - "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca", - "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597", - "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c", - "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb", - "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977", - "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3", - "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687", - "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7", - "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204", - "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28", - "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087", - "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15", - "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc", - "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2", - "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155", - "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df", - "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22", - "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a", - "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b", - "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03", - "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda", - "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07", - "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204", - "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b", - "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c", - "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545", - "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655", - "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420", - "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5", - "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4", - "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8", - "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053", - "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145", - "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047", - "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8" + "sha256:00178509f379415a3fcf855af020e3340254f990a8534294ec3cf674d6e255fd", + "sha256:03f40b65a43be159d2f97fd64dc998f769d0995a50c00f07aab58b0b3da87e1f", + "sha256:082ba62bdcb939824ba1ce10b8acef5ab621da1f4c4805e07bfd153617ac19d4", + "sha256:09f30690b99ce34e0da64d20dab372ee54431745e4efb78ac938234a282d15f9", + "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2", + "sha256:28f9c39a56d2c78bf6b87dcc699d520ab850919d4a8c7418cd20eda49874a2ea", + "sha256:2c664ab88b9766413197733c1720d3dcd4190e8fa3bbdc3710384630a0a7207b", + "sha256:2c992716cffb1088414f2b478f7af0175fd0a76fea80841b1706baa8fb0ebaad", + "sha256:2e549a748fa8b8715e734919923f69318c953e077e9c02140ada13e59d043310", + "sha256:320ae9bd45ad7ecc12ec858b3e8e462578de060832b98fc4d671dee9f10d9954", + "sha256:336addb8b6f5208be1b2398442c703a710b6b937b1a046065ee4db65e782ff5a", + "sha256:3ac24b2be732e78a5a3ac0b3aa870d73766dd00beba6e015ea2ea7394f8b4e55", + "sha256:45476490dd4adec5472c92b4d253e245258745d0ccaabe706f8d03288ed60a79", + "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba", + "sha256:4d5ca5d707e158540312e09fd907f9f49bacbe779ab5236d9699ced14d2293b8", + "sha256:58a62549a3e0bc9e03df32f350e10e1efb94ec6cf63e3920c3385b26663948ce", + "sha256:5f0510608ccd6e7f02ca8596962afb8c6cc84c453e7be0da4d85f5f4f7b0328a", + "sha256:603cd8ad4976568954598ef0a6d4ed3dfb78aff3d57fa8d6271f470f0ce7d34f", + "sha256:606e9a3dcb0f52307c5040698ea962685fb1c852d72379ee9412be7de9c5f9e2", + "sha256:616ea2826c03c16e87f517c46296621a7c51e30400f6d0a61be645f203aa2b93", + "sha256:66dcc216ebae2eb4c37b223feaf82f15b69d502821dde2da138ec5a3716e7463", + "sha256:6dd1b52d0d58dd8f685ced9971eb49f697d753aa7912f0a8f50833c7a7426319", + "sha256:871b292d4b696b09120ed5bde894f79ee2a5f109cb84470546471df264cae136", + "sha256:8c70c1965cde991b711a98448ccda3486f2a336457cf4ec4dca257a926e149c9", + "sha256:8f40ec677e942374e3d7f2fad6a67a4c2811a8b975e8703c6fd26d3b168a90e2", + "sha256:907ee0aa8ca576f5e0cdc20b5aeb2ad4d3953a3b4769fc4b499e00ef0266f02f", + "sha256:a1824f5b029ddd289919f354bc285992cb4e32da518758c136271cf66046ef22", + "sha256:a6aa027b1a9d2970cf328ccd6dbe4a996bc13c39fd427f502782f5bdb9ca20f5", + "sha256:a71ab0589a63a3e987beb2bc172e05f000a5c5be2636b4b263c44034e215b5d7", + "sha256:b30a927c6dff89ee702686596f27c25160dd6c99be5bcc1513a763ae5b1bfc03", + "sha256:b46591222c864e7da7faa3b19455196416cd8355ff6c2cc2e65726a760a3c420", + "sha256:b5bd7fd32e3ace012d43925ea4fc8bd1b02cc6cc1e9813b518302950e89b5a22", + "sha256:bc1daf7c425f58527900876354390ee41b0ae962a73ad0959b9d829def583bb1", + "sha256:bc97316840a349485fbb137eb8d0f4d7057e1b2c1272b1a20eebbbe1848f5122", + "sha256:be08af84808dff63a76860847c48ec0416928a7b3a17c2f49a072cac7c45efbd", + "sha256:d5795e37c0a33baa618c5e054cd61f586cf76850a251e2b21355e4085def6280", + "sha256:d6331f280c6e4521c69b201a42dd978f60f7e129511a55da9e0bfe426b4ebb8d", + "sha256:dc892be34dbd058e8d189b47db1e33a227d965ea8805a235c8a7286f7fd17d3a", + "sha256:e7ab04f272f98ebffd2a0661e4e126036f6936391ba2889ed2d44c5006237802", + "sha256:eb7e3abcda7e1e6b83c2dc2909c8d045881017270a119cc6ee7fdcfe71d02df8", + "sha256:f1a198a50c409ab2d009fbf20956ace84567d67f2c5701511d4dd561fae6f32e", + "sha256:fe92efcdbfa0bcf2fa602e466d7f2905500f33f09eb90bf0bcf2e6ca41b574c8" ], "index": "pypi", - "markers": "python_version >= '3.8'", - "version": "==17.0.0" + "markers": "python_version >= '3.9'", + "version": "==18.0.0" }, "pyarrow-stubs": { "hashes": [ - "sha256:b7c1085d8cf3224c0fd105a16d063ea3b76d8b38d42044a13200b3e2cbf7c302", - "sha256:c59c8163e30f3146d6b0b060aa67623d5d18b292b17e8a53016906c499920b3c" + "sha256:71255538eaa2d5fc85626a520c642285206acb43eccade36e724a4c1c5153299", + "sha256:d0a7b3f661ebddf53291df959648c84d109055aa48b245686f1f1878c249a2b8" ], "index": "pypi", "markers": "python_version >= '3.8' and python_version < '4'", - "version": "==17.9" + "version": "==17.11" }, "pygments": { "hashes": [ @@ -1583,37 +1544,37 @@ }, "ruff": { "hashes": [ - "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628", - "sha256:10842f69c245e78d6adec7e1db0a7d9ddc2fff0621d730e61657b64fa36f207e", - "sha256:194d6c46c98c73949a106425ed40a576f52291c12bc21399eb8f13a0f7073495", - "sha256:1eb54986f770f49edb14f71d33312d79e00e629a57387382200b1ef12d6a4ef9", - "sha256:211d877674e9373d4bb0f1c80f97a0201c61bcd1e9d045b6e9726adc42c156aa", - "sha256:214b88498684e20b6b2b8852c01d50f0651f3cc6118dfa113b4def9f14faaf06", - "sha256:47a86360cf62d9cd53ebfb0b5eb0e882193fc191c6d717e8bef4462bc3b9ea2b", - "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737", - "sha256:4b406c2dce5be9bad59f2de26139a86017a517e6bcd2688da515481c05a2cb11", - "sha256:630fce3fefe9844e91ea5bbf7ceadab4f9981f42b704fae011bb8efcaf5d84be", - "sha256:82c2579b82b9973a110fab281860403b397c08c403de92de19568f32f7178598", - "sha256:9af971fe85dcd5eaed8f585ddbc6bdbe8c217fb8fcf510ea6bca5bdfff56040e", - "sha256:ab7d98c7eed355166f367597e513a6c82408df4181a937628dbec79abb2a1fe4", - "sha256:b641c7f16939b7d24b7bfc0be4102c56562a18281f84f635604e8a6989948914", - "sha256:d71672336e46b34e0c90a790afeac8a31954fd42872c1f6adaea1dff76fd44f9", - "sha256:dc452ba6f2bb9cf8726a84aa877061a2462afe9ae0ea1d411c53d226661c601d", - "sha256:f6c968509f767776f524a8430426539587d5ec5c662f6addb6aa25bc2e8195ec", - "sha256:ff4aabfbaaba880e85d394603b9e75d32b0693152e16fa659a3064a85df7fce2" + "sha256:21aae53ab1490a52bf4e3bf520c10ce120987b047c494cacf4edad0ba0888da2", + "sha256:28bd8220f4d8f79d590db9e2f6a0674f75ddbc3847277dd44ac1f8d30684b828", + "sha256:2b14e77293380e475b4e3a7a368e14549288ed2931fce259a6f99978669e844f", + "sha256:576305393998b7bd6c46018f8104ea3a9cb3fa7908c21d8580e3274a3b04b691", + "sha256:5b813ef26db1015953daf476202585512afd6a6862a02cde63f3bafb53d0b2d4", + "sha256:7b792468e9804a204be221b14257566669d1db5c00d6bb335996e5cd7004ba80", + "sha256:853277dbd9675810c6826dad7a428d52a11760744508340e66bf46f8be9701d9", + "sha256:9fd67094e77efbea932e62b5d2483006154794040abb3a5072e659096415ae1e", + "sha256:b19fafe261bf741bca2764c14cbb4ee1819b67adb63ebc2db6401dcd652e3748", + "sha256:b73f873b5f52092e63ed540adefc3c36f1f803790ecf2590e1df8bf0a9f72cb8", + "sha256:bb8368cd45bba3f57bb29cbb8d64b4a33f8415d0149d2655c5c8539452ce7760", + "sha256:ccc7e0fc6e0cb3168443eeadb6445285abaae75142ee22b2b72c27d790ab60ba", + "sha256:dba53ed84ac19ae4bfb4ea4bf0172550a2285fa27fbb13e3746f04c80f7fa088", + "sha256:dd8800cbe0254e06b8fec585e97554047fb82c894973f7ff18558eee33d1cb88", + "sha256:e00163fb897d35523c70d71a46fbaa43bf7bf9af0f4534c53ea5b96b2e03397b", + "sha256:f3c54b538633482dc342e9b634d91168fe8cc56b30a4b4f99287f4e339103e88", + "sha256:fa993cfc9f0ff11187e82de874dfc3611df80852540331bc85c75809c93253a8", + "sha256:fd77877a4e43b3a98e5ef4715ba3862105e299af0c48942cc6d51ba3d97dc859" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==0.7.0" + "version": "==0.7.2" }, "setuptools": { "hashes": [ - "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec", - "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8" + "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", + "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==75.2.0" + "version": "==75.3.0" }, "six": { "hashes": [ @@ -1664,11 +1625,11 @@ }, "virtualenv": { "hashes": [ - "sha256:2ca56a68ed615b8fe4326d11a0dca5dfbe8fd68510fb6c6349163bed3c15f2b2", - "sha256:44a72c29cceb0ee08f300b314848c86e57bf8d1f13107a5e671fb9274138d655" + "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba", + "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4" ], "markers": "python_version >= '3.8'", - "version": "==20.27.0" + "version": "==20.27.1" }, "wcwidth": { "hashes": [ diff --git a/abdiff/core/calc_ab_diffs.py b/abdiff/core/calc_ab_diffs.py index e482f08..d5faf36 100644 --- a/abdiff/core/calc_ab_diffs.py +++ b/abdiff/core/calc_ab_diffs.py @@ -1,3 +1,4 @@ +import json import logging import time from collections.abc import Generator @@ -5,7 +6,7 @@ import pyarrow as pa import pyarrow.dataset as ds -from jsondiff import diff +from deepdiff import DeepDiff from abdiff.core.utils import update_or_create_run_json, write_to_dataset @@ -22,6 +23,7 @@ pa.field("record_a", pa.binary()), pa.field("record_b", pa.binary()), pa.field("ab_diff", pa.string()), + pa.field("modified_timdex_fields", pa.list_(pa.string())), pa.field("has_diff", pa.string()), ) ) @@ -61,23 +63,52 @@ def get_diffed_batches_iter( # convert batch to pandas dataframe and calc values for new columns df = batch.to_pandas() # noqa: PD901 - df["ab_diff"] = df.apply( + + # calculate all diffs and unpack into separate columns + diff_results = df.apply( lambda row: calc_record_diff(row["record_a"], row["record_b"]), axis=1 ) - df["has_diff"] = df["ab_diff"].apply(lambda diff_value: diff_value != "{}") + df["ab_diff"] = diff_results.apply(lambda x: x[0]) + df["modified_timdex_fields"] = diff_results.apply( + lambda x: list(x[1]) if x[1] else [] + ) + df["has_diff"] = diff_results.apply(lambda x: x[2]) yield pa.RecordBatch.from_pandas(df) # type: ignore[attr-defined] -def calc_record_diff(record_a: bytes | None, record_b: bytes | None) -> str | None: - """Calculate symmetric diff from two JSON strings.""" +def calc_record_diff( + record_a: str | bytes | dict | None, + record_b: str | bytes | dict | None, + *, + ignore_order: bool = True, + report_repetition: bool = True, +) -> tuple[str | None, list[str] | None, bool]: + """Calculate diff from two JSON byte strings. + + The DeepDiff library has the property 'affected_root_keys' on the produced diff object + that is very useful for our purposes. At this time, we simply want to know if + anything about a particular root level TIMDEX field (e.g. 'dates' or 'title') has + changed which this method provides explicitly. We also serialize the full diff to + JSON via the to_json() method for storage and possible further analysis. + + This method returns a tuple: + - ab_diff: [str] - full diff as JSON + - modified_timdex_fields: list[str] - list of modified root keys (TIMDEX fields) + - has_diff: bool - True/False if any diff present + """ if record_a is None or record_b is None: - return None - - return diff( - record_a.decode(), - record_b.decode(), - syntax="symmetric", - load=True, - dump=True, + return None, None, False + + diff = DeepDiff( + json.loads(record_a) if isinstance(record_a, str | bytes) else record_a, + json.loads(record_b) if isinstance(record_b, str | bytes) else record_b, + ignore_order=ignore_order, + report_repetition=report_repetition, ) + + ab_diff = diff.to_json() + modified_timdex_fields = diff.affected_root_keys + has_diff = bool(modified_timdex_fields) + + return ab_diff, modified_timdex_fields, has_diff diff --git a/abdiff/core/calc_ab_metrics.py b/abdiff/core/calc_ab_metrics.py index 2c12944..4607e4d 100644 --- a/abdiff/core/calc_ab_metrics.py +++ b/abdiff/core/calc_ab_metrics.py @@ -1,6 +1,5 @@ # ruff: noqa: S608 -import json import logging import os import time @@ -48,12 +47,11 @@ def create_record_diff_matrix_dataset( ) -> str: """Create a boolean sparse matrix of modified fields for all records. - This writes a single parquet file with rows for each record, and columns for each - TIMDEX field, and a value of integer 1 if that field has a diff and 0 if not. This - provides a handy way to calculate aggregate metrics for a given field or source in - later steps. The column "has_diff" is also carried over from the diffs dataset to - provide a single column to check if ANY of the field columns indicate a diff for a - record row. + This writes a single parquet file with rows for each record, columns for each TIMDEX + field, and a value of integer 1 if that field has a diff and 0 if not. This provides + a handy way to calculate aggregate metrics for a given field or source in later steps. + The column "has_diff" is also carried over from the diffs dataset to provide a single + column to check if ANY of the field columns indicate a diff for a record row. This code momentarily creates a single dataframe in memory for all rows. This is safe given the nature of the dataframe: there may be 10m rows, and potentially 20-30 @@ -66,32 +64,31 @@ def create_record_diff_matrix_dataset( for i, batch in enumerate( diffs_ds.to_batches( batch_size=batch_size, - columns=["timdex_record_id", "source", "ab_diff", "has_diff"], + columns=["timdex_record_id", "source", "modified_timdex_fields", "has_diff"], ) ): start_time = time.time() batch_df = batch.to_pandas() - # parse diff JSON to dictionary for batch - batch_df["ab_diff"] = batch_df["ab_diff"].apply( - lambda diff_json: json.loads(diff_json) - ) - batch_metrics = [] for _, row in batch_df.iterrows(): record_metrics = { "timdex_record_id": row["timdex_record_id"], "source": row["source"], - "has_diff": 1 if row["has_diff"] == "true" else 0, + "has_diff": (1 if row["has_diff"] == "true" else 0), } - diff_data = row["ab_diff"] - record_metrics.update(generate_field_diff_bools_for_record(diff_data)) + + # for each modified field (root key in diff), set column and value = 1 (True) + if row["modified_timdex_fields"] is not None: + for field in row["modified_timdex_fields"]: + record_metrics[field] = 1 + batch_metrics.append(record_metrics) # build dataframe for batch batch_metrics_df = pd.DataFrame(batch_metrics) batch_metrics_dfs.append(batch_metrics_df) - logger.info(f"batch: {i+1}, elapsed: {time.time()-start_time}") + logger.info(f"batch: {i + 1}, elapsed: {time.time() - start_time}") # concatenate all dataframes into single dataframe for writing and replace None with 0 metrics_df = pd.concat(batch_metrics_dfs) @@ -107,32 +104,6 @@ def create_record_diff_matrix_dataset( return metrics_dataset -def generate_field_diff_bools_for_record(diff_data: dict) -> dict: - """Function to return dictionary of fields that have a diff. - - Determining if a field had a diff is as straight-forward as looking to see if it shows - up in the parsed diff JSON. The fields may be at the root of the diff, or they could - be nested under "$insert" or "$delete" nodes in the diff. - - If a field from the original A/B records are not in the diff at all, then they did not - have changes, and therefore will not receive a 1 here to indicate a diff. - """ - fields_with_diffs = {} - - for key in diff_data: - - # identify modified fields nested in $insert or $delete blocks - if key in ("$insert", "$delete"): - for subfield in diff_data[key]: - fields_with_diffs[subfield] = 1 - - # identified modified fields at root of diff - else: - fields_with_diffs[key] = 1 - - return fields_with_diffs - - def calculate_metrics_data(field_matrix_parquet: str) -> dict: """Create a dictionary of metrics via DuckDB queries.""" summary: dict = {} diff --git a/pyproject.toml b/pyproject.toml index 73ed296..4bfb68e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ disallow_untyped_defs = true exclude = ["tests/", "output/"] [[tool.mypy.overrides]] -module = ["docker", "docker.models.containers", "duckdb", "duckdb.duckdb", "jsondiff", "pandas", "ijson"] +module = ["docker", "docker.models.containers", "duckdb", "duckdb.duckdb", "deepdiff", "pandas", "ijson"] ignore_missing_imports = true [tool.pytest.ini_options] diff --git a/tests/test_calc_ab_diffs.py b/tests/test_calc_ab_diffs.py index 5a0ed38..6997723 100644 --- a/tests/test_calc_ab_diffs.py +++ b/tests/test_calc_ab_diffs.py @@ -17,22 +17,66 @@ def test_calc_record_diff_has_diff(): a = {"color": "green"} b = {"color": "red"} - assert calc_record_diff(json.dumps(a).encode(), json.dumps(b).encode()) == json.dumps( - {"color": ["green", "red"]} + ab_diff, modified_timdex_fields, has_diff = calc_record_diff(a, b) + assert ab_diff == json.dumps( + {"values_changed": {"root['color']": {"new_value": "red", "old_value": "green"}}} ) + assert modified_timdex_fields == {"color"} + assert has_diff def test_calc_record_diff_no_diff(): a = {"color": "green"} b = a - assert calc_record_diff(json.dumps(a).encode(), json.dumps(b).encode()) == json.dumps( - {} # no diff - ) + ab_diff, modified_timdex_fields, has_diff = calc_record_diff(a, b) + assert ab_diff == json.dumps({}) # no diff + assert not modified_timdex_fields + assert not has_diff def test_calc_record_diff_one_input_is_none(): a = {"color": "green"} - assert calc_record_diff(json.dumps(a).encode(), None) is None + assert calc_record_diff(a, None) == (None, None, False) + + +def test_calc_record_diff_array_by_default_order_not_a_diff(): + """Arrays with the same values, but differently ordered, not considered a diff.""" + a = {"colors": ["green", "red"]} + b = {"colors": ["red", "green"]} + ab_diff, modified_timdex_fields, has_diff = calc_record_diff(a, b) + assert ab_diff == json.dumps({}) # no diff + assert not modified_timdex_fields + assert not has_diff + + +def test_calc_record_diff_array_set_flag_order_is_a_diff(): + """Arrays with the same values, but differently ordered, not considered a diff.""" + a = {"colors": ["green", "red"]} + b = {"colors": ["red", "green"]} + _, _, has_diff = calc_record_diff(a, b, ignore_order=False) + assert has_diff + + +def test_calc_record_diff_array_repetition_is_reported_when_diff(): + """Same array values, but different in repetition, is considered a diff.""" + a = {"colors": ["red", "green"]} + b = {"colors": ["red", "green", "green"]} + ab_diff, modified_timdex_fields, has_diff = calc_record_diff(a, b) + assert ab_diff == json.dumps( + { + "repetition_change": { + "root['colors'][1]": { + "old_repeat": 1, + "new_repeat": 2, + "old_indexes": [1], + "new_indexes": [1, 2], + "value": "green", + } + } + } + ) + assert modified_timdex_fields == {"colors"} + assert has_diff def test_diffed_batches_yields_pyarrow_record_batch(collated_dataset): @@ -46,7 +90,9 @@ def test_diffed_batches_first_batch_has_diff(collated_dataset): batch_one = next(batch_iter).to_pandas() row = batch_one.iloc[0] - assert row.ab_diff == json.dumps({"color": ["green", "red"]}) + assert row.ab_diff == json.dumps( + {"values_changed": {"root['color']": {"new_value": "red", "old_value": "green"}}} + ) assert row.has_diff @@ -73,6 +119,7 @@ def test_calc_ab_diffs_writes_dataset(caplog, run_directory, collated_dataset_di pa.field("record_a", pa.binary()), pa.field("record_b", pa.binary()), pa.field("ab_diff", pa.string()), + pa.field("modified_timdex_fields", pa.list_(pa.string())), pa.field("has_diff", pa.string()), ) ) diff --git a/tests/test_calc_ab_metrics.py b/tests/test_calc_ab_metrics.py index 3662693..49faadc 100644 --- a/tests/test_calc_ab_metrics.py +++ b/tests/test_calc_ab_metrics.py @@ -12,32 +12,10 @@ calc_ab_metrics, calculate_metrics_data, create_record_diff_matrix_dataset, - generate_field_diff_bools_for_record, ) from abdiff.core.utils import load_dataset, read_run_json -def test_record_field_diffs_no_diffs(): - diff_data = {} - assert generate_field_diff_bools_for_record(diff_data) == {} - - -def test_record_field_diffs_one_diff(): - diff_data = {"color": "green"} - assert generate_field_diff_bools_for_record(diff_data) == {"color": 1} - - -def test_record_field_diffs_diff_from_inserts_and_deletes_counted_only_once(): - diff_data = { - "$insert": {"fruits": "strawberry"}, - "$delete": {"vegetables": "onion"}, - } - assert generate_field_diff_bools_for_record(diff_data) == { - "fruits": 1, - "vegetables": 1, - } - - def test_sparse_matrix_dataset_created_success(run_directory, diffs_dataset_directory): diff_matrix_dataset_filepath = create_record_diff_matrix_dataset( run_directory, diffs_dataset_directory