From adda3edd931f2b238b2429784721ed6bd049b1a0 Mon Sep 17 00:00:00 2001 From: Felix Jordan Date: Fri, 21 Nov 2025 06:56:24 +0000 Subject: [PATCH 1/5] feat: Add missing data normalization to validation module --- .../validation_engine.cpython-313.pyc | Bin 5249 -> 7840 bytes ...dation_engine.cpython-313-pytest-8.4.2.pyc | Bin 15361 -> 16806 bytes .../tests/test_validation_engine.py | 48 ++++++++++++++- .../services/validation/validation_engine.py | 58 ++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) diff --git a/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc b/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc index 92752a77f5469e263db3e30c15009ea647803c40..3d448babed60aed285aa12bfa28d0f53a990007e 100644 GIT binary patch delta 2624 zcma)8Uu@e%7(d5OlO|2}C+!loBX!zNUD_>e(+sq(%R08QYOH@&$78Ip;Kp9-Epfux zfpr2>c;l^6K(t6`K@=^O_K*o43J9?YBqs5aEYlLqgeJiYZyg%~@d9^loNe8ZI4O6& zyYIg5cfarF&w1OM-EY5VvsnOY362);Kb5w-h@0uo38Eo={&aex)oN`9XDMi%F?nrQ z)E_M_&IACnjU^aoCpyFu9e~+_XP^CGeTT|SZ{F6VIwCMG%V9a5gb^VenH3`EBk2bqKQ+fsX~7BR9H#0)IK8dx$wG6X_xju9cE4X}VQ>@lpH(I8|9 zkqn8UhsX}#rMNB>$1)a&F)k`z5M)}J6=+dPBo&(H6po&rqoI(9auHFQp<|*f<1r3t zA(l|)=rb>hLX?;3*F|NP7S1Q4VnkGEY%a{kqr3oR3n%gPj35b+Q;>I#ex>RpjNONk z?>JOm(av(Bgq>pmGh8CW8TwL^j>~7UObSX8O7eh(M(rRi39r+<7*WKy#KF0B1UkMm z>!@m{c^aemkJvs-q{@!Q5lSy`px-@;LSqo|JXOmC3#JU2k0{qP_a#a_m^5l99_jRa zIKarK03?y;L_16DtlD7lu!J2jC{K3KPmZHeqF8d;3my(bPSxQs0ggfD9^;R+2}_Isc^ zC-rOOr2c=9x>6(6fl0NQ9=u0Yn(K5ZQr$xUtAy)&i1@)O$V;qg5+z|{)03(5#BD|h zjZ-KI>y0((zHll~db;rbq%EZzJQz3RF}O}F(twu5{}p^I$~5&z`08mL1XjSf327ts zWO4lRh&bw}Hi!d*eS$Vnc{@UkBH3!{c<+Ha&}xIOj~U=KkEw(Y;y3iS)IX?QAjf(R zUg#cutTf>oP_118zg{0Yk!hz-C8kv3Wp4w-1Vb0P|c`xg|$`3g9@dW!!xP{Z=rpt$oMc+5ITt$&Z`s#sS={<3@ea(O@Jv` z2&2Llu6a=%PbjYHrrJKQMUnA4ii>Eq&Ckz6yadPbZTvOsaMhV_FdC0=Q90MdHm6?a$J-CK0`-g5iXmId->pMQ~GJdy3oGTGs! z1FPhMx#Do8_b;Ey8$XxJJN6ZK?7M3G-g?dYt*vmJDGf1|uDylE&Wgu-$^EfAPnJA^ zg`;=qu3Ufa*-uiN7O9HwVA*%9=sR{jRAAVW@5BvP_gxZrdhZMzKnL^w6??JqnM!~| zlbpHK*mt|BHEX;~UFNfIl$!#jrofMl-MQggjf1~CL2Lhg7qGh)83G`ez&K6#HvE)CMLaTS{K}T@S0$gp`w)9(N z=kB6&ciGufboNvZjTBtnmk;GFtNttY(yjwHTtjz)htOIcu8b93`!_uPo;bNw|@bQo{mKT delta 244 zcmZ2r+o;L+nU|M~0SIDIt-0C+>Y%Xv}U63}o$YJ<_gMmlx0*mYi zRt8fa#>pJQQ&}|`{WQ5J7t3my-eN8;DJtRzI-p1%M3?}Hl?+7!AT}S6xW!?Uo1ape dlWJFF3FI;Yaq-;A*JWpMe`RoGWRwG|1OQWqL7xBs diff --git a/backend/app/services/validation/tests/__pycache__/test_validation_engine.cpython-313-pytest-8.4.2.pyc b/backend/app/services/validation/tests/__pycache__/test_validation_engine.cpython-313-pytest-8.4.2.pyc index 262197228f09434131ec6c60fc357c04d1be7eab..fc903b96cdbc1fd83d79e7f94498413dbf7eccf0 100644 GIT binary patch delta 2758 zcmb_e%}*Ow5Z|?T4K`jIY_JJnvJND82{HcQqX{7&#pHt|5Loij5b8SiCb4R4>#b>n z+X|xO&};hCL)%j?RZtIABbiWGbIw=?g} z%=^u}d28Rk^~D|5@mvsW41A^&V){Y-Jx3#(xz`b%AF&dv$jIg zkgEV4ay6h+wgT45Ho!W$2GDhbmF&wHb5t^E&3T1f&yksUlH^o1uI6S*Qi&&TD#^E1 ztu6~QiA=hv#Ani~n$F%xq93p^9(H(`;eH-6G0ew|%*u>xlFd4DL31}F^W&^+3G!K- ztAejtM`NxU=B%5(wxC6>30kr&t=-??HeL1~I>j)#Xuw#DN&QOe+S>f~3+5hH7B2gH zOud{jB|D}pYevjOH68c**=8CR+vtMH+PLnX@G#62&@?_1cA8*YU1O~5ke%Cm``KRB zF#ewq4ILX+5X&jo22BTmOO})CWY+~Q#OfM9<)rsKZo1gjV7kE5@t*qmtoa5bxkok2 zJBiG!64n?^$SP_5;uadE*1BmGa5k55mA}R7v@CWMI=f-lSnEl z@!fRcrq}@iI)ajgIMJ~dpjdCL6I{zq_lgwfw;PTYg$kLe@bnfnIt^hR74)#b^!qIQIHhSOQO@HCSgBFcL9;6ZZd68euvHV=z za19#@9=b#!x>@N}v5n~r|G~Vc0L@S#vVQ!C1P0jk>Jq>Lv*c zyH~3?x9ypY_u5w7PP=6xw$jeHo69aQT)NSovTsktXvM*JV{F+oRBj)~%QphosxV(A z+E#aaMax63<oR&^UIPCw!Lo-?$yW!Kl=|6$@uJ7i2BnBU_`14;v&k0;G2O>+~0eh{EP@MT0= z5PS%Jggppt08%w+rve|5NC#+}3*S%n!H8N7Ag+A~9(uaWi~MmAA|;1KU`?bG>6H4a zPH|U|4(UQTfDi;ITEktvp-@*S6e1xUi~?v4Lu2R8pBRvXo;(#les*|bQr43o zy=c{k@EXEFgaKNv8M2dIC^sQA(++#`;!atS6KKGbo-z7)2PPOYR0B^q1SuUxa}ZrIvbSozQ0uG48Ex>i((-!Ft6` zx*F>A5d5Ja+i?qEUua(acy7FNa>=+lo6l$F$Yq#454So^ZwsE%|wUV3|%^+IjG zy-MJi#V8^DG7z?G+26kdem)MgQl(bu+wEyp&Xs1qHCKGOO~yYcJ9OQrp%d%0eh>9; zrFAguvn9bb2eN#4;R)!5oG6VzauR^W*@Juc>n^BIC85m>XR$~AwW|n22GR$4( Xj_{PpJmb39{%2QN`olnwL+5`1(0_85 delta 1568 zcmb`HUr1AN6vyw~?w#A+AKNt9oi6W`&1Q4n%{C)+TuaQ%EW1xuV)JhU|M9yM{6mJ= zTLj4uf?h%h0wJilFZEt8AqBn!gXkdk)eCnLdKMR~Fc;S9Hzkla`&pE$yUoEl= z(~d2>U0~pOtw^cc#Y+x1+rLy3Nl1)p^f1JtazH`lfmYQ7w5dfvyJ`kHR12_J6@aHk z*r0VCJ#_`yrDZQ&C@s^Kq78#7!wn|WNh%9-rWJPE(A-Sl3SK^AoSZpN*MwW<5;zky z5hMM{Rns^x8_k5Loz`eLV^t<+`i3XSSTY^UA2j9<8Z4Ezs z0JEIFYPGY5Er!mQiuzw$JLl(NnwStp2s}G$iG*UZ9Fk?3U@nNACY|M;VlJSk`l*YySKm(dFL>zGupxFRY-b4KKvFPJ3!L(a{Ck}x)$hA+uP~Nm7@c&$&n82@< zv7-vCmArf_2&~dOrQw`FQD5~bT=#wkPbzP{fg|T_IpMaCr8F%yK0?|+UP%8I=-ZWd z^K1uyZ||;VjLH6i>|G_tuJXD8d>5$lcAa9%RhcT`NX4OLeiHTx*T5BS(%#U|36H)O zetxjt{ocwLr)Or5SM;le_14GRcJKfU6dLGyXnb=EJ?Tl6L%5wu7#t zb*xKJSSGsaK=hzdAc9d7bhW||bVK}rjJauSVsL77C_#E)$ik~70GmdZ XWq&ZtBWBV5o$2|*(R53g!(;yd*1;QX diff --git a/backend/app/services/validation/tests/test_validation_engine.py b/backend/app/services/validation/tests/test_validation_engine.py index 84e928b8..ce23deca 100644 --- a/backend/app/services/validation/tests/test_validation_engine.py +++ b/backend/app/services/validation/tests/test_validation_engine.py @@ -1,5 +1,51 @@ import pytest -from backend.app.services.validation.validation_engine import perform_cross_source_checks +from backend.app.services.validation.validation_engine import perform_cross_source_checks, normalize_missing + +def test_normalize_missing(): + data = { + "field1": "value1", + "field2": None, + "field3": "", + "nested": { + "nested_field1": "nested_value1", + "nested_field2": None, + "nested_field3": "" + }, + "list_field": [ + "item1", + None, + "item3", + "" + ] + } + + expected_normalized_data = { + "field1": "value1", + "field2": "N/A", + "field3": "N/A", + "nested": { + "nested_field1": "nested_value1", + "nested_field2": "N/A", + "nested_field3": "N/A" + }, + "list_field": [ + "item1", + "N/A", + "item3", + "N/A" + ], + "missing_data_report": { + "field2": "Missing or empty field replaced with 'N/A'.", + "field3": "Missing or empty field replaced with 'N/A'.", + "nested.nested_field2": "Missing or empty field replaced with 'N/A'.", + "nested.nested_field3": "Missing or empty field replaced with 'N/A'.", + "list_field[1]": "Missing or empty field replaced with 'N/A'.", + "list_field[3]": "Missing or empty field replaced with 'N/A'." + } + } + + normalized_data = normalize_missing(data) + assert normalized_data == expected_normalized_data def test_circulating_supply_match(): data = { diff --git a/backend/app/services/validation/validation_engine.py b/backend/app/services/validation/validation_engine.py index c0d4416a..c1e2d111 100644 --- a/backend/app/services/validation/validation_engine.py +++ b/backend/app/services/validation/validation_engine.py @@ -119,6 +119,64 @@ def perform_cross_source_checks(data: Dict[str, Any]) -> Dict[str, Any]: "INFO: Documentation circulating supply not found." ) + if validation_results["alerts"]: + validation_results["cross_source_checks"] = "COMPLETED_WITH_ALERTS" + else: + validation_results["cross_source_checks"] = "PASSED" + return validation_results + +def normalize_missing(data: Dict[str, Any]) -> Dict[str, Any]: + """ + Normalizes the input data by replacing missing or empty fields with explicit placeholders + and generates a `missing_data_report` explaining the gaps. + + Args: + data: The input data dictionary to normalize. + + Returns: + A new dictionary with missing fields normalized and a `missing_data_report`. + """ + normalized_data = data.copy() + missing_data_report = {} + + def _traverse_and_normalize(current_data, path): + if isinstance(current_data, dict): + for key, value in current_data.items(): + new_path = f"{path}.{key}" if path else key + if value is None or (isinstance(value, str) and value.strip() == ""): + normalized_data_ref = normalized_data + path_parts = new_path.split('.') + # Navigate to the correct nested dictionary in normalized_data + for part in path_parts[:-1]: + normalized_data_ref = normalized_data_ref.setdefault(part, {}) + normalized_data_ref[path_parts[-1]] = "N/A" # Replace with placeholder + missing_data_report[new_path] = "Missing or empty field replaced with 'N/A'." + elif isinstance(value, (dict, list)): + _traverse_and_normalize(value, new_path) + elif isinstance(current_data, list): + for index, item in enumerate(current_data): + new_path = f"{path}[{index}]" + if item is None or (isinstance(item, str) and item.strip() == ""): + normalized_data_ref = normalized_data + path_parts = new_path.replace(']', '').split('[') + # Navigate to the correct nested structure in normalized_data + for i, part in enumerate(path_parts[:-1]): + if part.isdigit(): + normalized_data_ref = normalized_data_ref[int(part)] + else: + normalized_data_ref = normalized_data_ref.setdefault(part, {}) + if path_parts[-1].isdigit(): + normalized_data_ref[int(path_parts[-1])] = "N/A" + else: + normalized_data_ref[path_parts[-1]] = "N/A" + missing_data_report[new_path] = "Missing or empty field replaced with 'N/A'." + elif isinstance(item, (dict, list)): + _traverse_and_normalize(item, new_path) + + _traverse_and_normalize(data, "") + normalized_data["missing_data_report"] = missing_data_report + return normalized_data + # You can add more validation functions as needed. From 35c5137ac37464f4ca354cb77b1183047fc41118 Mon Sep 17 00:00:00 2001 From: Felix Jordan Date: Fri, 21 Nov 2025 07:19:01 +0000 Subject: [PATCH 2/5] Fix: Prevent in-place mutation in normalize_missing with deepcopy --- .../validation_engine.cpython-313.pyc | Bin 7840 -> 7863 bytes ...dation_engine.cpython-313-pytest-8.4.2.pyc | Bin 16806 -> 17489 bytes .../tests/test_validation_engine.py | 3 +++ .../services/validation/validation_engine.py | 3 ++- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc b/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc index 3d448babed60aed285aa12bfa28d0f53a990007e..4b9ff1210818264abd5ec833e5acd7943ea99f1b 100644 GIT binary patch delta 677 zcmZutO=uHA6rQ&`yV*^4*KBLk?rJnSs4P?ksaB{B0rlc9>C%fXDK%MZVt&f35%i*9 zZ#^Z0;IS7Ey$O5h#hZdh1rI_%(7T{wpy0_j3rdj8;p5Gl@B7|-Z)bUL+4^OgX@V!d zTWHuv`qmtL>7OV*@xhm`s#j7hRUoH{Pbl@tO$*5JYufQb7R%wm+P$!Lf34k!qDE(9?csW>7e=gzPlxzG zdkItVd3{Aevu*1V%3;8B(Ys{HHvSVa_}P%f8QlFsgQS$nolL_fJ1n3iK&&*`5ED@o!(GCXFD65^3+-;b*9uGQ7QTzv e(OZNcG64LPB%u2mfs-G}{Ac3uZ~0eLNdEwNsfl<1 delta 639 zcmdmPyTF$3GcPX}0}vD>DrA0Bc7C74Z$A&aGmBbWzG9Y+jH3~LNKNDmf$Y{8r$eOZh}oM`%B=39cT16sfZwtx$V zGMFlEuqy6!c1_;NJd7=y=P(*EF>*~l&peTpALsaH$|$fofHQ~{sBbZ!3#0Jl7kuX#MK|x@7h~iV18I^55}F)ET$67L zbOZIK3o5dj0EMhJ_X^rE@x%a`ag1?Ham-U0CO_m9o4i4kWAkg_9#)`96_UqsmW3VcDO%G zq9_o^DrNx^3JwkqMIdu-3FhS&G`=A{>f0L7a?DlLJ;uLgz>VhrNa z(=#V#PR?%dzab@afkpf~ivp|7M;As0R+$+t3ns_PigAM6pvmZ`$uqe|RyCwZ3dk+8 z0TG~ZECPj95y;bE(^oPSft?I?0f$X)eoARhs$Ee8P@WNpiw^>c56p~=j31ep7+Jn^ SFfi&~X3&|;EVqFLqz?d-OM&?S diff --git a/backend/app/services/validation/tests/__pycache__/test_validation_engine.cpython-313-pytest-8.4.2.pyc b/backend/app/services/validation/tests/__pycache__/test_validation_engine.cpython-313-pytest-8.4.2.pyc index fc903b96cdbc1fd83d79e7f94498413dbf7eccf0..469c3018099abe8f52b0662862152251ab714635 100644 GIT binary patch delta 2245 zcmb_cO>7%Q6y9C$ZXN$6@n2$Ur|bSWwj0OJkKGc2YtyPEA%P}~MUWMeP3$<95<8kr z1!+Z#gpev#NJ&RfAud5kNEL?yd!cu@pbC^!(8zG%fd5}| zerDc&@4b2R!Rz$qHQKggwVEihKAabd4%fPEn+dY(KaM^xT14s@s>EcN2?O}|uM8EZ zHA~5m<#b<-m7Pov3{g}$6Wn#?zNIJ}W`uUpn&J9s(KeaxXX32p6`Qjr{5Rc!*9NT3 zhbbz{>mes>_&t}ajnzM=hm+i}>qfZ4o~intqs3;irEw&DnQ_x_4u8a0qb*`sE}Ln8nQHp7*ZMhGP}@OVRO=oRI!#(u}~~Yi_@Tn z%oTR7yu4a5=JNSvkns2H8+Ie|l4GbHpaZ~%%Up~%5>XKF``laRE+WEhkYA$lS-#-j z>7}@qZD;$ow`RW#%*fuwBnuYu9?r)0~uJu=0Vi9Dus;b*Dv19r}f#y*Og}GdMu3As)e$AOXM+Z~`Cz z&`BUf3V=t=ec>=sD(2Mk>hydWElDe

5J$tq_8`&`H?i!#~?SCTV4E0i7dyT)Atu z(R2h~ae9<}ip{E=fhkgn_kyGm1?fD13!pAza6An#1fVh;0cjLq3@1F%z_TD;02l|DARt=^ zXoc0wi?4h9+>69KtbFXrYen@drweMN+)hWt{5n@iuJpo85lXED2R0fB824BKerI)%{L=zI#JdF}$*R>0nKtc`)B- z;=IoHa^h5DzF#JM+&qL*eocH@OY>^AtY{ki>ICILI?yGUffIFy2OJW>-T>u6-7PPf z$$De7krcQEvh7vn^W+`c9wB1LQ8CV5EH17TOENl&*HV47fImwOpM&RJeaH~J3J9hT v!3|gM5rQruHFb!0i$V}fdk2k!OXb|f#e5PiF+Ge&G;ixh3 zN?ttZ#fzG7(Zs#@bLgM7n75Pf z5{ffX2izetd?eNI&?&=NrxogWJK^CC@xZ5$%{)S)j%Zbh(<*O;C;yxZyuG0+y0e|% zZ|1m+5Uq~Zj0!3-dgA3OSoSz!tyAJhMOf^0nKPytPI2lk@iKwD`5LcS!BlbEYAy)L zL_R_5;C_WFu$?$~UvYPb!VDzUK5>w<%!oe^aJ7E;X3VseQVDmoKDeUOW&Bw21>1-k&qAhGhvoMkWHtL5FN0Y6e z4IRKa5#CBi;g#bZX@v>jZv8>kNuk-tW@sDAew2153)Vnem#Vg@s!CNH3__(#mb%dF zM(IHbqa1>M*RyCjSUQM49+W*OGLxlBY8=^%1BX$Lpd3ZPZ=!uD%`hkp6F)3UE#i4L z(5t_e(&R7qvDyKl-Pij$uZ9L3-|fz}YpFnn(4*m(-g zGbm?KMnHB;458cY6)&)XL4Djky^gTjxQHEGe!L5X z+WfSZkI$wwEtQ#xC$m#=Z7!Ef&(kqBdyYL?j3s)PUek1n;p!8T3g3R-Wt#s zfxd79D2k(8v|@@ZUSbTF^qWe2Jz8pTSiE8&V6+OR#N?DwXmp#bG3WnljdjBjuRHkquCA>8I5g`UB6kd|<$ z6l6Z!Xq#e7v+U8HvOLx6FT?lB5RKy*MvW)X+B$VT<%w|d1jlGF)5=W6S+GpbrPBFSMx!1$6bTYHWFtcZ_-mv1f1!9uDP9w!3MrNV r#eh@%SYy8>j^bX_s9{U*Y&JQUo<2fn;di7_Y$KuX Dict[str, Any]: Returns: A new dictionary with missing fields normalized and a `missing_data_report`. """ - normalized_data = data.copy() + normalized_data = deepcopy(data) missing_data_report = {} def _traverse_and_normalize(current_data, path): From 3ff7ce49465f9dfb1c8921f33f4858d5f0a41425 Mon Sep 17 00:00:00 2001 From: Felix Jordan Date: Fri, 21 Nov 2025 07:31:46 +0000 Subject: [PATCH 3/5] Fix: Correct path normalization in validation engine for mixed keys and list indices --- .../validation_engine.cpython-313.pyc | Bin 7863 -> 9041 bytes ...dation_engine.cpython-313-pytest-8.4.2.pyc | Bin 17489 -> 17489 bytes .../services/validation/validation_engine.py | 64 +++++++++++++++--- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc b/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc index 4b9ff1210818264abd5ec833e5acd7943ea99f1b..d5ef9b23f1bdf891a43f29ef05cc4db482c2f7d1 100644 GIT binary patch delta 2368 zcmaJCZA=?ia^8AvukF|*m~R8!+9oz7T=U7m8xlrDpm4g8e3MRMin~UN%w@H2)w&Zrtr5pmZJ!<_+4g#bI<@6_5=(6XGTnk`q(8AqbiiC8q5 zxG1o3L9Qof$cgI`xkojc@CedwM04<<5loHg##0$%C{2g%qYSRUOz)*h{q=X59~g4z z`s;gEH6{k-R{ZYY)X7to8B!_xt1goYIaM`4tn!alJqM}iC4jL#ssj~xI6#V&6$Yt5 z1Ym{&xbzxNEWqh_2C=@A7HHlu(IQ`}b~A%iv>eS0a@2%wE16l2@{IxvlITN#)0KjD z47ZCADs;BQ6<0fO3{M}x7%Es{#3)hwL87b>FaunC*InFz7-|IZ{Je3P3YGzwfCa%M z7`J6^DrD|0xEUmY4>wc)mftm2x#Il>f$JG1rKU}s!Y4lkb4XI7R7JO11??Dp0QKUHS z=?b1!+cZc8yXA`7?)_Bo3|=p*9Y)`LX< zls8|BCND&!`1u#&LQ=|8kwjj<$>qxuQe-+RB}?0R=GXC;q==Lh^R!B#_Ef&Aq=b@4 zLMZ9Vm+Ou^U4@a@Ct+@GPE`MTVp#sB&a3|ul2V;UW^4~VuW<5d06I^(4@$#a0?RRPJ`T0Q3(ek$cnm^^pI6U)5*IT+$r{C|nreB~I z`Z2d#n&qQk7*={eH!Hqin)R%^+S45aw~|@cbBgP^$GO}5g`EG0;y#Emn_K_t=(z0- z=A7QFvu{>$_APg=Xm2}DZ0Nv!FzfOwF8`90arG;%<7?d6^!VlM_>3|>lm5+1nei(R z0Dez;sh@CkNgs8uxa82zeN1s5Tb5S(*4!s^zM$d@tvbCAnqc3-dp+PjLN;Oq4ond2 z?IN>Q(SE1$sRjBk$@k>1>&uVM5MaF<8nEp9_dku$bcRT~a6-N`7=tysFROKkMfLwL zo>h?^Q8O*78e(`$IxZU;nw~j_{44lLzeMyOd}FJi`lYwOmiT2RF z5lTMO;4CZ4>m!la#mPuS96$s4)rQs+JxGY^V=6waB9DsBA$kWy^%*RTah^H!!o}Fk zblfk-QKsHR=^I2F8ba`urwDzY0b=|f_Q?+$-fSKt#5R9)!MZTHP;>Rf20k*xwwO%y QFIiK8cmD9N0$*_cU;1Va00000 delta 1198 zcmah|O>7%Q6rS0g_1fznk@{yH+f8S(d{rYswg~y>VN*m0Z;4lNo&}t=vhc^z2(&#iCExwD+BIqrCV*Jqv`-}IRc6hT$&ugdq5kdqztD-e6#L%RfyeipA!Zi&o zq~scgX7ab|IuDZ`5|*2J7%`E!?1cgHx_sV{{1SkeiM(ZTkS}DLCAkH_R71tiRIH-L zUFB#6x{h?;bDKKShS{bDNZAGrXgmT}VFR&)$N&JZQPL|P#l%S`QCyx&=Z~_J&rpIe zD?cHN$28XVv@mjkeJ7iOvsJtdjw@3<1b`VRtQgNS8?w}QN9Y}<%ziMvc3fHkFTfE?qj(CWK)^^Y`)+Un3wg)) zyc1udbnhfvUNJ#Y@?r*e^)k}#k2tcPY|i4S6rp;avhRL z%U>&%_P)~mR|}sle7?B-ay2wwztC59oU5yWn%Z4ay9;dvTveZ|so{zm{@Pwuhu@F> z+}4qwDeZMt_8=bU!I7S zUwo+=oT1=>3ABgyCE)hwR{X2?YI0{q?ySjO6}hWE7%96iJ{T-WrQpZ5YRB~*cX%%} zNU;Lm8m+kd>pe+&zPz#O=>EaknnNFE9!%vHYtAd*Iu{T|;8b=in6Kcr@#hmihT t97K Date: Fri, 21 Nov 2025 07:48:35 +0000 Subject: [PATCH 4/5] Fix validation engine dict assignment to prevent overwriting lists --- .../validation_engine.cpython-313.pyc | Bin 9041 -> 8930 bytes ...dation_engine.cpython-313-pytest-8.4.2.pyc | Bin 17489 -> 17489 bytes .../services/validation/validation_engine.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc b/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc index d5ef9b23f1bdf891a43f29ef05cc4db482c2f7d1..dd86c2317ffa715035fec1fbf19766845736ca1d 100644 GIT binary patch delta 191 zcmccU_Q;j@GcPX}0}!mvRmhyMkvB?|iyg=l2I9{}n+rwHF*5p0b`Y0lw4NLz?#j-X z_>O@glyh<;kI3Xb;)0AfC!Z6qWt_0tN8%+TIHXNA;L-w5mQW=fWklkRL|sVw^>R? hhl%Mk(`FC3=}eQ^6w^3-OxTW!v-y~8E>XP82mo+pE{XsE delta 240 zcmaFldeM#dGcPX}0}w3DRLDHGkvB?|iv!3L2I9}VHW!MXV`MCt>>w`97(Y2i+?Aav zQHX&dlyh<;kI3Xb;)0BSC!Z6qW!$paN8%+T Date: Fri, 21 Nov 2025 08:12:17 +0000 Subject: [PATCH 5/5] Refactor `_traverse_and_normalize` for correct data updates and type handling --- .../validation_engine.cpython-313.pyc | Bin 8930 -> 7309 bytes ...dation_engine.cpython-313-pytest-8.4.2.pyc | Bin 17489 -> 17489 bytes .../services/validation/validation_engine.py | 78 +++--------------- 3 files changed, 12 insertions(+), 66 deletions(-) diff --git a/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc b/backend/app/services/validation/__pycache__/validation_engine.cpython-313.pyc index dd86c2317ffa715035fec1fbf19766845736ca1d..85ec5fc5508f9233487e3a4d64f517eb772e0c3c 100644 GIT binary patch delta 1362 zcmZvc?@t^>7{_OJXYUq{Tkg0$pm$f;A_6N=sgzKXibRS9(*kmAbCKB1mD|vx_k+yd zX>FoCjq!zziF9Ie_0^d)@vx0z?hQs0iEs3sccFpnrmyrLAX005W1Lwyg~mzdyU*;* z^PT52ncZI}J|2)bRp==#qd$3gEJ$a!)!3WwI;%0-H;ljm^;A z!dyaBR1v$#Z=PGQk)*vR;81DRJHox;pffz}d+$@Z%wI;If(1xJyJ=wPAWp#nkVG1< zq0?L#2EekI2Vjbu=R1JLu~C>GLi?Et(BK?TJ{RPUFelJ=H6sPgBf1cE({~!| z1ga2KOZ{>m>{~oYwGEt9eYEdgpDQBGji#_^SJmH04|__rNdiu3-uIb z;vH(}u9;|D?W`zO@__2P%^_$`O5+%06yj49;=dH)|5G?w?T0ANAE6MRrs#_uP;^H= zJVcSMQ7i$~Ic>p{Sf;k*N$||1>cp)y#>c2wLOf$6O^Y`(*b=D3S1go>n;EOlNM(}- zj%EziWjR)&*hpophHJ*#dK&BTRLr8W<#RKPqvusn^0;km&5Fvz*FSM znl>cNfJi`P3tSdjZ|e z+@CKbcKm}|{=tWGZ}GyWye-et9hBi+9!P!I%ML))b(~ITsImjXuToJtOm8Hx9NCo^ z;}NJT#BUfeolZopItsmFr{k$wvdOL@#UC59$t`cHmQ?#A;90uj1KQ09 zmA`V}u?$+;^BI!L%~S)SGHQ8(Ji=>mw+A$Z@_O0RpPPD!g88Y!_>J|KHeY^#&R3Cd z2L&o9kRK_$UI`97K&PtxW4W2KtX9x-Rac;l0ydrsr`1Nwsq3-yqOMyUHprJJo*8F& zILPEI6Au%G$q*A(-x{#<_Zh^q$#g86Fh=nuI>ml9=1;`avDWo04Cc=je1*kA>z(Oc VK!?T5`uK)#eRA#HKiJSV`3H+DHw^#) delta 2766 zcmaJ@YfKy26~6N@et^xx#ym{IV4E1n;b~%TOk&=Pv&BGchN42^b+D6|H3OM3-R)K? z{OF%*wQ9E0R&3R(pl%h@ts-x!RNAU7RkIRmwW>cF?YEh;AQ;|ozYX7unY{pBn zQm^DQ_ue`8obR0To$-D9(qD&lPqkVNg7N+W6}#8+f$l8~+uZsdb_K`E_#E*LR=G7# zeoB;IIWaSJ9yNWI2|5ou9R>3b;Hes%XaT1&8mEbFB8b(adDQ|oiJ1|Ng7`(epiiMme3A$fHdsy) zR%9o`=eMpKE?}*ixf7MVq7tMO-&3{(A@2(=XK=pVSf~6I0@DEnKW?n|>XGlq2rYw& zB@2zU>F2Ucev6S`#wrWN}lYvtH zu10?blVfB`=#n4`5+q>rRU^bGX$Aj7MfFu>2a+RX6#g27N)d&E@RWvCB)}tv6hqogRheT-+TKtJzk_ZIrF9q|O3EW>^2#P6XG^fy)%IOC5 zotW?53H*=fvKh#EVbDdTpt=>{czjR|9jy)2NEPcXkxj^M!(e5mu6SA-R7+KqwHVbY zMk@+g#Um8|wy~CX8!LOj0j-ctMW7wp5u+H#&Z1L*6tXc|sH z2`MwuDV^*o4BQ72P3KAT0HNt=iA+*UcEDWPE$jg0ego%Dg0~gvTC@YlXenewB4`0E zQ%;u`s6AfPWhzcpphAuXs`uSO`k6`wrz=mzThuvKfGYl*Mk8dbqAEf?N323l>)fA9 zWXDM&PX%oPuvJCq23ScwW;6tg?1FY+r0u{NenZhvG6%#g=!|{Vh7_6(NZEqitQA-` zg8BKA#w@nt2<`_;ohrWig=RGML$yK6dD4>KmKjb-yI#2|Zv)dQ_%WJW$gUX^u{Kts z>L>q&!5eE|T6t65GCMOHam-$~eDp+U6BKv&b%l z|50t5Q!IZ06=ww8)aS?Q>MAS!&9Sq5Rh?V;EtqmoaektX9y)(1ZLsBZC)nmP_Wy>KqowW$*fZvKNh z>g@1=;qsFj)aZOxi*&VXW$#6@TF0K&@u;yS*^uQQ*PUD0vtHVc{BHi2^S_So-rV<2 z)3G#YU#<^yn@ZU&1GzZg6#`A*l2lKXnA>u+a1_%8P9qAf+w(Nbw6uH z7kVD|qLx1FID(;zH!-MuQnhdBd{o`IPQI7;>EwDmTkYJhc7AMdB!{wwo+rx~qTI&> z(p~uX6AfzqBF4e3YXyC%){J(Sd{|QgGT-Fvb%~Wq5O$KyU(9Bujr{HniKWsNit6j@ z%hOk^Eb?8xsj;tu!q+9umq7ey6+JVen#Q#R&#aE;>KoEz&8Os5OmX?~6pp$S4QoT| z=6$j`*V?x_o;KJ8@_eqeK26rM(*Ls^8?%>-%1|h>I2Q`B!?49qn=FGWn6M3!fXfYH zrK4U}nk^ik}(>{Y?CIFycpIfheckCEG WotP<^NDXf5Q@+iw{WHJL+xZ`!d5!)6 diff --git a/backend/app/services/validation/tests/__pycache__/test_validation_engine.cpython-313-pytest-8.4.2.pyc b/backend/app/services/validation/tests/__pycache__/test_validation_engine.cpython-313-pytest-8.4.2.pyc index 4bbda00b0b695e2e83633d3ff9e095bf2c9bdef7..bc5e51c46125f7c9c92f06b7c372fae34ef2129d 100644 GIT binary patch delta 21 bcmccE!FaKQk?S)rFBbz4JSg1A Dict[str, Any]: data: The input data dictionary to normalize. Returns: - A new dictionary with missing fields normalized and a `missing_data_report`. - """ + A new dictionary with missing fields normalized and a `missing_data_report`.""" normalized_data = deepcopy(data) missing_data_report = {} - def _traverse_and_normalize(current_data, path): + def _traverse_and_normalize(parent, key_or_index, current_data, path): if isinstance(current_data, dict): for key, value in current_data.items(): new_path = f"{path}.{key}" if path else key if value is None or (isinstance(value, str) and value.strip() == ""): - normalized_data_ref = normalized_data - path_parts = new_path.split('.') - # Navigate to the correct nested dictionary in normalized_data - for part in path_parts[:-1]: - normalized_data_ref = normalized_data_ref.setdefault(part, {}) - normalized_data_ref[path_parts[-1]] = "N/A" # Replace with placeholder + parent[key_or_index][key] = "N/A" # Replace with placeholder missing_data_report[new_path] = "Missing or empty field replaced with 'N/A'." elif isinstance(value, (dict, list)): - _traverse_and_normalize(value, new_path) + _traverse_and_normalize(current_data, key, value, new_path) elif isinstance(current_data, list): for index, item in enumerate(current_data): new_path = f"{path}[{index}]" if item is None or (isinstance(item, str) and item.strip() == ""): - normalized_data_ref = normalized_data - path_segments = [s for s in re.split(r'(\[\d+\])', new_path) if s] - - for _i, segment in enumerate(path_segments): - is_last_segment = (_i == len(path_segments) - 1) - - if segment.startswith('['): # List index, e.g., '[0]' - index = int(segment[1:-1]) - # Ensure normalized_data_ref is a list and has enough elements - if not isinstance(normalized_data_ref, list): - # If it's not a list, and it's an empty dict, convert to list - if isinstance(normalized_data_ref, dict) and not normalized_data_ref: - normalized_data_ref = [] - else: - # If it's not a list and not an empty dict, this is an error in path or structure - # For now, let's assume the path is well-formed and the type matches - pass - - while len(normalized_data_ref) <= index: - normalized_data_ref.append(None) # Pad with None - - if is_last_segment: - normalized_data_ref[index] = "N/A" - else: - if normalized_data_ref[index] is None: - # If the next level is None, default to a dictionary - normalized_data_ref[index] = {} - normalized_data_ref = normalized_data_ref[index] - - else: # Dictionary key(s), e.g., 'root.key' or '.nested_key' - # Remove leading dot if present, then split by dot - keys_str = segment[1:] if segment.startswith('.') else segment - keys = keys_str.split('.') - - for _j, key in enumerate(keys): - is_last_key_in_segment = (_j == len(keys) - 1) - - if not isinstance(normalized_data_ref, dict): - # If it's not a dict, and it's an empty list, convert to dict - if isinstance(normalized_data_ref, list) and not normalized_data_ref: - normalized_data_ref = {} - else: - # If it's not a dict and not an empty list, this is an error in path or structure - # For now, let's assume the path is well-formed and the type matches - pass - - if is_last_segment and is_last_key_in_segment: - normalized_data_ref.setdefault(key, "N/A") - else: - # If the next level is None or not a dict, default to a dictionary - if normalized_data_ref.get(key) is None: - normalized_data_ref.setdefault(key, {}) - normalized_data_ref = normalized_data_ref[key] + parent[key_or_index][index] = "N/A" # Replace with placeholder missing_data_report[new_path] = "Missing or empty field replaced with 'N/A'." elif isinstance(item, (dict, list)): - _traverse_and_normalize(item, new_path) + _traverse_and_normalize(current_data, index, item, new_path) - _traverse_and_normalize(data, "") + # Initial call to _traverse_and_normalize + # We use a temporary key '__root__' to hold the original data for the initial call + temp_root = {'__root__': normalized_data} + _traverse_and_normalize(temp_root, '__root__', normalized_data, "") + normalized_data.update(temp_root['__root__']) # Update normalized_data with the modified content normalized_data["missing_data_report"] = missing_data_report return normalized_data -# You can add more validation functions as needed. +# You can add more validation functions as needed. \ No newline at end of file