From be31a5b96df2e8151c83c86872f93d1753bf5e15 Mon Sep 17 00:00:00 2001 From: Elisa Falk Date: Thu, 20 Nov 2025 11:55:36 +0000 Subject: [PATCH] feat: Add LLM-based code audit text generation --- .../__pycache__/nlg_engine.cpython-313.pyc | Bin 7195 -> 9471 bytes .../prompt_templates.cpython-313.pyc | Bin 4986 -> 6024 bytes backend/app/services/nlg/nlg_engine.py | 35 ++++++++++ backend/app/services/nlg/prompt_templates.py | 21 ++++++ ...st_nlg_engine.cpython-313-pytest-8.4.2.pyc | Bin 21617 -> 29230 bytes .../app/services/nlg/tests/test_nlg_engine.py | 63 ++++++++++++++++-- 6 files changed, 112 insertions(+), 7 deletions(-) diff --git a/backend/app/services/nlg/__pycache__/nlg_engine.cpython-313.pyc b/backend/app/services/nlg/__pycache__/nlg_engine.cpython-313.pyc index a6e9a95427e5bb94a62b84200d80a7752b014605..0454e6e12008a2ca7113a91995683598c564407b 100644 GIT binary patch delta 1865 zcmZuxUrbY17(eIU_O`vfEtK0@X^|d;szs~_Q~|{Vg({iW$)yv8Xs+#Txvt!@)7B}= z!rWfWk|i!DCYv+rvc$}%{o#crdsqk+gL3 z&i8%i&u88L4mlq?93r6OVLYJkZT!aRhMWXR3;eBp+&(UhOI@6qYL&VdX6A9BOHgDV zIAg*t#f|IIp3u5kk&BA(7C9=NgSTdXl)f;*zFEWd4iEn!58dI~*JvDkm_dxe5wgXT zkozf(g?PM!i4cRn*=wE_UMef_J|@{ueqR6K!6>E6wnR3mol2^Cby-vkNj+cY37+bz z!$Qdi^m`np(q@3bWe|s3Rb{H44X*F<_SW@ zq?miSBTy0ITs7J&EWP@woni@d20zB~DGvo!S2IjwwyHF4>VoYc&JVzy&@hkLtMbSS zfnW$K)(Kk6S$LU`nI&KdjUX-zJILpDd4stca|ry;$`ZGbBceyPwS(<2vQm#CvfX5I zXeS?u0bWuZ)M9pIK|3-)xbk^zBWk&T7VVA zKBeCUa?~!H4*+1;h;9$m)WmI0(8(#aimQi1fI$1YFx`*37sCf=J%+8{_Ns-*?{?W+ z)}~+z^zi|Zf=!?euq23G3eH$7vR!-%hUDp=!~>e4VKuMikctx7$tkQ&Xhu#ytI=6f zL*>d@}`o!X{oVe zY5lu@nfWcw$AEX&JOaF<=BX3(Md4NVb>9`=jiImAyY=07BERlEc(vtv>y_3UC+6T? zU+*1_dew^;9Zy8yMn%+hPmWx)K5#XPpPVbXdcN$VKsuc14Z-90d?$+T6H8wIeQ#^Y z+gj|66-QI0t?7H-3G%%&*fd;JMvBAn;)XYhs&+{#)uvW$mp1NPYA6s(&Gwmn=BNkG zTi`&GFmL4Vs7bobNsK!! z)Cjw1e`6t;BS$yPcGRt$OIpv4uxzX>O zj{uz(M@Eas$BQS`;&HV!a=Ic*pLxb~MDX7fPO?}L@~wX(%#dIF5!WaOPoUl+FA(7kB77zXi6#r%1DSrB%tiVjAq5a&z4?Tw z3NsH_qR0Ux;yC$-L>G?>h$#pp{4|9o3oB&^Sc3$dK!hKN2%0=W$%5$}!{k#+uUWq{ qaI6d{RjGI)c0O1Biaga>U-y6cS^?I#B z&tLo7>cZAT`f+*g?`rX4 z`BE-zj3Clj1rR0%hXdZ8gwF13`m;{9QM(8HT;<4(A8a&F>DS|&tHB;14S_Or^2?KOp>k?rkLdS-ni9&hc zPeZg05}`~U$V_uHP~N8};oP3`#0otmtV7z3S!$J>tLI2+NkRiL4pG}d!ag601Dm9R z>=TbHTSDq~9X?CZ$m|r3oIac2R3|0P0ZmWzkn2qOO2kjJ+C{+}gGvUv{3wF5<^*kb zrnDWm&JU>|GBVgRv66x+mn7~I5Qb>YOkSO<0}_?>kakEc_V5%uh^1h(Dc>GG!=+}&-y zZa>dZ7wzS(#qwIV4`0{rE<`KE_^etlBd5zpm+bK2xl2*)JYDTY8vbda*BY14Z?8-m zOJvS1U14*T%$YA&-nObPT3h^OZ_=$6zpuUD+@h56;=xF`p7;ER0B{?{AL|{aH!tT?XJzD`d|o(nvyX^8BbP8xmJNuD!zY)DYRR$7^L!9sR{;^KK!QV? z=VK;QETbS>gXf1Drpbpyf9Q$=Ro`OEE>A2xL0m#IiILHtaYpb51~6km@D~W str: error_msg="Failed to generate social sentiment summary due to an internal error. Please try again later." ) + async def generate_code_audit_text(self, code_data: Dict[str, Any], audit_data: Dict[str, Any]) -> str: + """ + Generates a comprehensive code audit summary using LLM prompts. + Includes clarity points, risk highlights, code activity, and repository quality indicators. + Handles missing audit information gracefully. + """ + if not code_data and not audit_data: + return self._format_output({ + "section_id": "code_audit_summary", + "text": "Code audit and repository data are not available at this time. Please check back later for updates." + }) + + # Combine data for the prompt, handling potentially missing parts + combined_data = { + "code_data": json.dumps(code_data, indent=2) if code_data else "N/A", + "audit_data": json.dumps(audit_data, indent=2) if audit_data else "N/A", + } + + template = get_template("code_audit_summary") + prompt = fill_template(template, **combined_data) + + async with LLMClient() as llm_client: + try: + response = await llm_client.generate_text(prompt) + generated_text = response.get("choices", [{}])[0].get("message", {}).get("content", "").strip() + if not generated_text: + raise ValueError("LLM returned empty content for code_audit_summary.") + return self._format_output({"section_id": "code_audit_summary", "text": generated_text}) + except Exception as e: + logger.error(f"Error generating code_audit_summary text with LLM: {e}", exc_info=True) + return self._format_output({ + "section_id": "code_audit_summary", + "text": "Failed to generate code audit summary due to an internal error. Please try again later." + }) + diff --git a/backend/app/services/nlg/prompt_templates.py b/backend/app/services/nlg/prompt_templates.py index 13a6cdb5..a6806fb2 100644 --- a/backend/app/services/nlg/prompt_templates.py +++ b/backend/app/services/nlg/prompt_templates.py @@ -67,6 +67,27 @@ def get_template(section_id: str) -> str: Risk Factors Data: {data} """, + "code_audit_summary": """ + Based on the provided code audit and repository data, generate a comprehensive audit summary. + The summary should include: + 1. **Clarity Points**: Highlight aspects of the codebase that are well-structured, + easy to understand, and follow best practices. + 2. **Risk Highlights**: Identify potential security vulnerabilities, performance bottlenecks, + or maintainability issues. + 3. **Code Activity**: Summarize recent development activity, such as commit frequency, + contributor engagement, and major feature implementations. + 4. **Repository Quality Indicators**: Comment on aspects like test coverage, + documentation quality, and adherence to coding standards. + + Handle cases where specific audit information might be missing by stating + that the information is not available or could not be assessed. + + Code Audit Data: + {code_data} + + Audit Data: + {audit_data} + """, "team_roles_summary": """ Based on the following team member data, summarize the key roles and responsibilities identified within the team. Highlight the diversity of roles and how they contribute diff --git a/backend/app/services/nlg/tests/__pycache__/test_nlg_engine.cpython-313-pytest-8.4.2.pyc b/backend/app/services/nlg/tests/__pycache__/test_nlg_engine.cpython-313-pytest-8.4.2.pyc index 7e58695aaafb44a681da0aaf14fbc16c1367fc69..624730879e2dcd6bc847d381080b4264bc4bab46 100644 GIT binary patch delta 3487 zcmeHKU2Gf25xza1$U9OLe?(Fw_2Xk?S=2vWNo_2L60LvBiAeEG!$}dStaMUnNu+Y5 zoybKYIWKu~U}gFfCuyoCO%NwYP(wxH^r5vLil#;i@I(O;h$3}b14SOv$~B52MjtwR zq8V9*`(vOF0lL_mo01e-ynBO`-7=ev3=VQ)U&8KRE_1W}y378T+2xO# z%;jFDldj=VYN*1oOR*0XHs>ydI87Q{s~@szH+w*UfcwDe+L5c=>bHA6oQ@my!^frd zaCNYhG{A2Ld&ya_4*kNkjILk7C#|yZS-5ibuR~9f)zEMYAs$E`^)^Rs+T9t0%N^C z_+72wPU>TN$y$OoNrPldJsab@h8ZSx5v~g@f;}d55UKp+)1dY>L0f4fT<8(plB1Pv z(W8M;a!Rh4F+eoQ1Y&&?q^bqD*kkb+f@J;$sUmeoa#QEj@nQPjaqQ6?d+)=sL314A zV7cE}`%$z8eeM}aUaCxO#mrh9e}Xboi_;z}!#Ju=p6_5gxgEGv4H45Aq#OcVdZdNE zxiCTHlLJ0d3Tp!mG*tNYfX`>{Bn6Y2R!)o5BJGpJn57W6mBvl1u-sE2lmXi;OR%j|KUqy0O=}S$@V*K=EA|s|x&&|b^g)XsgHcqi( zA}vOzQ>n>TaTuM6cYKR)D-Li06|Nt{9JR&fYTMMa)Zr4yNy zvLHTrIzEdFAntQI@^%Uy*ESw6Yz$WqdF%H7n-9{y2fh(*f%Yz+$2~14WhI`G$9K8Q z$WLb=5ZVtHy4>O=n=*lc<^>Hf8PCM?#+_X$uS~*o{(%x3B1G5@xEAX&aym19FR%Ee z6BBYet-Ou~KgB2gG5)TfGE2-2b4|`HE)8zk+$%?~=+w%Gvfg0U+P!pi%kEiu?8*tX zsy|yfkhKjija=t#7am>lY%@$toJH#UCZ)~^q#WazWZ}r+_RR zy^HAd+j53-C6?~vFO(t zhpSMyrWa6u%|a+|H;#ygYgLAkT8Nidu&=OKDXSg`FiCYq}}YP&%pz7>x2m z5M9-^-Mi8}p1kXr(7Y7miVUCkw38n}OXxsZgHcOj{crTv=-{}n#z-y^vzf30>*Sz9?~sk3s^*`Yc+e)4F}d2G{iY-#wnF7NkG zJ%4J&opUWVE`_g~ye~YYn*3QVlx+^}P}LOLl*f+KDg)o)VphaAfLCjwS#`-Y%JZ;55pQ#-%lh?_B zk$a=RZorIPGHCzj{`frKZqk|~wm#iX>w_OOHQZ@@L#w~!d$~RZ(4q1jO1?|U_b9nY z$%~Y{M9IsPG*fbxl5bJcMM)tMw(F}&57>svjkGxJRAD&ikJwib)L^~qH^(vs?{;}% zHe`8hPpuoDot+z(6(yx8Z=+!jpY+e}qb}VL%HZ};nbV*}T#-RNtvw_Hx4*Lg|M#7HUZlQ6_nms*=znywz#eWUw3Jq{ z%3E+baL_`}jPfeY{bD!wwB4&9Le)Wc=p6+Aff}gyaagd4gE?~KOLtgoUo>jv4Gg}) z_u408#7Y1A&~7yfZ4Fu-QQm<+MyrhGxO5q;>+Aq?)9a1a9dV*163hx4oZX s`DZK7Rc;c8N*p&?flQOaGG(`2d)CcGYvi_T_N?CVeUtiD9Jup802F=v(*OVf delta 482 zcmZ4Ygz@7FM!wIyyj%=GU}qwqxlL{&p9Iq!ri~hNm{`&oHH9}9FkiG|RGM7Kp}aZO z=?KeYU2oCJ6Oy$i=XmoluAE%y?KF9dhUnxA-mAGL1I?HR#KlV|FR&KfJj|Xqn1(n4a zO~6+El6b;2xrSSxQEc)WZgWxIP+k+pVDX3=6DCs@keeA8f+Z#|$O@cX&%-%6KFN|% zVsfFI;^YHfoRbygLU|?iSajJK81ndnrGlj=OYlzRlkqER2I?wm0TB}?*W~C-p64yg zS~LmBpS;dWce1_@+vGrNc}Bg-h1Rk%IXSuUsksFumGQ~>c_pcNB}G$#3O4|8@g9&b zc|Ni-3$QhKeBff>6q?+Z>$v%Xj|be_2AWO!^id