# Example prompt
from openai import OpenAI
client = OpenAI()

system_prompt = """
You are a senior technical writer and you are writing a industry standard architecture.
You MUST ONLY use the provided repo content. 
If information is missing, explicitly state: "❌ Information could not be found in the repository." 
NEVER invent or assume details.
"""

user_prompt = f"""
The project folder structure is:

{repo_tree}

Here are the most relevant code/doc chunks from the repo:

{arch_context}

➡️ Based strictly on this repo content ONLY, write a **detailed 'Architecture' section** with the following headings:

1. **Component Descriptions**  
   - Describe major components and their functionality, interfaces, and dependencies.  
   - If missing, say so

2. **Data Models**  
   - Document structure and relationships of the data used.  
   - If missing, say so

3. **Flow Diagrams**  
   - Generate a mermaid code to draw a flow chart.

4. **Security Protocols**  
   - Mention any security measures if present.  
   - If missing, say so

5. **Infrastructure Overview**  
   - Describe hardware/software/network infra if present.  
   - If missing, say so

6. **Deployment Processes**  
   - Explain how the system is deployed/configured if found.  
   - If missing, say so

STRICT RULES:  
- Only use facts present in the repo tree & retrieved chunks.  
- Do NOT hallucinate or assume anything.  
- Do NOT invent file names or components that do not appear in the provided content.  
- If unsure or missing, explicitly state ❌.

Now write in a **formal documentation style**.
"""


completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
)

architecture_section = completion.choices[0].message.content
print("\n✅ GENERATED ARCHITECTURE SECTION ✅\n")
print(architecture_section)


In [1]:

from langgraph.graph import StateGraph, END
from langchain.schema.runnable import RunnableLambda
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.chat_models import ChatOpenAI
from typing import Dict, List, TypedDict

from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
import os, ast, nbformat
from pathlib import Path

In [20]:
faiss_index = FAISS.load_local("docs_index", OpenAIEmbeddings(model="text-embedding-3-small"), allow_dangerous_deserialization=True)

# Updated retrieval function using FAISS index
def get_relevant_chunks(section: str, k: int = 25) -> List[str]:
    try:
        results = faiss_index.similarity_search(section, k=k)
        return [doc.page_content for doc in results] if results else []
    except Exception as e:
        print(f"[Warning] Retrieval error: {e}")
        return []

In [21]:
# LLM for structured summarisation
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

def write_overview_section() -> str:
    context = get_relevant_chunks("Business Overview and Scope of the project")
    if not context:
        return "### Business Overview and Scope\
                ❌ Information could not be found in the repository."

    prompt = f"""
You are a technical writer tasked with summarising the overall Business Overview and Scope.
Only use facts from the following context:

{chr(10).join(context)}

➡️ Write a concise, formal overview of the system's business purpose, target users, and main use cases.
    """
    return llm.invoke(prompt)

def write_architecture_section() -> str:
    context = get_relevant_chunks("Architecture")
    if not context:
        return "### Architecture\n❌ Information could not be found in the repository."

    prompt = f"""
            You are a senior technical writer and you are writing an industry-standard Architecture section.
            You MUST ONLY use the provided repo content.

            ❗ STRICT RULES ❗
            - Do NOT hallucinate or assume details.
            - If information is missing, respond: "❌ Information could not be found in the repository."

            Repo context:
            {chr(10).join(context)}

            ➡️ Based strictly on the above, write the Architecture section with these subheadings:
            1. Component Descriptions
            2. Data Models
            3. Flow Diagrams (Mermaid code block)
            4. Security Protocols
            5. Infrastructure Overview
            6. Deployment Processes
                """

    return llm.invoke(prompt)



def write_user_guide_section() -> str:
    context = get_relevant_chunks("User Guide")
    if not context:
        return "### User Guide\
            ❌ Information could not be found in the repository."

    prompt = f"""
You are writing a User Guide section.
ONLY use the content below to explain how to use the tool or system.

{chr(10).join(context)}

➡️ Include: Overview, Features, Setup, Usage Instructions, and Support (if applicable).
    """
    return llm.invoke(prompt)

def generate_section(section: str) -> str:
    if section == "Architecture":
        return write_architecture_section()
    elif section == "Business_Overview_and_Scope":
        return write_overview_section()
    elif section == "User_Guide":
        return write_user_guide_section()
    else:
        return f"### {section}\
        ❌ Section writer not implemented."

In [22]:

# Node wrapper for LangGraph
class SectionNode:
    def __init__(self, section_name: str):
        self.section_name = section_name

    def __call__(self, state: Dict) -> Dict:
        output = generate_section(self.section_name)
        state[self.section_name] = output
        return state

# Define state schema
class DocState(TypedDict):
    Business_Overview_and_Scope: str
    Architecture: str
    User_Guide: str

# Build graph
graph = StateGraph(DocState)
graph.set_entry_point("overview")

# Define nodes for each section
overview_node = RunnableLambda(SectionNode("Business_Overview_and_Scope"))
architecture_node = RunnableLambda(SectionNode("Architecture"))
user_guide_node = RunnableLambda(SectionNode("User_Guide"))

# Add nodes
graph.add_node("overview", overview_node)
graph.add_node("architecture", architecture_node)
graph.add_node("user_guide", user_guide_node)

# Transitions
graph.add_edge("overview", "architecture")
graph.add_edge("architecture", "user_guide")
graph.add_edge("user_guide", END)

# Compile and invoke
doc_graph = graph.compile()
final_output = doc_graph.invoke({})


In [23]:
final_output

{'Business_Overview_and_Scope': AIMessage(content="**Business Overview and Scope of StepUpYourCareer.ai**\n\n**Business Purpose:**\nStepUpYourCareer.ai is an AI-powered career assistant designed to bridge the gap between academic qualifications and industry expectations. The platform aims to empower students and job seekers by providing them with insights into their skill gaps, personalized learning pathways, and connections to industry mentors, all initiated through a simple resume upload.\n\n**Target Users:**\nThe primary users of StepUpYourCareer.ai are recent graduates and job seekers who may possess academic credentials but lack clarity on the specific skills required by employers in their desired roles. This includes individuals from various educational backgrounds seeking to enhance their employability and align their skills with market demands.\n\n**Main Use Cases:**\n1. **Skill Gap Analysis:** Users can upload their resumes to identify missing skills relevant to their target j

In [24]:
# Save output to Word document
from docx import Document

doc = Document()
doc.add_heading("Generated Technical Documentation", 0)

for section, message in final_output.items():
    doc.add_heading(section.replace("_", " "), level=1)
    content = message.content if hasattr(message, "content") else str(message)
    for line in content.split("\n"):
        doc.add_paragraph(line)

doc.save("generated_report.docx")
print("✅ Report written to generated_report.docx")


✅ Report written to generated_report.docx
