In [35]:
import os
from typing import List, Dict

from docx import Document
from docxcompose.composer import Composer
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_ollama import ChatOllama
from langgraph.prebuilt import create_react_agent


In [20]:
# ReAct-style agent with no tools: it only reviews a document and answers with
# JSON suggestions.
# NOTE(review): `llm` is assigned in a later cell of this notebook, so on a fresh
# kernel that cell has to run first — consider moving the model setup above this cell.
annotate_agent = create_react_agent(
    model=llm,
    tools=[],
    # A plain `str` prompt is used verbatim as the system message (it is not a
    # format template), so the JSON example must use single braces; the doubled
    # `{{ }}` form is only needed inside f-strings / templates.
    prompt=(
        """
        You are a helpful agent how analyze a given document and provide recommandation on the paragraphs content
        
        Instructions:
            - adapt your analysis according to the user's request
            - once you have detected a modification to make, select paragraphs number associated to your recommandation
        output:
        
        the output should be strictly respected, as a json format:
        
        list of dict of recommendations identified
        
        ```json
        {
            "suggestions": 
            [
                {
                    "paragraph_number": 1,
                    "recommendation": "..."
                
                },
                ...
                
            ]
        }
        
        ```
        """
    ),
    name="annotate_agent",
)

In [None]:
def prompt_llm(content: str):
    """
    Build the review prompt sent to the model as the human message.

    The prompt embeds the document text, the review instructions and an example
    of the JSON structure the model must answer with.

    Args:
        content (str): text of the document to review, inserted after
            "Content of the document: "

    Return:
        str: the complete prompt
    """
    # Only the head interpolates a value, so it is the only f-string.
    head = f"""
        You are a helpful agent how analyze a given document and provide recommandation on the paragraphs content
        
        Content of the document: {content}
        
        """

    # Static tail: instructions plus the expected JSON layout. This is a plain
    # string, so the braces of the example are written once, unescaped.
    tail = """Instructions:
            - adapt your analysis according to the user's request
            - once you have detected a modification to make, select paragraphs number associated to your recommandation
        output:
        
        the output should be strictly respected, as a json format:
        
        list of dict of recommendations identified
        
        ```json
        {
            "suggestions": 
            [
                {
                    "paragraph_number": 1,
                    "recommendation": "..."
                
                },
                ...
                
            ]
        }
        
        ```
        """

    return head + tail
        
# Relative path instead of a machine-specific absolute one. Assumes the kernel's
# working directory is the project root, as the relative `temp/` and `output/`
# paths used by `annotate_docx` already do.
SOURCE_DOCX = "temp/incorrect_facts_document.docx"

# JSON Schema of the expected reply. Passing it to `with_structured_output`
# constrains the model to this exact shape; the string "json" is not a schema.
SUGGESTIONS_SCHEMA = {
    "title": "Suggestions",
    "description": "Recommendations on the paragraphs of a document.",
    "type": "object",
    "properties": {
        "suggestions": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "paragraph_number": {"type": "integer"},
                    "recommendation": {"type": "string"},
                },
                "required": ["paragraph_number", "recommendation"],
            },
        },
    },
    "required": ["suggestions"],
}

doc = Document(SOURCE_DOCX)

# One labelled paragraph per line. The label uses the 0-based index, which is
# what `annotate_docx` later uses to look paragraphs up. The newline separator
# keeps a paragraph's text from running straight into the next label.
content = "\n".join(
    f"[Paragraph number: {i}]: {para.text}"
    for i, para in enumerate(doc.paragraphs)
)

msg = [
    SystemMessage(content="You are an helpful assistant"),
    HumanMessage(content=prompt_llm(content)),
]

llm = ChatOllama(
    model="deepseek-r1:1.5b",
    temperature=0.7,
)

# Returns the parsed reply as a dict: {"suggestions": [{...}, ...]}
resp = llm.with_structured_output(SUGGESTIONS_SCHEMA, method="json_schema").invoke(msg)

In [28]:
# Inspect the parsed model reply: a dict whose "suggestions" list holds the
# paragraph number and recommendation of each flagged paragraph.
resp

{'suggestions': [{'paragraph_number': 1,
   'recommendation': 'The content is accurate. Please review the document again.'},
  {'paragraph_number': 2,
   'recommendation': 'Water boils at 100°C, not 50°C. Need to correct this.'},
  {'paragraph_number': 3,
   'recommendation': 'Albert Einstein was born in 1879, not 1979. Correct the birth year and clarify his inventions.'},
  {'paragraph_number': 4,
   'recommendation': 'The capital of France is Paris, not Madrid. Correct this information.'},
  {'paragraph_number': 5, 'recommendation': 'Shakespeare wrote '}]}

In [31]:
from docx.enum.text import WD_COLOR_INDEX

def annotate_docx(
    annotations: Dict[str, List[dict]],
    filename: str,
    input_dir: str = "temp",
    output_dir: str = "output",
) -> str:
    """
    Append recommendations to the paragraphs of a docx as highlighted inline notes.

    Every suggestion is added at the end of its target paragraph as an italic,
    yellow-highlighted run " [Comment: <recommendation>]". The annotated document
    is saved as "<output_dir>/<filename without extension>_commented.docx".

    Args:
        annotations (dict): mapping with a "suggestions" key holding a list of
            dicts, each with "paragraph_number" (0-based index into the
            document's paragraphs) and "recommendation" (text of the note)
        filename (str): name of the docx file, extension included, inside input_dir
        input_dir (str): folder the document is read from
        output_dir (str): folder the annotated copy is written to; created if missing

    Return:
        str: fixed confirmation message

    Raises:
        KeyError: if "suggestions", "paragraph_number" or "recommendation" is missing
    """
    doc = Document(os.path.join(input_dir, filename))

    # splitext strips only the last extension, so "report.v2.docx" keeps the
    # stem "report.v2" instead of being cut at the first dot.
    file_no_ext = os.path.splitext(filename)[0]

    # Looked up once: adding runs never changes the number of paragraphs.
    paragraphs = doc.paragraphs
    paragraph_count = len(paragraphs)

    for annotation in annotations["suggestions"]:
        paragraph_number = annotation["paragraph_number"]
        comment = annotation["recommendation"]

        # The index is produced by an LLM, so validate it fully: a negative value
        # would silently wrap around to the end of the document, and a non-int
        # (e.g. "2") would make the comparison raise. bool is excluded because it
        # is an int subclass.
        is_valid_index = (
            isinstance(paragraph_number, int)
            and not isinstance(paragraph_number, bool)
            and 0 <= paragraph_number < paragraph_count
        )
        if not is_valid_index:
            print(f"⚠️ Paragraphe {paragraph_number} inexistant.")
            continue

        # Add highlighted text (acts as a visual comment)
        r = paragraphs[paragraph_number].add_run(f" [Comment: {comment}]")
        r.font.highlight_color = WD_COLOR_INDEX.YELLOW
        r.font.italic = True

    # Saving into a missing folder would fail after all the work is done.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    doc.save(os.path.join(output_dir, f"{file_no_ext}_commented.docx"))

    print("document saved")

    return "comment added and document saved"

# Apply the suggestions held in `resp` to the sample document; the annotated
# copy is saved as incorrect_facts_document_commented.docx.
annotate_docx(resp, "incorrect_facts_document.docx")

document saved


'comment added and document saved'