#### HOW CAN WE INCORPORATE MEDGEMMA AND HAI-DEF MODELS?

| Priority | Task                                   | Impact | Effort |
|----------|----------------------------------------|--------|--------|
| 1        | Swap Gemini Flash → MedGemma endpoint   | High   | Low    |
| 2        | Implement compliance scoring node       | High   | Medium |
| 3        | Upgrade to domain-specific embeddings   | Medium | Low    |
| 4        | Add citation tracking in audit output   | High   | Medium |
| 5        | Multi-modal document intake             | Medium | Medium |
| 6        | Parallel audit paths (CFR + ICH-GCP)     | High   | High   |
| 7        | Report export (PDF/CSV)                  | Medium | Medium |


In [None]:
# 1) Replace the core Google (Gemini) API client with MedGemma
# in app.py

# TODO: document the expected input and output of this step.

from langchain_google_vertexai import ChatVertexAI
# NOTE(review): `model_name`, `project`, and `location` below are placeholders;
# confirm the identifier of the MedGemma deployment you actually serve
# before running this cell.
llm = ChatVertexAI(
    model_name="medgemma-27b",  # or the specific endpoint you deploy
    project="your-gcp-project",
    location="us-central1",
    temperature=0.1,  # low temperature for regulatory precision
)

In [None]:
# 2) add multi-modal document processing node
# in nodes.py 

def document_processing_node(state, llm):
    """Ask the model to pull regulatory-relevant sections out of a protocol.

    Args:
        state: Graph state mapping. Must contain ``protocol_document_b64``,
            which is interpolated into a ``data:application/pdf;base64,`` URL.
        llm: Chat model object exposing ``invoke(messages)``.

    Returns:
        A dict with the single key ``protocol_text`` set to the
        ``content`` of the model response.
    """
    from langchain_core.messages import HumanMessage

    instruction_part = {
        "type": "text",
        "text": "Extract all regulatory-relevant sections from this clinical trial protocol. Identify: study design, consent procedures, data collection methods, electronic signature requirements, and adverse event reporting workflows.",
    }

    # NOTE(review): the document is attached through an ``image_url`` part
    # with a PDF MIME type -- confirm the deployed model accepts PDF data
    # URLs in this slot.
    document_url = f"data:application/pdf;base64,{state['protocol_document_b64']}"
    document_part = {"type": "image_url", "image_url": {"url": document_url}}

    request = HumanMessage(content=[instruction_part, document_part])
    extraction = llm.invoke([request])
    return {"protocol_text": extraction.content}
```

### Expand the Graph to a Multi-Node Pipeline
'''
This is where the architecture gets substantially more capable. Instead of two nodes, you'd have a pipeline that mirrors an actual regulatory review:
Document Intake → Section Extraction → Retrieve Regulations → 
    ├── CFR Part 11 Audit
    ├── ICH-GCP Audit  
    └── Ethics/IRB Audit
→ Score Aggregation → Report Generation
'''

def create_rip_graph(retriever, llm):
    """Assemble and compile the multi-node review graph.

    Wiring: ``intake`` -> ``retrieve`` -> (``audit_cfr``, ``audit_gcp``)
    -> ``score`` -> ``report`` -> ``END``. Both audit nodes hang off
    ``retrieve`` and both feed ``score``.

    Args:
        retriever: Bound into ``retrieval_node`` as its ``retriever`` argument.
        llm: Bound into every other node as its ``llm`` argument.

    Returns:
        The result of ``StateGraph.compile()`` for the assembled graph.
    """
    graph = StateGraph(AgentState)

    # Node name -> handler with its dependencies pre-bound.
    node_table = (
        ("intake", partial(document_processing_node, llm=llm)),
        ("retrieve", partial(retrieval_node, retriever=retriever)),
        ("audit_cfr", partial(audit_node, llm=llm, focus="21_cfr_part_11")),
        ("audit_gcp", partial(audit_node, llm=llm, focus="ich_gcp")),
        ("score", partial(scoring_node, llm=llm)),
        ("report", partial(report_node, llm=llm)),
    )
    for node_name, handler in node_table:
        graph.add_node(node_name, handler)

    graph.set_entry_point("intake")

    # (source, target) pairs; "retrieve" fans out to the two audit paths,
    # which then both lead into "score".
    edge_table = (
        ("intake", "retrieve"),
        ("retrieve", "audit_cfr"),
        ("retrieve", "audit_gcp"),
        ("audit_cfr", "score"),
        ("audit_gcp", "score"),
        ("score", "report"),
        ("report", END),
    )
    for source, target in edge_table:
        graph.add_edge(source, target)

    return graph.compile()

In [None]:
# 3) implement compliance scoring node

def scoring_node(state, llm):
    """Generate a numeric compliance score with justification.

    The model is asked to answer in JSON. Chat models frequently wrap JSON
    in a Markdown code fence or add a sentence around it, so the outermost
    ``{...}`` span of the reply is extracted before parsing instead of
    feeding the raw reply to ``json.loads``.

    Args:
        state: Graph state mapping. ``state["audit_results"]`` is inserted
            into the prompt and must be a string, since the parsed result is
            appended to it.
        llm: Object exposing ``invoke(prompt)`` whose return value has a
            ``content`` attribute.

    Returns:
        A dict with ``compliance_score`` (the ``score`` field of the parsed
        reply) and ``audit_results`` (the incoming findings followed by a
        blank line and the pretty-printed parsed reply).

    Raises:
        json.JSONDecodeError: If no parseable JSON object is in the reply.
        KeyError: If ``audit_results`` is missing from ``state`` or the
            parsed reply has no ``score`` field.
    """
    import json

    scoring_prompt = """Based on the audit findings below, assign a compliance score from 1-100.
    
    Score bands:
    - 90-100: Fully compliant, minor documentation improvements only
    - 70-89: Substantially compliant, specific gaps to address
    - 50-69: Significant compliance risks requiring protocol amendment
    - Below 50: Critical deficiencies, do not proceed to enrollment
    
    Audit findings:
    {findings}
    
    Respond in JSON: {{"score": int, "band": str, "critical_gaps": [str], "recommendations": [str]}}"""

    findings = state["audit_results"]
    response = llm.invoke(scoring_prompt.format(findings=findings))

    # Tolerate code fences or prose around the JSON: keep only the span from
    # the first "{" to the last "}". If no such span exists, the raw reply is
    # parsed as-is so the original JSONDecodeError still surfaces.
    raw_reply = response.content
    payload = raw_reply
    if isinstance(raw_reply, str):
        start = raw_reply.find("{")
        end = raw_reply.rfind("}")
        if start != -1 and end > start:
            payload = raw_reply[start:end + 1]

    result = json.loads(payload)
    return {
        "compliance_score": result["score"],
        "audit_results": findings + "\n\n" + json.dumps(result, indent=2),
    }
