In [16]:
import os 
import sys
from dotenv import load_dotenv,find_dotenv
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate

In [17]:
# Load variables from the nearest .env file (if find_dotenv locates one) into
# the process environment. The return value is not needed.
_ = load_dotenv(find_dotenv())

# Fail fast with an actionable message when the key is missing or empty.
# Assigning the result of os.getenv() straight back into os.environ raises an
# opaque "TypeError: str expected, not NoneType" when the variable is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to a .env file or export it in the "
        "environment before running this notebook."
    )

# Re-export explicitly so the key is visible through the process environment.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [18]:
# Chat model handle that the generation chain pipes the prompt into.
# temperature=0 is passed so sampling is as non-random as the service allows.
llm = ChatGroq(
    temperature=0,
    model="llama-3.3-70b-versatile",
)

In [None]:
# Raw template text for the research-idea generator.
#
# Single-brace names ({topic}, {explicit_keywords}, {novelty}, {target_venue},
# {constraints}, {style}) are the input variables filled in at invoke time.
# The braces of the JSON schema shown to the model are literal text, so they are
# doubled ({{ and }}): from_template uses f-string style formatting, and a bare
# "{" would be parsed as the start of a placeholder.
#
# The text is kept in its own name because the chain-building cell below
# refers to `prompt_template`.
prompt_template = """
                                        You are an expert research advisor specializing across multiple domains 
(e.g., Machine Learning, Artificial Intelligence, Robotics, Systems, Data Science, 
HCI, Bioinformatics, Physics, Economics, and interdisciplinary fields).

Your task is to generate a complete research-paper idea based strictly on the user's inputs.

You MUST return the final output as a VALID JSON OBJECT ONLY.
No markdown. No commentary. No notes outside JSON. No extra text.

-----------------------
### USER INPUTS
- Topic: "{topic}"
- Keywords: "{explicit_keywords}"
- Desired Novelty: "{novelty}"   (High / Medium / Low)
- Target Venue: "{target_venue}"
- Constraints: "{constraints}"   (compute, time, data, resources)
- Style: "{style}"               (theoretical, empirical, systems, survey, mixed-methods)
-----------------------

### YOUR TASK
Generate a research idea that is:
- feasible under the constraints
- novel at the requested level
- aligned with the target venue style
- technically sound and realistic
- concise but specific

-----------------------
### OUTPUT FORMAT (STRICT JSON)

Return ONLY a JSON object with the following keys:

{{
  "title": "",
  "abstract": "",
  "research_questions": [""],
  "hypotheses": [""],
  "methodology": {{
      "approach": "",
      "architecture_or_model": "",
      "steps": [""]
  }},
  "datasets_and_tools": [""],
  "experiments": [""],
  "expected_contributions": [""],
  "outline": [""],
  "keywords": [""],
  "latex_skeleton": "",
  "literature_search_queries": [""]
}}

### Requirements
- All fields must be filled.
- `latex_skeleton` must be a minimal, compilable LaTeX article template.
- `literature_search_queries` must include 6 academically useful search phrases for arXiv/Google Scholar.
- Respect constraints strictly. If the user's idea is not feasible, propose a feasible alternative BUT still keep the idea aligned with the topic.
- Keep descriptions short, precise, and academically appropriate.
- Always ensure output is valid JSON.

-----------------------

Generate the JSON now.
"""

# Compiled chat prompt built from the template text above.
prompt = ChatPromptTemplate.from_template(prompt_template)


In [20]:
# Build chain
#
# `prompt` is the ChatPromptTemplate created in the prompt cell above; it is
# reused here rather than rebuilt, so this cell does not depend on any name
# that only existed in an earlier kernel session.
#
# The chain is invoked with a single dict whose keys match the template's
# placeholders, and the prompt accepts that dict directly. A mapping of
# RunnablePassthrough() per key is not used: each passthrough would receive
# the *entire* input dict, so every placeholder would be filled with the whole
# dict instead of its own value (and RunnablePassthrough is not imported here).
# Extra keys in the input (e.g. "field") are simply unused by the template.
chain = prompt | llm

In [21]:
# Example request, passed to the chain as one dict.
user_input = dict(
    field="ML/AI",
    topic="Advancements in Federated Learning for Healthcare Applications",
    explicit_keywords="federated learning, healthcare, privacy, machine learning, data security",
    novelty="High",
    target_venue="NeurIPS",
    constraints="Limited computational resources and strict data privacy regulations",
    style="empirical",
)

# Run the chain once and keep the model's reply for the display cell below.
response = chain.invoke(user_input)


In [22]:
# Show the raw text of the model's reply.
# NOTE(review): the recorded output begins with a ```json fence even though the
# prompt asks for bare JSON, so strip the fence before parsing the text as JSON.
reply_text = response.content
print(reply_text)

```json
{
  "title": "Resource-Efficient Federated Learning for Healthcare Applications with Enhanced Privacy",
  "abstract": "This research proposes a novel federated learning framework for healthcare applications, addressing limited computational resources and strict data privacy regulations. Our approach leverages lightweight machine learning models and differential privacy techniques to ensure secure and efficient collaboration among healthcare institutions.",
  "research_questions": [
    "How can federated learning be optimized for healthcare applications with limited computational resources?",
    "What differential privacy techniques can be applied to ensure data security in federated learning for healthcare?"
  ],
  "hypotheses": [
    "Lightweight machine learning models can achieve comparable performance to traditional models in federated learning for healthcare applications.",
    "Differential privacy techniques can effectively protect sensitive healthcare data in federate