In [25]:
!pip install langchain langchain-openai langchain-community pypdf chromadb python-dotenv tavily-python --quiet

In [26]:
import os
import shutil
import json # For pretty printing JSON
from dotenv import load_dotenv

from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# --- Configuration ---
load_dotenv()  # Copy variables from a local .env file into the process environment

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

CHROMA_DB_DIR = "./chroma_db_community_dev" # Directory to store your vector DB
PDF_SOURCE_DIR = "./pdfs_to_process" # Directory where you put your lecture PDFs

# os.getenv returns None for unset variables, so only report success when every
# key was actually found; otherwise name the missing ones up front.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("TAVILY_API_KEY", TAVILY_API_KEY),
        ("GROQ_API_KEY", GROQ_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    print(f"WARNING: Missing API key(s): {', '.join(_missing_keys)}. "
          "Set them in your .env file or environment before running the agent.")
else:
    print("API keys loaded successfully!")
print(f"ChromaDB will be stored in: {CHROMA_DB_DIR}")
print(f"PDFs will be loaded from: {PDF_SOURCE_DIR}")

# --- Initialize Core Components ---
# The chat model is served by Groq (Llama 3 70B). To use OpenAI instead, swap in
# the commented-out ChatOpenAI line below (e.g. gpt-4o, or gpt-3.5-turbo for lower cost).
# NOTE(review): confirm "llama3-70b-8192" is still offered by Groq; model ids get retired.
# llm = ChatOpenAI(model="gpt-4o", temperature=0.3, api_key=OPENAI_API_KEY)
llm = ChatGroq(model_name="llama3-70b-8192", api_key=GROQ_API_KEY)

# Embeddings still come from OpenAI, so OPENAI_API_KEY is needed even with the Groq LLM.
embeddings = OpenAIEmbeddings(api_key=OPENAI_API_KEY)
tavily_search = TavilySearchResults(max_results=5, tavily_api_key=TAVILY_API_KEY)

print("LLM, Embeddings, and Tavily Search initialized.")

API keys loaded successfully!
ChromaDB will be stored in: ./chroma_db_community_dev
PDFs will be loaded from: ./pdfs_to_process
LLM, Embeddings, and Tavily Search initialized.


In [27]:
def process_and_store_pdfs(source_directory):
    """
    Load every PDF in ``source_directory``, split the text into chunks, embed
    the chunks, and persist them in a fresh ChromaDB store at ``CHROMA_DB_DIR``.

    The existing database is only wiped once there is new content to replace
    it, so a run with no usable PDFs no longer destroys a previous index.

    Args:
        source_directory: Path of the folder that holds the lecture PDFs.

    Returns:
        A retriever over the newly built vector store, or None when the
        directory is missing, holds no PDFs, or no PDF yields any text.
    """
    # A missing folder is reported and skipped instead of letting os.listdir
    # raise FileNotFoundError.
    if not os.path.isdir(source_directory):
        print(f"WARNING: PDF source directory '{source_directory}' does not exist. Skipping PDF processing.")
        return None

    # Case-insensitive match so files such as "Lecture1.PDF" are picked up;
    # sorted so the processing order is the same on every run.
    pdf_files = sorted(f for f in os.listdir(source_directory) if f.lower().endswith('.pdf'))

    if not pdf_files:
        print(f"WARNING: No PDF files found in '{source_directory}'. Skipping PDF processing.")
        return None

    all_docs = []
    loaded_count = 0  # PDFs that actually loaded, used for the final status line
    for pdf_file in pdf_files:
        file_path = os.path.join(source_directory, pdf_file)
        print(f"Loading {pdf_file}...")
        try:
            docs = PyPDFLoader(file_path).load()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole batch.
            print(f"ERROR: Could not load {pdf_file}: {e}")
            continue
        all_docs.extend(docs)
        loaded_count += 1

    no_text_warning = ("WARNING: No text extracted from PDFs. Please ensure they are not image-only PDFs "
                       "or are valid PDF documents. Skipping vector store creation.")
    if not all_docs:
        print(no_text_warning)
        return None

    # Chunking
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = text_splitter.split_documents(all_docs)
    print(f"Split {len(all_docs)} documents into {len(chunks)} chunks.")

    # Pages can load yet carry no text (e.g. scanned, image-only PDFs); with
    # zero chunks there is nothing to embed, so stop before touching the DB.
    if not chunks:
        print(no_text_warning)
        return None

    # Only now drop the previous database, so the new store starts clean.
    if os.path.exists(CHROMA_DB_DIR):
        print(f"Clearing existing lecture notes database at {CHROMA_DB_DIR}...")
        shutil.rmtree(CHROMA_DB_DIR)
        print("Cleared.")

    # Create and persist vector store
    vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_DB_DIR)
    print(f"SUCCESS: Successfully processed {loaded_count} PDF(s) and stored in vector DB.")
    return vectorstore.as_retriever()

# --- Build the lecture knowledge base ---
# Run the PDF pipeline once; the result is None when nothing could be indexed.
lecture_retriever = process_and_store_pdfs(PDF_SOURCE_DIR)

# Tell the reader which sources the agent will be able to draw on.
print(
    "\nLecture PDFs processed and knowledge base is ready."
    if lecture_retriever
    else "\nNo lecture knowledge base created. The agent will rely solely on online research."
)


No lecture knowledge base created. The agent will rely solely on online research.


In [28]:
def conduct_research_and_report(user_prompt, lecture_retriever):
    """
    Run the full research -> draft -> critique -> final report pipeline.

    Steps, each printed to stdout as it completes:
      1. Search the web with Tavily and pull matching lecture chunks.
      2. Ask the LLM for an initial draft built from both sources.
      3. Ask the LLM to critique that draft.
      4. Ask the LLM to revise the draft using the critique.

    Args:
        user_prompt: The research question / report request from the user.
        lecture_retriever: Retriever over the lecture vector store, or None
            when no lecture PDFs were indexed (online research only).

    Returns:
        The text of the final report, so callers can save or post-process it
        in addition to reading the printed output.
    """
    print("\n" + "="*80)
    print("STEP 1: Gathering Information...")
    print("="*80 + "\n")

    # 1. Researcher Agent (Online Research)
    print("INFO: Searching online for latest research papers...")
    research_query = f"Academic research papers on {user_prompt} in community development, published in last 10 years, include key ideas, findings and methodologies."
    online_research_results = tavily_search.invoke({"query": research_query})

    print("\n### Online Research Findings:")
    print(json.dumps(online_research_results, indent=2))

    # 2. Lecture Analyst Agent (Retrieve Lecture Context)
    print("\nINFO: Retrieving relevant information from your lecture slides...")

    if lecture_retriever:
        # `invoke` is the Runnable entry point used everywhere else in this
        # notebook; it replaces the deprecated `get_relevant_documents`.
        lecture_context_docs = lecture_retriever.invoke(user_prompt)
        lecture_context_str = "\n".join([doc.page_content for doc in lecture_context_docs])
        if not lecture_context_str:
            # A retriever exists but nothing in it matched this prompt.
            lecture_context_str = "No specific lecture content found relevant to the prompt within the processed lecture notes."
    else:
        # No PDFs were indexed at all, so there is no retriever to query.
        lecture_context_str = "No lecture context available as no PDFs were processed."

    print("\n### Relevant Lecture Notes:")
    print(lecture_context_str[:1500] + "..." if len(lecture_context_str) > 1500 else lecture_context_str) # Show snippet

    # --- Stage 3: Initial Drafting Agent ---
    print("\n" + "="*80)
    print("STEP 2: Generating Initial Draft...")
    print("="*80 + "\n")

    drafting_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a top performing student and an expert academic writer in Community Development. 
         Your task is to write an initial draft of a report or literature review based on the user's prompt, 
         incorporating information from both online research findings and provided lecture notes.
         Ensure the draft is structured, academic in tone, and directly addresses the user's request.
         
         Online Research Findings (summarized):
         {online_research}
         
         Lecture Notes Context:
         {lecture_notes}
         """),
        ("user", "Based on my prompt: '{user_prompt}', write an initial academic draft. "
        "Focus on synthesizing online research with concepts from the lecture notes."),
    ])

    initial_draft_chain = drafting_prompt | llm
    initial_draft = initial_draft_chain.invoke({
        "online_research": online_research_results,
        "lecture_notes": lecture_context_str,
        "user_prompt": user_prompt
    }).content

    print("\n### Initial Draft:")
    print(initial_draft)

    # --- Stage 4: Critic/Evaluator Agent ---
    print("\n" + "="*80)
    print("STEP 3: Evaluating the Draft...")
    print("="*80 + "\n")
    critic_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a critical academic peer reviewer specializing in Community Development. 
         Your task is to evaluate the provided 'initial draft' based on the 'original user prompt' and the 'lecture context'.
         Provide constructive feedback using the following criteria:
         -   **Accuracy & Completeness:** Is the information factual and does it fully address the original prompt? Are there any obvious gaps?
         -   **Relevance to Lecture Notes:** Does it effectively integrate and reference concepts, theories, or examples from the provided lecture notes? Are there missed opportunities for connection?
         -   **Integration of Online Research:** Does it properly synthesize and cite (conceptually) the online research findings? Is it up-to-date?
         -   **Academic Tone & Structure:** Is the language appropriate for a postgraduate academic paper? Is the structure logical and easy to follow?
         -   **Referencing: ** Check if the report has reference and also if the online papers has been quoted reference in the report in APA style format.
         -   **AI detection: ** Does the report look like its written by an human. What is the percetage of the writeup looks like its written by an AI and how can we humanize it. 
         
         Provide your feedback in bullet points, clearly indicating areas for improvement. Do NOT rewrite the draft yourself.
         """),
        ("user", "Original User Prompt: '{original_user_prompt}'\n\nLecture Notes Context: '{lecture_context}'\n\nInitial Draft to Evaluate:\n'{initial_draft}'")
    ])

    critic_chain = critic_prompt | llm
    critic_feedback = critic_chain.invoke({
        "original_user_prompt": user_prompt,
        "lecture_context": lecture_context_str,
        "initial_draft": initial_draft
    }).content

    print("\n### Critic's Feedback:")
    print(critic_feedback)

    # --- Stage 5: Final Report Agent ---
    print("\n" + "="*80)
    print("STEP 4: Generating Final Report...")
    print("="*80 + "\n")
    final_report_prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a highly skilled academic writer for a Masters in Community Development. 
         Your task is to revise and finalize the 'initial draft' based on the 'critic's feedback', the 'original user prompt', 
         the 'online research findings', and the 'lecture notes context'.
         Produce a polished, comprehensive, and academically sound report that integrates all sources seamlessly.
         
         Original User Prompt: '{original_user_prompt}'
         
         Online Research Findings (summarized):
         {online_research}
         
         Lecture Notes Context:
         {lecture_notes}
         
         Initial Draft:
         {initial_draft}
         
         Critic's Feedback:
         {critic_feedback}
         
         Produce the final, refined report.
         """),
        ("user", "Please finalize the report, incorporating the feedback and all provided context.")
    ])

    final_report_chain = final_report_prompt | llm
    final_report = final_report_chain.invoke({
        "original_user_prompt": user_prompt,
        "online_research": online_research_results,
        "lecture_notes": lecture_context_str,
        "initial_draft": initial_draft,
        "critic_feedback": critic_feedback
    }).content

    print("\n" + "="*80)
    print("🎉 FINAL REPORT:")
    print("="*80 + "\n")
    print(final_report)

    # Hand the report back as well; callers that ignore the return value are unaffected.
    return final_report

In [29]:
# --- Research question for this run ---
# Adjacent string literals are joined by the parser into a single prompt.
# Append "referencing relevant theories from our lectures" when lecture PDFs are available.
user_prompt = (
    "Write a comprehensive report on the role of participatory approaches in sustainable community development, "
    "incorporating findings from recent empirical studies (last 2 years)."
)

print(f"Your Research Prompt: '{user_prompt}'\n")

# --- Kick off the agent pipeline ---
print("Starting the research and report generation process...")
try:
    # The pipeline copes with a None retriever itself, so it is always called.
    conduct_research_and_report(user_prompt, lecture_retriever)
except Exception as err:
    # Report the failure and fall through so the closing banner is still shown.
    print(f"\nERROR: An error occurred during report generation: {err}")
    print("Please check your API keys, network connection, and the prompt for any issues.")

print("\n" + 80 * "=")
print("Process Complete. Remember to critically review the generated content.")
print(80 * "=" + "\n")

Your Research Prompt: 'Write a comprehensive report on the role of participatory approaches in sustainable community development, incorporating findings from recent empirical studies (last 2 years).'

Starting the research and report generation process...

STEP 1: Gathering Information...

INFO: Searching online for latest research papers...

### Online Research Findings:
[
  {
    "title": "Participatory Methods and Tools in Community Development",
    "url": "https://www.echocommunity.org/en/resources/53f99bb6-f532-4606-8229-0327c16dbd3c",
    "content": "A participatory learning approach, applied at each stage of a community development project, is crucial to reaching the level of stakeholder involvement needed for lasting, positive change.   The use of participatory survey and planning methods in the development process encourages community ownership of projects, allows for information to flow between both the community (Fig. 1) and the development organization, and enhances accoun