In [1]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_core.messages import HumanMessage, SystemMessage
import json
from langchain_core.messages import HumanMessage, SystemMessage
import requests
from langchain_community.tools.tavily_search import TavilySearchResults
from V2_RAG_classes import  Chunks, EmbeddingManager, VectorStore, RAGRetriever

# Load variables from the local ".env" file into the process environment.
load_dotenv(".env")
# Groq API key used by every ChatGroq client below; raises KeyError if it is not set.
GROQ_TOKEN = os.environ['GROQ_TOKEN']

  from .autonotebook import tqdm as notebook_tqdm


# ***UNDERSTANDING TASK***

In [2]:
# Chat model shared by the classification and planning steps.
# temperature=0 keeps the JSON output as repeatable as possible.
llm = ChatGroq(
    model='llama-3.3-70b-versatile',
    api_key=GROQ_TOKEN,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# System prompt for the task-understanding step. The example object must itself
# be valid JSON, because the model is told to return ONLY valid JSON and later
# cells parse the reply with json.loads.
system_prompt = """
    You are a helpful assistant. Your task is to analyze the user's question and produce a structured classification.

    Steps:
    1. Determine the task type(s).
    Possible categories:
    - RESEARCH
    - SURVEY
    - COMPARISON
    If multiple categories apply, include all of them.

    2. Extract key information from the question, including (when applicable):
    - topic
    - time_range
    - methods / approaches
    - constraints or assumptions

    Output format:
    - Return ONLY a valid JSON object.
    - Do NOT include explanations, markdown, or extra text.
    - Use lowercase values for task_type.

    Example:

    User question:
    "Compare Transformers and LSTMs for time-series forecasting"

    Output:
    {
    "user_prompt" : ["Compare Transformers and LSTMs for time-series forecasting"],
    "task": {
    "task_type": ["comparison"],
    "topic": "time-series forecasting",
    "methods": ["transformers", "lstms"]}
    }

    Some Rules:
    1. If you classify the task as a comparison, the user input must name at least 2 different methods to compare.
    A single method is not enough to call it a comparison task.
    """


user_input = input("\nEnter the Prompt:\n")
# Fail early instead of spending an LLM call on a blank question.
if not user_input.strip():
    raise ValueError("The prompt must not be empty")

messages = [
    SystemMessage(content=system_prompt),
    HumanMessage(content=user_input),
]

response = llm.invoke(messages)

# Raw JSON text of the classification; later cells parse it with json.loads.
task_n_prompt = response.content
print(task_n_prompt)

{
"user_prompt": ["Impact of AI on Education Sector (impact on students, impact on jobs, impact on teachers, impact on learning methods) include both upside and downside of AI in the report"],
"task": {
"task_type": ["research", "comparison"],
"topic": "ai in education sector",
"methods": ["traditional learning methods", "ai-based learning methods"],
"constraints or assumptions": ["impact on students", "impact on jobs", "impact on teachers", "impact on learning methods"],
"time_range": null,
"approaches": ["analyzing benefits", "analyzing drawbacks"]
}
}


# ***PLANNING***

In [3]:
# Mutable record of the agent's progress; later cells fill in "plan" and "status".
agent_state = dict(
    task=None,
    plan=None,
    current_step=0,
    notes=[],
    sources=[],
    status="idle",
)

# Keep only the "task" object from the classifier's JSON reply.
agent_state.update(task=json.loads(response.content)['task'])

In [4]:
#system prompt for planner
planner_system_prompt = """
You are an expert research planner.

Given a structured research task, generate a step-by-step execution plan.

Rules:
- Return ONLY a valid JSON object.
- No explanations
- Steps must be from this allowed list:
  - search_sources
  - read_documents
  - extract_key_points
  - compare_methods
  - analyze_trends
  - write_report

Return format:
{
  "steps": ["step1", "step2", ...]
}
"""

# Step names the planner may return; mirrors the allowed list in planner_system_prompt.
ALLOWED_PLAN_STEPS = (
    "search_sources",
    "read_documents",
    "extract_key_points",
    "compare_methods",
    "analyze_trends",
    "write_report",
)

planner_messages = [
    SystemMessage(content=planner_system_prompt),
    HumanMessage(content=task_n_prompt)
]
planner_response = llm.invoke(planner_messages)
print(planner_response.content)

# Validate the reply before storing it, in the same fail-fast style used for the
# search-query generator: the model is only *asked* to follow the rules above.
try:
    plan_steps = json.loads(planner_response.content)['steps']
    if not isinstance(plan_steps, list) or not plan_steps:
        raise ValueError("'steps' must be a non-empty list")
    unknown_steps = [step for step in plan_steps if step not in ALLOWED_PLAN_STEPS]
    if unknown_steps:
        raise ValueError(f"steps outside the allowed list: {unknown_steps}")
except Exception as e:
    raise ValueError(f"Invalid planner output: {e}") from e

agent_state['plan'] = plan_steps
agent_state["status"] = "planned"

{
  "steps": [
    "search_sources",
    "read_documents",
    "extract_key_points",
    "compare_methods",
    "analyze_trends",
    "write_report"
  ]
}


# ***WEB SEARCHING***

In [5]:
# Generate Search queries for web searching
query_system_prompt = """
You are a search query generator.

Your task is to generate effective ACADEMIC search queries
based on the user's original question and a structured Python dictionary.

Rules:
- Generate 3 to 5 distinct academic search queries
- Queries should be suitable for Google Scholar / arXiv
- Return ONLY a valid JSON object
- No explanations, no markdown

Sample INPUT:
{
  "user_prompt": "Compare Transformers and LSTMs for time-series forecasting",
  "task": {
    "task_type": ["comparison"],
    "topic": "time-series forecasting",
    "methods": ["transformers", "lstms"]
  }
}

Sample OUTPUT:
{
  "search_queries": [
    "transformer vs lstm time series forecasting",
    "benchmark transformer lstm time series forecasting",
    "deep learning time series forecasting comparison paper"
  ]
}
"""

# LLM which will generate the search queries (slightly higher temperature than
# the classifier so the queries are not all near-identical).
query_llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    api_key=GROQ_TOKEN,
    temperature=0.2,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

query_messages = [
    SystemMessage(content=query_system_prompt),
    HumanMessage(content=task_n_prompt)
]

query_response = query_llm.invoke(query_messages)

# Parse & Validate Output
try:
    parsed_output = json.loads(query_response.content)
    search_queries = parsed_output["search_queries"]

    # A length check alone would also accept a 3-5 character string or a dict
    # with 3-5 keys, so verify the container and element types first.
    if not isinstance(search_queries, list) or not all(
        isinstance(query, str) and query.strip() for query in search_queries
    ):
        raise ValueError("search_queries must be a list of non-empty strings")

    if not (3 <= len(search_queries) <= 5):
        raise ValueError("Expected 3–5 search queries")

except Exception as e:
    # Chain explicitly so the original parsing error stays visible in the traceback.
    raise ValueError(f"Invalid query generator output: {e}") from e


print("\nGenerated Search Queries:")
search_queries


Generated Search Queries:


['impact of artificial intelligence on education sector students teachers and learning methods',
 'ai in education benefits and drawbacks on students jobs and teachers',
 'comparative study of traditional and ai-based learning methods in education sector',
 'effects of ai on education sector jobs and learning outcomes for students',
 'benefits and limitations of ai-based learning methods for students and teachers in education sector']

In [6]:
#TAVILY
# Read eagerly so a missing key fails here (KeyError) rather than mid-search.
# NOTE(review): TAVILY_TOKEN is not passed to the tool below; presumably the
# tool reads TAVILY_API_KEY from the environment itself — confirm.
TAVILY_TOKEN = os.environ['TAVILY_API_KEY']

def search_infos(queries, max_results=3):
    """Run every query through Tavily twice and return de-duplicated hits.

    Each query is searched once with a paper-oriented suffix and once with a
    blog/article-oriented suffix.

    Args:
        queries: Iterable of search query strings.
        max_results: Maximum number of hits requested per individual search.

    Returns:
        List of result dicts, unique by their 'url' value, in first-seen order.
    """
    print('Searching Internet for relevant information....\n')
    search = TavilySearchResults(max_results=max_results)
    all_results = []

    # Paper-oriented search first, then general web, matching the original order.
    query_suffixes = ("research paper OR arxiv", "blog OR article OR analysis")

    for query in queries:
        for suffix in query_suffixes:
            full_query = f"{query} {suffix}"
            results = search.invoke(full_query)

            # On an API failure the tool returns an error string instead of a
            # list; extending with it would add single characters and crash
            # later on item['url'].
            if not isinstance(results, list):
                print(f"Search failed for '{full_query}': {results}")
                continue

            all_results.extend(results)

    seen_urls = set()
    unique_results = []

    for item in all_results:
        # Ignore malformed hits that carry no usable URL.
        url = item.get('url') if isinstance(item, dict) else None
        if url and url not in seen_urls:
            unique_results.append(item)
            seen_urls.add(url)

    display(seen_urls)
    print("Information Aquired from Internet//\n")
    return unique_results


urls = search_infos(search_queries)

Searching Internet for relevant information....



  search = TavilySearchResults(max_results=max_results)


{'https://arxiv.org/abs/2412.02166',
 'https://arxiv.org/html/2309.02029v2',
 'https://arxiv.org/html/2412.02166v1',
 'https://arxiv.org/html/2505.02198v1',
 'https://arxiv.org/html/2510.16019v1',
 'https://astra-ai.co/blog/how-does-ai-impact-education',
 'https://education.illinois.edu/about/news-events/news/article/2024/10/24/ai-in-schools--pros-and-cons',
 'https://hrmars.com/papers_submitted/24690/enhancing-early-education-with-artificial-intelligence-a-comparative-study-of-ai-powered-learning-versus-traditional-methods.pdf',
 'https://learningsciences.smu.edu/blog/artificial-intelligence-in-education',
 'https://link.springer.com/article/10.1007/s11165-024-10176-3',
 'https://www.edweek.org/technology/rising-use-of-ai-in-schools-comes-with-big-downsides-for-students/2025/10',
 'https://www.frontiersin.org/journals/artificial-intelligence/articles/10.3389/frai.2024.1457299/full',
 'https://www.iiisci.org/journal/PDV/sci/pdfs/SA121NK24.pdf',
 'https://www.ijfmr.com/papers/2024/4/246

Information Aquired from Internet//



# ***READ AND RETRIEVE INFORMATION***

In [7]:
import requests
from bs4 import BeautifulSoup
from io import BytesIO
from pypdf import PdfReader
from readability import Document

def extract_text_from_url(url):
    """Download `url` and return its textual content.

    PDF responses go through extract_pdf, HTML responses through extract_html;
    any other content type is returned as the raw response text.

    Args:
        url: Address of the page or document to fetch.

    Returns:
        The extracted text as a string.

    Raises:
        Exception: If the server answers with a status code other than 200.
            Errors raised by requests.get itself (timeouts, connection
            failures) are not caught here.
    """
    # Local import keeps this block self-contained without touching the import cell.
    from urllib.parse import urlparse

    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=10)

    if response.status_code != 200:
        raise Exception(f"Failed to Fetch {url} (HTTP {response.status_code})\n")

    # Media types are case-insensitive, so normalise before comparing.
    content_type = response.headers.get("Content-Type", "").lower()

    # Test only the path component so "paper.PDF" and "paper.pdf?download=1"
    # are recognised as PDFs as well.
    path_is_pdf = urlparse(url).path.lower().endswith(".pdf")

    if "application/pdf" in content_type or path_is_pdf:
        return extract_pdf(response.content)
    if "text/html" in content_type or "application/xhtml+xml" in content_type:
        return extract_html(response.text)
    return response.text
    

def extract_html(html_content):
    """Return the readable text of an HTML page.

    The markup is first reduced with readability's Document.summary(), then
    parsed with BeautifulSoup; script, style and noscript elements are removed
    and the remaining text nodes are joined with single spaces.
    """
    main_markup = Document(html_content).summary()
    parsed = BeautifulSoup(main_markup, "html.parser")

    # Drop elements whose content is never shown to a reader.
    for node in parsed.find_all(["script", "style", "noscript"]):
        node.decompose()

    return parsed.get_text(separator=" ")



def extract_pdf(binary_content):
    """Extract the text of a PDF given as raw bytes.

    Pages are read one at a time. A page that raises during extraction is
    reported and skipped, and NUL characters are stripped from each page's
    text. If the document cannot be opened at all, the error is printed and an
    empty string is returned.

    Returns:
        Text of all readable pages joined with single spaces, or "" on failure.
    """
    collected = []
    try:
        pdf = PdfReader(BytesIO(binary_content))

        for page_number, page in enumerate(pdf.pages, start=1):
            try:
                raw_text = page.extract_text()
                if not raw_text:
                    continue
                stripped = str(raw_text).replace('\x00', '')
                if stripped:
                    collected.append(stripped)
            except Exception as page_error:
                # A single bad page must not abort the whole document.
                print(f"  > Warning: Could not extract page {page_number}. Skipping stream. Error: {page_error}")

    except Exception as e:
        print(f"Critical error reading PDF structure: {e}")
        return ""

    return " ".join(collected)

In [8]:
from langchain_core.documents import Document as LCDocument
from pathlib import Path
# Parent of the current working directory.
base_dir = Path.cwd().parent
# Folder for the vector store; created (with parents) if it does not exist yet.
vs_folder = base_dir / "ARA_vector_store"
vs_folder.mkdir(parents=True, exist_ok=True)
# Passed to VectorStore as persist_directory in read_documents.
# NOTE(review): the name ends in ".pdf" although it is used as a directory — confirm this is intended.
persist_dir = vs_folder / "ARA_research_report.pdf"

def read_documents(urls):
    """Fetch, chunk, embed and store the text behind each search result.

    Args:
        urls: Iterable of search-result dicts; the page address is read from
            each item's 'url' key.

    Returns:
        Tuple (embed_manager, vectorstore) holding the EmbeddingManager and
        VectorStore instances used, so later cells can query the same store.

    A result that has no URL, yields no text or chunks, or raises anywhere in
    the pipeline is reported and skipped; processing continues with the next one.
    """
    embed_manager = EmbeddingManager()
    vectorstore = VectorStore(persist_directory=persist_dir)

    for result in urls:
        source_url = result.get('url') if isinstance(result, dict) else None
        if not source_url:
            print(f"Skipping a search result without a URL: {result!r:.200}")
            continue

        # Log only the address: the full result dict embeds a long content
        # snippet and floods the cell output.
        print(f'processing {source_url}')
        try:
            text = extract_text_from_url(source_url)
            if not text or not text.strip():
                print(f"Skipping the {source_url}\nERROR: no text could be extracted")
                continue

            print('Chunking the loaded data...............')
            chunker = Chunks(text)
            chunks_text = chunker.split_documents()
            if not chunks_text:
                print(f"Skipping the {source_url}\nERROR: chunking produced no chunks")
                continue
            chunk_docs = [
                LCDocument(page_content=chunk, metadata={"source": source_url})
                for chunk in chunks_text
            ]
            print('chunking completed\n')

            print('Embedding chunks................')
            embeddings = embed_manager.generate_embeddings(texts=chunks_text)
            print('Embedding completed\n')

            print("Storing Embedding vector.............")
            vectorstore.add_documents(documents=chunk_docs, embeddings=embeddings)
            print("All embeddings Stored\n")

        except Exception as e:
            # Best effort: one unreachable or unparsable source must not stop the run.
            print(f"Skipping the {source_url}\nERROR: {e}")

    return embed_manager, vectorstore

embed_manager, vectorstore = read_documents(urls)

Loading embedding model: all-MiniLM-L6-v2
Model loaded successfully. Embedding dimension: 384
Vector store initialized. Collection: pdf_documents
Existing documents in collection: 0
processing {'title': 'Analyzing the Impact of AI Tools on Student Study Habits and ... - arXiv', 'url': 'https://arxiv.org/html/2412.02166v1', 'content': 'R. Baker, “AI in education: Promises and implications for teaching and learning,” Technology-Based Assessment for 21st Century Skills: Theoretical and Practical Implications from Modern Research, pp. 253–268, 2019.\n   N. Selwyn, Should robots replace teachers?: AI and the future of education.   John Wiley & Sons, 2019. [...] AI has the potential to transform personalized learning and student engagement by tailoring educational experiences to individual needs and learning styles. Through data-driven insights, AI can provide real-time feedback on student performance, emotions, and engagement levels, enabling educators to customize teaching methods and inte

Batches: 100%|██████████| 2/2 [00:01<00:00,  1.36it/s]


Generated embeddings with shape: (49, 384)
Embedding completed

Storing Embedding vector.............
Adding 49 documents to vector store...
Successfully added 49 documents to vector store
Total documents in collection: 49
All embeddings Stored

processing {'title': '[PDF] The Impact of Artificial Intelligence on Education', 'url': 'https://www.iiisci.org/journal/PDV/sci/pdfs/SA121NK24.pdf', 'content': 'in Contextual word embeddings,” arXiv.org, Nov. 25, 2024.   L. Malmqvist, “Sycophancy in Large Language Models: Causes and mitigations,” arXiv.org, Nov. 22, 2024.   E. Shein, “The impact of AI on computer science education,” Communications of the ACM, vol. 67, no. 9, pp. 13–15, Jun. 2024, doi: 10.1145/3673428.  B. A. Liang, “AI to hit 40% of jobs and worsen inequality, IMF says,” Jan. 15, 2024.   R. General, “Nvidia CEO: AI won’t take your job, but someone using AI might,” NextShark, Oct. 29, 2024.   “The purpose of education,” The Martin Luther King, Jr. Research and Education Institut

Batches: 100%|██████████| 2/2 [00:01<00:00,  1.20it/s]


Generated embeddings with shape: (49, 384)
Embedding completed

Storing Embedding vector.............
Adding 49 documents to vector store...
Successfully added 49 documents to vector store
Total documents in collection: 98
All embeddings Stored

processing {'title': 'Analyzing the Impact of AI Tools on Student Study Habits and ... - arXiv', 'url': 'https://arxiv.org/abs/2412.02166', 'content': 'refinement of AI features to maximize their educational benefits.', 'score': 0.99987066}
Chunking the loaded data...............
Split 43 documents into 1 chunks
chunking completed

Embedding chunks................
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 51.13it/s]

Generated embeddings with shape: (1, 384)
Embedding completed

Storing Embedding vector.............
Adding 1 documents to vector store...
Successfully added 1 documents to vector store
Total documents in collection: 99
All embeddings Stored

processing {'title': 'Exploring the Impact of Artificial Intelligence in Teaching and ...', 'url': 'https://link.springer.com/article/10.1007/s11165-024-10176-3', 'content': 'Our analysis found that students exhibit increased engagement and interest in science courses when AI tools are integrated into learning environments. This heightened interest is attributed to AI’s ability to provide predictions and personalized feedback (Jiao et al., 2022b. Artificial intelligence-enabled prediction model of student academic performance in online engineering education. Artificial Intelligence Review, 55(8), 6321–6344.")), making learning more engaging and enjoyable (Hewapathirana & Almasri, 2022. Active learning compared with lecture-based pedagogies in gend




Chunking the loaded data...............
Split 21529 documents into 28 chunks
chunking completed

Embedding chunks................
Generating embeddings for 28 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.08it/s]


Generated embeddings with shape: (28, 384)
Embedding completed

Storing Embedding vector.............
Adding 28 documents to vector store...
Successfully added 28 documents to vector store
Total documents in collection: 127
All embeddings Stored

processing {'title': 'Advantages and disadvantages of AI in education', 'url': 'https://www.ucanwest.ca/blog/education-careers-tips/advantages-and-disadvantages-of-ai-in-education', 'content': '## Impact of AI in Education\n\nThe impact of AI in education has been both transformative and far-reaching. AI technologies enhance learning experiences by enabling personalized education, automating administrative tasks and offering intelligent tutoring systems that adapt to each student’s pace and ability. This allows educators to focus more on student development while AI handles repetitive duties. Additionally, AI helps institutions analyze student data to improve academic outcomes and curriculum design, fostering more efficient and effective learn

Batches: 100%|██████████| 1/1 [00:00<00:00,  1.55it/s]


Generated embeddings with shape: (27, 384)
Embedding completed

Storing Embedding vector.............
Adding 27 documents to vector store...
Successfully added 27 documents to vector store
Total documents in collection: 154
All embeddings Stored

processing {'title': 'How artificial intelligence in education is transforming classrooms', 'url': 'https://learningsciences.smu.edu/blog/artificial-intelligence-in-education', 'content': "January 09, 2025 11:04 AM\n\nArtificial Intelligence (AI) is swiftly changing the educational landscape, bringing about significant benefits and noteworthy challenges. In classrooms across the globe, AI's influence can be seen through advanced technologies like machine learning, which open up new possibilities for personalized learning and enhanced student engagement.\n\nThis blog aims to unpack the concept of AI in education, breaking down its key components, current applications, and the very real benefits it brings to learning environments. We will also a

Batches: 100%|██████████| 1/1 [00:00<00:00,  1.39it/s]


Generated embeddings with shape: (21, 384)
Embedding completed

Storing Embedding vector.............
Adding 21 documents to vector store...
Successfully added 21 documents to vector store
Total documents in collection: 175
All embeddings Stored

processing {'title': '1.Introduction - arXiv', 'url': 'https://arxiv.org/html/2309.02029v2', 'content': 'The use of artificial intelligence chatbots, such as ChatGPT, in higher education has been studied by Rudolph et al. . The researchers examined how teacher-focused AI tools might automate tasks such as assessments, plagiarism detection, and feedback, as well as the benefits of student-focused AI tools, such as enhancing intelligent support systems for students. Suggestions were provided to educational institutions and students for avoiding potential drawbacks associated with the use of ChatGPT and other AI tools. In different research, Susnjak et al.  addressed the possible dangers ChatGPT poses to online tests, highlighting the significanc

Batches: 100%|██████████| 2/2 [00:01<00:00,  1.22it/s]


Generated embeddings with shape: (50, 384)
Embedding completed

Storing Embedding vector.............
Adding 50 documents to vector store...
Successfully added 50 documents to vector store
Total documents in collection: 225
All embeddings Stored

processing {'title': 'Student Perspectives on the Benefits and Risks of AI in Education', 'url': 'https://arxiv.org/html/2505.02198v1', 'content': 'The findings demonstrate alignment between student perspectives and prior research demonstrating potential the benefits of AI in education. Students valued the feedback and study support, instruction capabilities, increased access to information, increased productivity, increased creativity offered by AI chatbots.They specifically appreciated immediate assistance when instructors were unavailable, help with organizing tasks and brainstorming ideas, and clear explanations of complex topics. However, students also expressed concerns that both paralleled and expanded upon previous research. Academic i

Batches: 100%|██████████| 1/1 [00:00<00:00,  9.42it/s]

Generated embeddings with shape: (3, 384)
Embedding completed

Storing Embedding vector.............
Adding 3 documents to vector store...
Successfully added 3 documents to vector store
Total documents in collection: 228
All embeddings Stored

processing {'title': 'The Impact of AI on Modern Education - Pros and Cons', 'url': 'https://astra-ai.co/blog/how-does-ai-impact-education', 'content': 'The system-level impact is mixed. Equity can improve when high quality support reaches more students, including those without private tutors. Equity can suffer if only some schools can afford reliable tools or if bias in models persists. Long term, curricula will include more data literacy, critical thinking and human skills that are hard to automate. Assessment will rely more on performance tasks, oral defenses and project work that captures the process, not just the final answer.\n\n## Benefits of AI in education\n\nAI in education brings practical advantages when used with care. [...] The cons




Chunking the loaded data...............
Split 23026 documents into 29 chunks
chunking completed

Embedding chunks................
Generating embeddings for 29 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.38it/s]


Generated embeddings with shape: (29, 384)
Embedding completed

Storing Embedding vector.............
Adding 29 documents to vector store...
Successfully added 29 documents to vector store
Total documents in collection: 257
All embeddings Stored

processing {'title': 'AI in Schools: Pros and Cons', 'url': 'https://education.illinois.edu/about/news-events/news/article/2024/10/24/ai-in-schools--pros-and-cons', 'content': 'It can streamline administrative tasks, free more time for teachers to build relationships and the social and emotional skills of students, tailor students’ learning experiences according to their individual needs and learning styles, and improve accessibility for students with disabilities. It can also help researchers collect and analyze data to enhance curriculum effectiveness and spot areas that need improvement, and it offers a wide range of educational resources and platforms for both students and educators. [...] College of Education [...] It can provide students

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.27it/s]


Generated embeddings with shape: (16, 384)
Embedding completed

Storing Embedding vector.............
Adding 16 documents to vector store...
Successfully added 16 documents to vector store
Total documents in collection: 273
All embeddings Stored

processing {'title': '[PDF] A Comparative Analysis of AI-Driven Education and Traditional ...', 'url': 'https://www.ijfmr.com/papers/2024/4/24635.pdf', 'content': 'Findings \n\n1 Lombardi \n\nDario, 2020 \n\nReview \n\npaper \n\nThis paper suggests that AI increases the possibilities of \n\ninteraction between humans virtually and at the same time \n\nallows us to find links between the phenomena that \n\ncharacterize modernity. Maintaining a solid relationship \n\nbetween AI and traditional methods creates communicative \n\nbridges between people, and projects the world of education \n\ntowards new scenarios. \n\n2 Jiahui Huang, \n\n2021 \n\nReview \n\npaper \n\nThis study states that AI can continuously optimize and \n\nimprove the learning 

Batches: 100%|██████████| 2/2 [00:01<00:00,  1.84it/s]


Generated embeddings with shape: (34, 384)
Embedding completed

Storing Embedding vector.............
Adding 34 documents to vector store...
Successfully added 34 documents to vector store
Total documents in collection: 307
All embeddings Stored

processing {'title': '[PDF] A Comparative Study of AI-Powered Learning Versus Traditional ...', 'url': 'https://hrmars.com/papers_submitted/24690/enhancing-early-education-with-artificial-intelligence-a-comparative-study-of-ai-powered-learning-versus-traditional-methods.pdf', 'content': 'technologies towards early learning by putting side by side traditional learning approaches against AI tools for learning. This research encompasses the various facets of Artificial Intelligence in education, including relevant literature on the use of AI for enhancing engagement, relevance, and responding mechanisms in the classroom. Also, the study investigates the barriers of the implementation of AI in education, including concerns regarding ethical issues

Batches: 100%|██████████| 3/3 [00:01<00:00,  1.52it/s]


Generated embeddings with shape: (66, 384)
Embedding completed

Storing Embedding vector.............
Adding 66 documents to vector store...
Successfully added 66 documents to vector store
Total documents in collection: 373
All embeddings Stored

processing {'title': 'Comparison between the traditional study method and AI use in the ...', 'url': 'https://www.sciencedirect.com/science/article/pii/S1557308725002513', 'content': '### Results\n\nBoth groups improved their overall knowledge levels (p\u202f<\u202f0.001), with the control group exhibiting a more pronounced improvement (p\u202f=\u202f0.002). The control group demonstrated superior performance in the knowledge-based questions (p\u202f=\u202f0.001). However, no significant differences were observed in comprehension, analysis, or application.\n\n### Conclusions\n\nConventional study methods provide superior knowledge acquisition in comparison to AI methods. The use of AI did not improve critical thinking skills, such as understan

Batches: 100%|██████████| 1/1 [00:00<00:00,  1.24it/s]


Generated embeddings with shape: (29, 384)
Embedding completed

Storing Embedding vector.............
Adding 29 documents to vector store...
Successfully added 29 documents to vector store
Total documents in collection: 402
All embeddings Stored

processing {'title': 'Rising Use of AI in Schools: Downsides for Students', 'url': 'https://www.edweek.org/technology/rising-use-of-ai-in-schools-comes-with-big-downsides-for-students/2025/10', 'content': '“Our research shows AI use in schools comes with real risks, like large-scale data breaches, tech-fueled sexual harassment and bullying, and treating students unfairly,” Laird said. “Acknowledging those risks enables education leaders, policymakers, and communities to mount prevention and response efforts so that the positive uses of AI are not overshadowed by harm to students.”\n\nTwo ways to address the potentially negative effects are for schools to develop AI training and craft policies that put meaningful guardrails around its use, expe

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.61it/s]


Generated embeddings with shape: (12, 384)
Embedding completed

Storing Embedding vector.............
Adding 12 documents to vector store...
Successfully added 12 documents to vector store
Total documents in collection: 414
All embeddings Stored

processing {'title': 'The impact of AI on education and careers: What do students think?', 'url': 'https://www.frontiersin.org/journals/artificial-intelligence/articles/10.3389/frai.2024.1457299/full', 'content': "Research by Jeffrey (2020) supports the later findings, suggesting that the general view of AI among students is positive, but that they have concerns about its rapid development and impact on humankind. Finally, research by Chan and Hu (2023) found that students see AI as generally positive, with the potential to help personalize their learning experiences, and help with writing, idea generation and research. However, there continues to be a thread of concerns, in this case relating to accuracy, privacy, ethics and impact on persona

Batches: 100%|██████████| 4/4 [00:02<00:00,  1.34it/s]


Generated embeddings with shape: (100, 384)
Embedding completed

Storing Embedding vector.............
Adding 100 documents to vector store...
Successfully added 100 documents to vector store
Total documents in collection: 514
All embeddings Stored

processing {'title': 'The effects of artificial intelligence applications in educational settings', 'url': 'https://www.sciencedirect.com/science/article/pii/S0040162523007618', 'content': "With the continuous intervention of AI tools in the education sector, new research is required to evaluate the viability and feasibility of extant AI platforms to inform various pedagogical methods of instruction. The current manuscript explores the cumulative published literature to date in order to evaluate the key challenges that influence the implications of adopting AI models in the Education Sector. The researchers' present works both in favour and against AI-based applications within the Academic milieu. A total of 69 articles from a 618-article p

Batches: 100%|██████████| 1/1 [00:00<00:00,  3.10it/s]

Generated embeddings with shape: (9, 384)
Embedding completed

Storing Embedding vector.............
Adding 9 documents to vector store...
Successfully added 9 documents to vector store
Total documents in collection: 523
All embeddings Stored






In [9]:
# System prompt for the LLM that expands the user's question into retrieval
# queries; sent as the SystemMessage in generate_retrieval_queries.
# The format example lists five entries, while the rules allow 5 to 7.
RETRIEVAL_QUERY_PROMPT = """
You are a retrieval query generator for a research assistant.

Your task is to generate 5 to 7 focused retrieval queries
that help retrieve information from research papers.

Rules:
- DO NOT answer the question
- DO NOT explain anything
- DO NOT include markdown
- DO NOT include bullet points
- Output ONLY a valid JSON object
- The response must start with '{' and end with '}'

Output format:
{
  "queries": [
    "query 1",
    "query 2",
    "query 3",
    "query 4",
    "query 5"
  ]
}
"""

import json
from langchain_groq import ChatGroq
from langchain_core.messages import SystemMessage, HumanMessage


def generate_retrieval_queries(user_query: str, api_key: str) -> list[str]:
    """Ask the LLM to expand a user question into 5-7 retrieval queries.

    Args:
        user_query: The user's original question.
        api_key: Groq API key used to build the chat client.

    Returns:
        The list of query strings produced by the model.

    Raises:
        RuntimeError: If the reply contains no JSON object, lacks a "queries"
            list of non-empty strings, or holds fewer than 5 or more than 7
            entries.
    """
    query_expansion_llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        api_key=api_key,
        temperature=0.2,   # low = stable retrieval
    )

    messages = [
        SystemMessage(content=RETRIEVAL_QUERY_PROMPT),
        HumanMessage(content=user_query)
    ]

    response = query_expansion_llm.invoke(messages)
    print("RAW LLM OUTPUT:")
    print(response.content)
    print("------")
    try:
        raw_text = response.content

        # Despite the prompt, models sometimes wrap the object in markdown
        # fences or add a sentence around it; parse only the outermost {...}.
        start, end = raw_text.find("{"), raw_text.rfind("}")
        if start == -1 or end < start:
            raise ValueError("no JSON object found in the model output")

        parsed = json.loads(raw_text[start:end + 1])
        queries = parsed["queries"]

        if not isinstance(queries, list) or not all(
            isinstance(query, str) and query.strip() for query in queries
        ):
            raise ValueError("'queries' must be a list of non-empty strings")

        if not (5 <= len(queries) <= 7):
            raise ValueError("Expected 5–7 retrieval queries")

        return queries

    except Exception as e:
        # Chain explicitly so the underlying parsing error stays in the traceback.
        raise RuntimeError(f"Invalid retrieval-query output: {e}") from e


def multi_query_retrieve(
    retriever,
    queries: list[str],
    top_k: int = 10
) -> list[dict]:
    """Retrieve chunks for every query and return them as one flat list.

    Each query is passed to ``retriever.retrieve`` with the given ``top_k``;
    results are concatenated in query order and are NOT de-duplicated.
    """
    return [
        hit
        for query in queries
        for hit in retriever.retrieve(query, top_k=top_k)
    ]


def deduplicate_chunks(chunks: list[dict]) -> list[dict]:
    """
    Drop repeated chunks, keeping the first occurrence of each.

    Two chunks count as duplicates when the first 200 characters of their
    "content" and their metadata "source" are both equal. The order of the
    surviving chunks follows their first appearance in ``chunks``.
    """
    first_seen: dict = {}

    for item in chunks:
        fingerprint = (item["content"][:200], item["metadata"].get("source"))
        # setdefault stores the chunk only if this fingerprint is new.
        first_seen.setdefault(fingerprint, item)

    return list(first_seen.values())

# task_n_prompt is a JSON string; the original user question is the first
# entry of its "user_prompt" list.
user_prompt = json.loads(task_n_prompt)['user_prompt'][0]
ragretriver = RAGRetriever(vector_store=vectorstore, embedding_manager=embed_manager)

# Step 1: expand the user question into several retrieval queries
retrieval_queries = generate_retrieval_queries(
    user_query=user_prompt,
    api_key=GROQ_TOKEN
)

# Step 2: retrieve up to top_k chunks for each generated query
raw_chunks = multi_query_retrieve(
    retriever=ragretriver,
    queries=retrieval_queries,
    top_k=10
)

# Step 3: drop chunks that were returned by more than one query
clean_chunks = deduplicate_chunks(raw_chunks)

# Step 4: group the remaining chunks by paper. Later cells read each paper's
# 'source', 'best_chunk' and 'chunks' keys.
papers = ragretriver.group_chunks_by_paper(clean_chunks)

RAW LLM OUTPUT:
{
  "queries": [
    "artificial intelligence in education benefits and drawbacks",
    "impact of AI on student learning outcomes and academic performance",
    "AI and job displacement in the education sector",
    "role of AI in transforming teaching methods and teacher professional development",
    "effects of AI on educational equity and access to quality learning",
    "AI-powered adaptive learning systems and personalized education",
    "challenges and limitations of implementing AI in educational institutions"
  ]
}
------
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 55.19it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 57.03it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 84.94it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 64.43it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 73.90it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 71.38it/s]


Generated embeddings with shape: (1, 384)
Generating embeddings for 1 texts...


Batches: 100%|██████████| 1/1 [00:00<00:00, 79.85it/s]

Generated embeddings with shape: (1, 384)





In [10]:
# Chat model used to judge whether each retrieved paper is relevant.
# temperature=0 is used for the single-label verdict.
verify_llm = ChatGroq(
    model = 'meta-llama/llama-4-scout-17b-16e-instruct',
    # model = 'llama-3.3-70b-versatile',
    api_key=GROQ_TOKEN,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# System prompt for the relevance check. The model must reply with exactly one
# token: "NO" for an irrelevant paper, otherwise one of
# survey / comparison / method-specific / tutorial.
verify_system_prompt = """
You are an academic paper relevance evaluator.

Your task is to evaluate a SINGLE paper context against a user query.

You must decide:
1. Whether this paper is relevant enough to help answer the user query.
2. If relevant, classify the paper’s role.

Definitions:
- A paper is RELEVANT if it:
  • directly compares methods mentioned in the query, OR
  • provides a survey/review of the topic, OR
  • provides guidelines, benchmarks, or empirical insights related to the query.

- A paper is NOT RELEVANT if it:
  • only briefly mentions the topic without analysis, OR
  • focuses on a single narrow method without comparison or broader insight, OR
  • is unrelated to answering the user query.

If the paper is NOT relevant:
→ Output exactly:
NO

If the paper IS relevant:
→ Classify it into ONE of the following categories:
survey
comparison
method-specific
tutorial

Output rules:
- Output ONLY one token.
- Do NOT include explanations.
- Do NOT include punctuation or formatting.

"""
# Papers the verifier LLM classified as relevant, in retrieval order.
verified_papers = []

for paper in papers:
    # The verifier sees the task description and the paper's best-matching chunk.
    verify_human_prompt = f"""
      here is the User's query:
        {task_n_prompt}

      here is the Context:
        {paper['best_chunk']}
    """
    verify_messages = [
        SystemMessage(content=verify_system_prompt),
        HumanMessage(content=verify_human_prompt)
    ]
    response = verify_llm.invoke(verify_messages)

    # Normalise the reply before matching: a trailing newline or different
    # capitalisation ("Survey\n") would otherwise silently reject a relevant paper.
    verdict = str(response.content).strip().lower()
    is_verified = verdict in ['survey', 'comparison', 'method-specific', 'tutorial']

    if is_verified:
        verified_papers.append(paper)
        print(f"{paper['source']} got verified")

https://astra-ai.co/blog/how-does-ai-impact-education got verified
https://www.waldenu.edu/programs/education/resource/five-pros-and-cons-of-ai-in-the-education-sector got verified
https://www.ucanwest.ca/blog/education-careers-tips/advantages-and-disadvantages-of-ai-in-education got verified
https://education.illinois.edu/about/news-events/news/article/2024/10/24/ai-in-schools--pros-and-cons got verified
https://arxiv.org/html/2412.02166v1 got verified
https://learningsciences.smu.edu/blog/artificial-intelligence-in-education got verified
https://arxiv.org/html/2510.16019v1 got verified
https://www.ijfmr.com/papers/2024/4/24635.pdf got verified
https://hrmars.com/papers_submitted/24690/enhancing-early-education-with-artificial-intelligence-a-comparative-study-of-ai-powered-learning-versus-traditional-methods.pdf got verified
https://www.iiisci.org/journal/PDV/sci/pdfs/SA121NK24.pdf got verified
https://www.frontiersin.org/journals/artificial-intelligence/articles/10.3389/frai.2024.145

In [11]:
# Build the payload that will be fed to the writer LLM.
from typing import List, Dict, Any

# One {"content", "url"} record per chunk of every verified paper, where
# "url" is the source of the paper the chunk belongs to.
req_info = []

for verified_paper in verified_papers:
    source_url = verified_paper['source']
    req_info.extend(
        {"content": paper_chunk['content'], "url": source_url}
        for paper_chunk in verified_paper['chunks']
    )

In [12]:
# Inspect the content/url records collected for the writer LLM.
req_info

[{'content': 'the process, not just the final answer. Benefits of AI in education AI in education brings practical advantages when used with care. Faster feedback and more practice opportunities for students Personalized learning paths that adjust to strengths and gaps Clearer step-by-step explanations for complex problems Differentiated materials for diverse classrooms and multilingual learners Automation of routine tasks, from quiz generation to parent communication Insights from learning data that help teachers target instruction Extended access to help outside school hours, including weekends and holidays Greater accessibility through speech to text, captions and reading supports These benefits appear when AI is embedded in teaching, monitored by adults and used to support the curriculum, not replace it. Disadvantages and risks of AI in education The disadvantages of AI in education are real and require active management. Accuracy concerns, including hallucinated content and wrong 

In [27]:
# Chat model that writes the final report from the verified chunks.
writer_llm = ChatGroq(
    model='openai/gpt-oss-20b',
    # model='llama-3.3-70b-versatile',
    api_key=GROQ_TOKEN,
    temperature=0.,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# System prompt for the report writer. It restricts the model to the supplied
# content/url records and asks for an object with "Title" and "Report" keys;
# a later cell parses the reply with json.loads.
writer_system_prompt = """
    Based on provided verified information in this format:
    [{
        "content":content,
        "url": url
    }]

    against a given user's QUERY

    Using ONLY the provided verified information, generate a structured
    and Detailed research-style report answering the user query.

    Include the urls for refrences

    If the provided information is insufficient to confidently answer
    any part of the query, explicitly state the missing information
    instead of guessing.

    Output format:
        {"Title":small but concise title for the report,
        "Report":"Report content in proper formatting"}
    Output rules:
    - Report should start with a proper title with proper formatting
    - Report should follow a proper format of report writing
    - Output ONLY the final report in proper docs formatting so that later it can be saved in form PDF.
    - Do NOT include extra tokens other than the report.
    - Do not repeat the url for each info chunk instead put the url there only once in the refrence section.
    
"""
# The user's original question: first entry of "user_prompt" in the task_n_prompt JSON.
x = json.loads(task_n_prompt)['user_prompt'][0]
# Cap the writer's context at the first 25,000 characters of the list's Python
# repr. The cut is by character position, so the last record may end mid-text.
trimmed_info = str(req_info)[:25000]
def write_report():
    """Ask the writer LLM for the report and return the raw text of its reply.

    Reads the module-level ``trimmed_info`` (verified records) and ``x`` (user
    query) to build the human message, and sends it together with
    ``writer_system_prompt`` to ``writer_llm``.
    """
    human_text = f"""
    Here is the 
        provided verified information: {trimmed_info},
        User's Query: {x}
    """

    reply = writer_llm.invoke(
        [SystemMessage(writer_system_prompt), HumanMessage(human_text)]
    )
    return reply.content


final_report = write_report()

In [28]:
# Show the writer's raw reply exactly as returned (still an unparsed string).
display(final_report)

'{"Title":"AI in Education: Opportunities and Challenges","Report":"# AI in Education: Opportunities and Challenges\\n\\n## 1. Introduction\\nArtificial Intelligence (AI) is increasingly integrated into educational settings, offering tools that can personalize learning, automate routine tasks, and provide real‑time feedback. While the potential benefits are significant, there are also notable risks and challenges that must be managed to ensure equitable and effective outcomes.\\n\\n## 2. Impact on Students\\n| Positive Impact | Evidence | Negative Impact | Evidence |\\n|---|---|---|---|\\n| **Personalized learning** – AI adapts content to individual strengths and gaps, providing practice at the right level and in plain language. | [1], [2], [4] | **Overreliance on AI** – Students may depend too heavily on automated answers, reducing critical‑thinking and problem‑solving skills. | [15], [17] |\\n| **Faster feedback & extended access** – Immediate hints and corrections encourage repeated

In [29]:
# Parse the writer's reply as JSON and print the report body stored under "Report".
# json.loads raises if the model returned anything other than valid JSON.
fr = json.loads(final_report)
print(fr['Report'])

# AI in Education: Opportunities and Challenges

## 1. Introduction
Artificial Intelligence (AI) is increasingly integrated into educational settings, offering tools that can personalize learning, automate routine tasks, and provide real‑time feedback. While the potential benefits are significant, there are also notable risks and challenges that must be managed to ensure equitable and effective outcomes.

## 2. Impact on Students
| Positive Impact | Evidence | Negative Impact | Evidence |
|---|---|---|---|
| **Personalized learning** – AI adapts content to individual strengths and gaps, providing practice at the right level and in plain language. | [1], [2], [4] | **Overreliance on AI** – Students may depend too heavily on automated answers, reducing critical‑thinking and problem‑solving skills. | [15], [17] |
| **Faster feedback & extended access** – Immediate hints and corrections encourage repeated attempts and reduce frustration; support is available outside school hours. | [1], [2

In [30]:
from pathlib import Path

from markdown_pdf import MarkdownPdf, Section

# Render the report text as a single PDF section.
pdf = MarkdownPdf(toc_level=2)
pdf.add_section(Section(fr['Report']))

# Reports go to a "reports" folder beside the current working directory
# (i.e. inside its parent); create the folder if it does not exist yet.
base_dir = Path.cwd().parent
report_folder = base_dir.joinpath("reports")
report_folder.mkdir(parents=True, exist_ok=True)

file_path = report_folder.joinpath("ARA_research_report_1.pdf")
pdf.save(str(file_path))
print(f"Report saved successfully at: {file_path}")

Report saved successfully at: c:\Users\krdhi\OneDrive\Desktop\RAG_tutorials\A_R_A\reports\ARA_research_report_1.pdf


In [16]:
# Note recorded in agent_state["notes"] for each known plan step.
# Steps that are not listed here are printed and skipped without a note.
_STEP_NOTES = {
    "search_sources": "searched sources",
    "read_documents": "read documents",
    "extract_key_points": "extracted key points",
    "compare_methods": "compared methods",
    "analyze_trends": "analyzed trends",
    "write_report": "final report written",
}

# Walk the plan from the current position, logging each step as it runs.
while agent_state["current_step"] < len(agent_state["plan"]):
    step = agent_state["plan"][agent_state["current_step"]]
    print(f"Executing step: {step}")

    note = _STEP_NOTES.get(step)
    if note is not None:
        agent_state["notes"].append(note)

    # "write_report" ends the run immediately: the loop exits before the
    # counter is advanced, so current_step stays on the write_report index.
    # NOTE(review): re-running this cell therefore repeats write_report - confirm intended.
    if step == "write_report":
        break

    agent_state["current_step"] += 1

Executing step: search_sources
Executing step: read_documents
Executing step: extract_key_points
Executing step: compare_methods
Executing step: analyze_trends
Executing step: write_report


In [17]:
# Inspect the agent state (task, plan, current_step, notes, sources, status).
agent_state

{'task': {'task_type': ['research', 'comparison'],
  'topic': 'ai in education sector',
  'methods': ['traditional learning methods', 'ai-based learning methods'],
  'constraints or assumptions': ['impact on students',
   'impact on jobs',
   'impact on teachers',
   'impact on learning methods'],
  'time_range': None,
  'approaches': ['analyzing benefits', 'analyzing drawbacks']},
 'plan': ['search_sources',
  'read_documents',
  'extract_key_points',
  'compare_methods',
  'analyze_trends',
  'write_report'],
 'current_step': 5,
 'notes': ['searched sources',
  'read documents',
  'extracted key points',
  'compared methods',
  'analyzed trends',
  'final report written'],
 'sources': [],
 'status': 'planned'}