In [1]:
# Show the kernel's current working directory before any chdir happens.
%pwd

'd:\\INTERNSHIP\\AI_Document_Summarizer_QA_System\\research'

In [22]:
import os
from dotenv import load_dotenv
from typing import TypedDict
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field

from AI_Document_Summarizer_QA_System.components.stage_01_document_loader import DocumentLoader
from AI_Document_Summarizer_QA_System.components.stage_02_text_splitter import TextSplitter
from AI_Document_Summarizer_QA_System.components.stage_03_store_index import VectorIndex
from AI_Document_Summarizer_QA_System.components.stage_04_RAG import RAGPipeline
from AI_Document_Summarizer_QA_System.components.stage_05_Output import SummaryGenerator, OutputFormatter

# Load variables from a .env file into the process environment;
# GROQ_API_KEY is read back with os.getenv() in a later cell.
load_dotenv()

True

In [3]:
# Structured-output schema for one question/answer exchange; it is handed to
# llm.with_structured_output() in the next cell.
# NOTE: documented with comments rather than a docstring on purpose -- pydantic
# copies a model's docstring into the description of the schema it generates.
class Question_Answer_State(BaseModel):
    # The user's question about the supplied document.
    question : str = Field(description="Question regarding the Document Provided by the user") 
    # The LLM's answer, drawn from the document.
    answer : str  = Field(description= "Answer given Provided by the LLM from the Document")


In [4]:
# Groq-hosted chat model. No API key is passed explicitly, so it is presumably
# picked up from the environment populated by load_dotenv() -- verify.
llm = ChatGroq(model_name="Gemma2-9b-It")

# Variant of the model whose output is structured as Question_Answer_State.
model = llm.with_structured_output(Question_Answer_State)

In [5]:
# Move from research/ up to the project root so that relative paths such as
# "Data/..." resolve. The guard makes the cell idempotent: re-running it no
# longer climbs one directory further each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [6]:
# Confirm the working directory after the chdir cell (expected: the project root).
%pwd

'd:\\INTERNSHIP\\AI_Document_Summarizer_QA_System'

# Document Loader

In [25]:
# PDF to ingest, given relative to the current working directory.
pdf_path = "Data/Capstone_Project_Artificial _Intelligence.pdf"

# Load the document; `text` is reused by the splitter and summary cells.
loader = DocumentLoader()
text = loader.load(pdf_path)

# Preview only the first 700 characters to keep the cell output short.
print(text[:700])

Operations@learnnex.in www.LearnNex.in
2. AI-Powered Personal Health Tracker:
Description: 
Use AI to monitor and predict health patterns using fitness tracker data, diet, and
sleep logs
Steps:
1.Collect data (manual entry or from APIs like Fitbit or Apple Health).
2.Use Pandas to clean and analyze data.
3.Visualize trends using Matplotlib/Plotly.
CAPSTONE PROJECT
Artificial Intelligence :-
1. AI Chatbot using LLMs:
Description: Build a custom chatbot using open-source LLMs like Meta’s LLaMA, Mistral, orintegrate with OpenAI API for a domain-specific assistant (e.g., legal, medical,education).
Steps:1.Learn Python (focus on data structures, classes, API usage).2.Study how chatbots work (rule


# Text Splitter

In [26]:
# Split the loaded text into chunks using the size/overlap settings below.
splitter = TextSplitter(
    chunk_size=800,
    chunk_overlap=200,
)
chunks = splitter.split_documents(text)

# Report how many chunks were produced, then preview the start of the first.
print("Chunks created:", len(chunks))
print(chunks[0][:300])

Chunks created: 4
Operations@learnnex.in www.LearnNex.in
2. AI-Powered Personal Health Tracker:
Description: 
Use AI to monitor and predict health patterns using fitness tracker data, diet, and
sleep logs
Steps:
1.Collect data (manual entry or from APIs like Fitbit or Apple Health).
2.Use Pandas to clean and analyze 


# Vector Store

In [27]:
# Add the chunks to the vector index.
indexer = VectorIndex()
result = indexer.add_documents(chunks)

# Last expression of the cell: the notebook displays the returned status.
result

{'status': 'success', 'chunks_added': 4}

# RAG Pipeline

In [29]:
# Question to put to the RAG pipeline.
query = "What is the third capstone project described in the document?"

rag = RAGPipeline()
rag_result = rag.ask(query)

# Full result first (answer plus sources), then a separator, then just the answer.
print(rag_result)

print("-----------------------------------------\n")

print(rag_result["answer"])

{'answer': 'The third capstone project described in the document is the "AI Image Generator", which generates images from text using Stable Diffusion or DALL·E with custom styles.', 'sources': ['Operations@learnnex.in www.LearnNex.in\n3. AI Image Generator:\nDescription:\n Generate images from text using Stable Diffusion or DALL·E with custom styles.\nSteps:\n1.Install and run pre-trained Stable Diffusion models using diffusers.\n2.Prompt engineering to guide image generation.\n3.Use gradio to build a web UI.\nAs part of your internship program, you are required to prepare any two projects. These projects will form an important\ncomponent of your internship evaluation and should reflect both your theoretical knowledge and practical application.\n4. AI Document Summarizer & Q&A System:\nDescription:\n Create a system that can take PDFs (like research papers or legal docs), summarize\nthem, and answer questions.\nSteps:\n1.Use LangChain or LlamaIndex to create a RAG (Retrieval-Augmented 

# Output Format

In [32]:
# os.getenv() returns None when the variable is unset; stop here with a clear
# message rather than passing None on to SummaryGenerator.
groq_key = os.getenv("GROQ_API_KEY")
if not groq_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set; add it to your .env file or environment "
        "before running this cell."
    )

summarizer = SummaryGenerator(groq_key)
formatter = OutputFormatter()

# Generate every summary variant for the loaded document text.
summaries = summarizer.generate_all(text)

# (heading, key) pairs in display order; one loop replaces three copy-pasted
# print stanzas and produces the same output.
for heading, key in (
    ("Short Summary", "short"),
    ("Medium Summary", "medium"),
    ("Detailed Summary", "detailed"),
):
    print("--------------------------------\n")
    print(f"{heading} \n\n", summaries[key])

--------------------------------

Short Summary 

 Here is a 4-6 sentence TL;DR summary of the document: 
The document outlines five AI-powered project ideas for an internship program, including a personal health tracker, AI chatbot, AI image generator, AI document summarizer, and AI music generator. Each project has a detailed description and step-by-step guide on how to implement it. The projects require a range of skills, including data analysis, natural language processing, and machine learning. Interns are required to complete any two of these projects as part of their evaluation. The projects aim to test both theoretical knowledge and practical application of AI concepts. By completing these projects, interns can demonstrate their skills and understanding of AI technologies.
--------------------------------

Medium Summary 

 Here is a structured summary of the document in 3 sections:

**1. Overview**
The document outlines various AI-powered projects that can be undertaken as par

# Final Testing

In [38]:
# End-to-end check: ask a question through the RAG pipeline and produce a
# medium-length summary of the same document.
rag_test = rag.ask("Can you summarize the first two projects described in the document?")
med_sum = summarizer.medium_summary(text)

# Keep the formatted response instead of discarding the return value, so the
# formatter's output can actually be inspected.
final_response = formatter.build_response(
    answer=rag_test["answer"],
    sources=rag_test["sources"],
    summary=med_sum,
)

print(med_sum)

# Last expression of the cell: the notebook displays the formatted response
# (nothing is shown if build_response returns None).
final_response

Here is a structured summary of the document in 3 sections:

**1. Overview**
The document outlines five AI-powered projects that can be undertaken as part of an internship program. These projects aim to apply theoretical knowledge and practical skills in artificial intelligence, machine learning, and data analysis. The projects are designed to evaluate the intern's understanding of AI concepts and their ability to implement them in real-world applications.

**2. Key Concepts**
The key concepts covered in the document include:
* AI-powered personal health tracking using fitness tracker data, diet, and sleep logs
* Building custom chatbots using open-source LLMs and integrating with OpenAI API
* Generating images from text using Stable Diffusion or DALL·E
* Creating a document summarizer and Q&A system using LangChain or LlamaIndex
* Generating music from text prompts or mood using AI models like MusicGen, Riffusion, or Jukebox
* Using libraries and frameworks such as Pandas, Matplotlib,