In [2]:
import os
from dotenv import load_dotenv
import openai
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain_community.vectorstores import FAISS
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

In [3]:
# Load variables from a local .env file into os.environ. By default
# python-dotenv does not override variables that are already set.
load_dotenv()

# os.environ only accepts str values, so `os.environ[k] = os.getenv(k)` dies
# with a bare TypeError when the key is missing. Check explicitly and fail with
# a message that names the missing variable instead.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set; add it to your .env file or shell environment."
    )

# Only switch LangChain tracing on when its API key is actually available;
# without the key the rest of the notebook still runs, just untraced.
if os.getenv("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [4]:
# Chat model that the question-answering chain below uses to generate answers.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [6]:


# Load every PDF in a directory with PyPDFLoader and return the combined text
def load_pdfs(directory):
    """
    Extract the text of all PDF files located directly inside `directory`.

    Args:
    - directory (str): Folder to scan (sub-folders are not visited).

    Returns:
    - str: The pages of each PDF joined by a newline, and the PDFs joined by a
      blank line, in sorted filename order. With a single PDF this is the same
      string the previous implementation returned.

    Raises:
    - FileNotFoundError: If the folder contains no PDF files.
    """
    pdf_texts = []

    # os.listdir() returns entries in arbitrary order; sort so that repeated
    # runs build the same text (and therefore the same chunks).
    for filename in sorted(os.listdir(directory)):
        if not filename.lower().endswith(".pdf"):
            continue
        pdf_path = os.path.join(directory, filename)
        pages = PyPDFLoader(pdf_path).load()
        # Accumulate per-file text; previously each file overwrote the last one.
        pdf_texts.append("\n".join(page.page_content for page in pages))

    if not pdf_texts:
        # Fail here with a clear message instead of an UnboundLocalError.
        raise FileNotFoundError(f"No PDF files found in {directory!r}")

    return "\n\n".join(pdf_texts)

In [7]:
# Load the PDF text from the "data" folder under the current working directory
pdf_data = load_pdfs(os.path.join(os.getcwd(), "data"))

In [8]:
# Preview only the start of the extracted text for verification; echoing the
# whole corpus floods the notebook output and bloats the saved file.
pdf_data[:1000]

"A STUDY ON ATTITUDE OF URBAN AND RURAL COLLEGE STUDENT \nTEACHERS TOWARDS SCIENCE\n* Research Scholar, Manonmaniam Sundaranar University, Tirunelveli.\n** Assistant Professor in Physical Science, St.Xavier's College of Education, Palayamkottai.\nABSTRACT\nThere is a great need to identify and develop positive attitude towards science subject of student teachers. The time has \ncome to increase our efforts to develop positive attitude towards science subject among teachers, student teachers \nand school children. This is an immediate requirement of the present day.  This paper reports on attitude of student \nteachers towards science with respect to their locality of the college. The sample consists of 1080 student teachers of \nMadurai revenue district. A scale on attitude towards science was used to get the data from the student teachers. \nPercentage Analysis, Mean, Standard Deviation and ’t'tests were used for analyzing the data. The results showed that \nthere is no significant di

In [9]:
def recursive_text_split(text, chunk_size=500, chunk_overlap=50):
    """
    Split text into overlapping chunks with RecursiveCharacterTextSplitter.

    The splitter is given four separators, ordered coarsest to finest:
    blank line, newline, space, and the empty string.

    Args:
    - text (str): The full text to be split.
    - chunk_size (int): Maximum chunk size, passed through to the splitter.
    - chunk_overlap (int): Overlap between chunks, passed through to the splitter.

    Returns:
    - Whatever the splitter's split_text() returns for `text` (the chunks).
    """
    # Paragraph break, line break, word gap, then character level.
    fallback_separators = ["\n\n", "\n", " ", ""]

    chunker = RecursiveCharacterTextSplitter(
        separators=fallback_separators,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    chunks = chunker.split_text(text)
    return chunks

In [10]:
# Chunk the extracted PDF text using the function's default chunk_size=500 and chunk_overlap=50.
text_chunks = recursive_text_split(pdf_data)

In [11]:
# Embedding model (library defaults) later handed to FAISS to vectorize the chunks.
embeddings_model = OpenAIEmbeddings()

In [12]:
# Wrap each text chunk in a Document (no metadata) so it can be indexed below.
documents = [
    Document(page_content=piece)
    for piece in text_chunks
]

In [55]:
# Show only the first two documents as a sanity check; displaying the whole
# list dumps every chunk into the notebook output.
documents[:2]

[Document(metadata={}, page_content="A STUDY ON ATTITUDE OF URBAN AND RURAL COLLEGE STUDENT \nTEACHERS TOWARDS SCIENCE\n* Research Scholar, Manonmaniam Sundaranar University, Tirunelveli.\n** Assistant Professor in Physical Science, St.Xavier's College of Education, Palayamkottai.\nABSTRACT\nThere is a great need to identify and develop positive attitude towards science subject of student teachers. The time has \ncome to increase our efforts to develop positive attitude towards science subject among teachers, student teachers"),
 Document(metadata={}, page_content="and school children. This is an immediate requirement of the present day.  This paper reports on attitude of student \nteachers towards science with respect to their locality of the college. The sample consists of 1080 student teachers of \nMadurai revenue district. A scale on attitude towards science was used to get the data from the student teachers. \nPercentage Analysis, Mean, Standard Deviation and ’t'tests were used fo

In [13]:
# Build the FAISS vector store from the chunk documents using the embedding model.
db = FAISS.from_documents(documents, embeddings_model)

In [14]:
# Sanity-check retrieval directly against the vector store before building the chain.
query = "who is S. PRAKASH"
# No `k` is passed, so the number of results is the library default.
docs = db.similarity_search(query)
# Print the text of the first returned chunk.
print(docs[0].page_content)

in Psychology and Education”.5th Ed., Bombay: Vakils 
Fefer and Sons Private Limited.
[13]. Sharma, R. C. (1989). “Modern Science Teaching”. 
New Delhi: Dhanpat Rai and sons.
[14]. Vaidya and Narendra. (1976). “The Impact of 
Science Teaching”. New Delhi: Oxford and IBH Publishing 
Company.
Mr.S.Prakash is a research scholar at Manonmaniam Sundaranar University, Tirunelveli. Presently he is working as Assistant


In [15]:
# Retriever over the FAISS store: similarity search with k=2 results per query.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k":2})

In [16]:
# System message: the instruction sentences (space-separated), a blank line,
# then the {context} placeholder that the prompt template fills in.
system_prompt = "\n\n".join(
    [
        " ".join(
            [
                "You are an assistant for question-answering tasks.",
                "Use the following pieces of retrieved context to answer the question.",
                "If you don't know the answer, say that you don't know.",
                "Use three sentences maximum and keep the answer concise.",
            ]
        ),
        "{context}",
    ]
)


# Two-message chat template: the system instructions (with {context}) followed
# by the user's question supplied as {input}.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [17]:
# Chain that combines documents with the prompt and sends the result to the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
# Full RAG chain: pairs the retriever with the question-answering chain above.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [18]:
# Ask the same question used for the raw similarity search, this time through the RAG chain.
response = rag_chain.invoke({"input": "who is S. PRAKASH"})
# The response is indexed by key; the generated text is stored under "answer".
print(response["answer"])

S. Prakash is a research scholar at Manonmaniam Sundaranar University, Tirunelveli, who is currently working as an Assistant Professor. He has received the Best Chemistry Teacher Award in 1998-1999 from Jain Mahavir Association, Chennai, and is interested in research, soft skills, and educational psychology. He has presented papers in National and International seminars.
