In [11]:
from langchain_community.document_loaders import Docx2txtLoader
from langchain_community.docstore.document import Document
from typing import List
def load_documents(filepath:str)->List[Document]:
    """Read the .docx file at ``filepath`` and return its contents as Documents.

    Args:
        filepath: Location of the Word document to read.

    Returns:
        The list produced by ``Docx2txtLoader(filepath).load()``.
    """
    return Docx2txtLoader(filepath).load()

In [12]:
# Load the FAQ .docx through load_documents.
# NOTE(review): this is a hard-coded absolute path on one machine; the cell will
# fail elsewhere — consider a configurable data directory.
documents=load_documents(r"C:/Users/PMYLS/Desktop/chatobt/data/CHATBOT data requirements.docx")
# Bare expression so the notebook displays the loaded list.
documents

[Document(metadata={'source': 'C:/Users/PMYLS/Desktop/chatobt/data/CHATBOT data requirements.docx'}, page_content="CHATBOT data requirements \n\nFAQs for STEAM Minds\n\n1. What problem does STEAM Minds aim to address?\n\nSTEAM Minds focuses on overcoming the shortage of effective, skill-based STEM learning experiences in traditional education. Current learning models often lack flexibility, engagement, and hands-on opportunities as they often involve rote learning. The skill based learning I essential for preparing students for a technology-driven future. This gap impacts industries requiring a tech-literate workforce.\n\n\n\n2. What solutions does STEAM Minds offer?\n\nSTEAM Minds provides innovative solutions such as:\n\nAI-driven personalized learning to tailor educational experiences to individual needs.\n\nVirtual and Augmented Reality (VR/AR) for immersive simulative and gamified learning.\n\nHands-on, skill-based activities to engage students in problem-solving, coding, and scie

In [13]:
def filter_documents(docs:List[Document])->List[Document]:
    """Return new Documents whose metadata holds only the 'source' key.

    Args:
        docs: Documents to strip down. The inputs themselves are not modified.

    Returns:
        A new list with one Document per input, in the same order. Each keeps
        the original ``page_content`` and carries ``metadata={"source": ...}``,
        where the value is ``None`` when the input had no 'source' entry.
        An empty input yields an empty list.
    """
    # Build the complete list before returning: every input document must be
    # processed, not only the first one.
    return [
        Document(
            page_content=doc.page_content,
            metadata={"source":doc.metadata.get("source")}
        )
        for doc in docs
    ]

In [14]:
# Pass the loaded documents through filter_documents, which keeps page_content
# and limits metadata to the 'source' key.
filter_docs=filter_documents(documents)
# Bare expression so the notebook displays the result.
filter_docs

[Document(metadata={'source': 'C:/Users/PMYLS/Desktop/chatobt/data/CHATBOT data requirements.docx'}, page_content="CHATBOT data requirements \n\nFAQs for STEAM Minds\n\n1. What problem does STEAM Minds aim to address?\n\nSTEAM Minds focuses on overcoming the shortage of effective, skill-based STEM learning experiences in traditional education. Current learning models often lack flexibility, engagement, and hands-on opportunities as they often involve rote learning. The skill based learning I essential for preparing students for a technology-driven future. This gap impacts industries requiring a tech-literate workforce.\n\n\n\n2. What solutions does STEAM Minds offer?\n\nSTEAM Minds provides innovative solutions such as:\n\nAI-driven personalized learning to tailor educational experiences to individual needs.\n\nVirtual and Augmented Reality (VR/AR) for immersive simulative and gamified learning.\n\nHands-on, skill-based activities to engage students in problem-solving, coding, and scie

In [15]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
def text_split(filter_docs, chunk_size:int=500, chunk_overlap:int=50):
    """Split documents into smaller chunks with RecursiveCharacterTextSplitter.

    Args:
        filter_docs: Documents to split; passed unchanged to
            ``split_documents``.
        chunk_size: ``chunk_size`` handed to the splitter. Defaults to 500,
            the value that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` handed to the splitter. Defaults to
            50, the value that was previously hard-coded.

    Returns:
        The chunk documents returned by ``split_documents``.
    """
    splitter=RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    return splitter.split_documents(filter_docs)

In [16]:
# Split the filtered documents into chunks via text_split.
chunks=text_split(filter_docs)
# Bare expression so the notebook displays the chunk list.
chunks

[Document(metadata={'source': 'C:/Users/PMYLS/Desktop/chatobt/data/CHATBOT data requirements.docx'}, page_content='CHATBOT data requirements \n\nFAQs for STEAM Minds\n\n1. What problem does STEAM Minds aim to address?'),
 Document(metadata={'source': 'C:/Users/PMYLS/Desktop/chatobt/data/CHATBOT data requirements.docx'}, page_content='1. What problem does STEAM Minds aim to address?\n\nSTEAM Minds focuses on overcoming the shortage of effective, skill-based STEM learning experiences in traditional education. Current learning models often lack flexibility, engagement, and hands-on opportunities as they often involve rote learning. The skill based learning I essential for preparing students for a technology-driven future. This gap impacts industries requiring a tech-literate workforce.'),
 Document(metadata={'source': 'C:/Users/PMYLS/Desktop/chatobt/data/CHATBOT data requirements.docx'}, page_content='2. What solutions does STEAM Minds offer?\n\nSTEAM Minds provides innovative solutions s

In [19]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from dotenv import load_dotenv
# Load variables from a .env file before constructing the OpenAI client.
load_dotenv()
# NOTE(review): OpenAIEmbeddings() is built with no arguments; presumably it
# picks up the API key from the environment populated above — confirm.
embeddings=OpenAIEmbeddings()
# Build a FAISS vector store from the chunks using the embeddings object.
vector_store=FAISS.from_documents(chunks,embeddings)

In [24]:
# Wrap the vector store as a retriever using similarity search.
# k=5 is presumably the number of chunks returned per query — confirm against
# the retriever documentation.
retriever=vector_store.as_retriever(search_type="similarity",search_kwargs={"k":5})

In [25]:
from langchain.prompts import ChatPromptTemplate
# System message template. `{context}` is a template placeholder; presumably
# the documents chain fills it with the retrieved chunk text — confirm.
# The template instructs the model to say it doesn't know when context is insufficient.
system_prompt="""
    You are helpful assistant.
    You can answer from the given text.
    If you don't have sufficient context, just say don't know.
    {context}\n
    """
# Two-message chat prompt: the system template above, then the user's question
# supplied through the `{input}` placeholder.
prompt=ChatPromptTemplate.from_messages([
    ("system",system_prompt),
    ("human","{input}")
])

In [26]:
from langchain_openai import ChatOpenAI
# Chat model built with library defaults: no model name or other settings are
# passed here.
model=ChatOpenAI()

In [27]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# Chain that combines documents with the prompt and sends them to the model.
question_answer_chain=create_stuff_documents_chain(model,prompt)
# Full RAG chain: the retriever feeds the question-answer chain above.
rag_chain=create_retrieval_chain(retriever,question_answer_chain)

In [29]:
# Run the chain with the question under the "input" key, matching the
# `{input}` placeholder in the prompt.
response=rag_chain.invoke({"input":"What is STEAM Minds' mission"})
# The generated text is read from the "answer" key of the response.
print(response["answer"])

STEAM Minds' mission is to transform K-12 STEAM education by delivering AI-driven, interactive, and simulative, gamified learning experiences. The goal is to create an inclusive and immersive educational environment that enhances essential skills in innovation and critical thinking.
