In [1]:
import os
import openai
from dotenv import load_dotenv, find_dotenv

# Locate a .env file and load its variables into the process environment,
# then pass the OpenAI key to the openai client module.
load_dotenv(find_dotenv())
# Indexing os.environ raises KeyError if OPENAI_API_KEY is not set, so a
# missing key fails here rather than on the first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]


In [3]:
from openai.embeddings_utils import get_embedding
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter

In [17]:
# Directory scanned for PDF files. Set the DOCS_PATH environment variable
# to point somewhere else instead of editing this machine-specific default.
docs_path = os.environ.get(
    "DOCS_PATH",
    "/home/alok/projects/LangChain/project/chat_with_pdf",
)

In [19]:
# Read every PDF in docs_path and split its text into overlapping chunks.
# The splitter is built once, outside the loop, because its settings do not
# depend on the file being processed.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
)

text_chunk = []
# os.listdir returns entries in arbitrary order; sorting makes the chunk order
# (and therefore the index contents) reproducible across runs.
for f_name in sorted(os.listdir(docs_path)):
    f_path = os.path.join(docs_path, f_name)
    # Compare the extension case-insensitively so files such as "REPORT.PDF"
    # are not silently skipped.
    if os.path.isfile(f_path) and f_name.lower().endswith('.pdf'):
        reader = PdfReader(f_path)
        # Join pages with a newline so the last line of one page is not fused
        # with the first line of the next; "\n" is also the splitter separator.
        text = "\n".join(page.extract_text() for page in reader.pages)
        text_chunk.extend(text_splitter.split_text(text))

print("Total number of text chunks: ", len(text_chunk))

Total number of text chunks:  3


In [20]:
# Keep only chunks containing more than 10 whitespace-separated words and trim
# surrounding whitespace. str.strip() with no argument already removes leading
# and trailing newlines, so no separate newline strip is needed.
text_chunk = [
    chunk.strip()
    for chunk in text_chunk
    if len(chunk.split()) > 10
]

In [21]:
# Embed the text chunks and index them in an in-memory FAISS vector store.
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

# Fail early with a clear message: there is nothing to index if no PDF was
# found or every chunk was dropped by the word-count filter.
if not text_chunk:
    raise ValueError(
        "No text chunks to index; check docs_path and the chunk filter."
    )

embeddings = OpenAIEmbeddings()  # type: ignore
vector_store = FAISS.from_texts(texts=text_chunk, embedding=embeddings)

In [22]:
# Assemble the conversational retrieval chain from three parts:
# a chat model, a retriever over the vector store, and a conversation memory.
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

# The memory is registered under the "chat_history" key and configured with
# return_messages=True.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")
llm = ChatOpenAI()  # type: ignore

conversation_chain = ConversationalRetrievalChain.from_llm(
    retriever=vector_store.as_retriever(),
    memory=memory,
    llm=llm,
)



In [23]:
def print_answer(question, chain=None):
    """Send a question to a retrieval chain and print the answer.

    Args:
        question: The question text to send to the chain.
        chain: Optional callable that accepts ``{'question': ...}`` and
            returns a mapping with an ``'answer'`` key. When omitted, the
            notebook-level ``conversation_chain`` is used, so existing
            one-argument calls behave as before.

    Returns:
        None. The answer is written to stdout.
    """
    # Look up the default at call time so a re-created conversation_chain is
    # picked up without redefining this function.
    active_chain = conversation_chain if chain is None else chain
    res = active_chain({'question': question})
    print(res['answer'])

In [24]:
# Example query; the question is sent to the chain with a "User: " prefix.
text = "What is Eligibility criteria for CISCO"
print_answer(f"User: {text}")

Eligibility criteria for the CISCO Training Program through NetAcad are as follows:

- Student's Graduation Year: 2024
- Minimum CGPA: 7.0 and above (highest degree)
- No. of backlogs/arrears: 0
