In [2]:
%pip install --upgrade --quiet langchain-community langchain-openai langchain-pinecone pinecone openai python-dotenv pypdf pydantic chainlit
import os
from langchain.chains import RetrievalQA
from langchain.embeddings import OpenAIEmbeddings
from langchain.agents import Agent
from langchain.chat_models import openai,ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone as PineconeClient
load_dotenv()


Note: you may need to restart the kernel to use updated packages.


True

In [25]:
# Read the service credentials and the target index name from the process
# environment (load_dotenv() has already run in the first cell).
# Each value is None when its variable is not set.
OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_INDEX_NAME = (
    os.environ.get(env_name)
    for env_name in ("OPENAI_API_KEY", "PINECONE_API_KEY", "PINECONE_INDEX_NAME")
)

In [26]:
def load_pdf(data):
    """Load the files matching ``*.pdf`` in the directory ``data``.

    Args:
        data: Directory path, forwarded unchanged to ``DirectoryLoader``
            together with the ``*.pdf`` glob.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the matched files,
        each file being read through ``PyPDFLoader``.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [27]:
# Placeholder binding; the next cell rebinds this name to the loaded documents.
extracted_data = None

In [28]:
# Load the PDFs from ./data/ (a relative path, so it resolves against the
# kernel's current working directory).
extracted_data = load_pdf("./data/")

In [29]:
# Number of documents returned by load_pdf.
len(extracted_data)

637

In [30]:
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping chunks ready for embedding.

    Args:
        extracted_data: Documents to split, e.g. the list returned by
            ``load_pdf``.
        chunk_size: Maximum size of each chunk, as measured by the splitter's
            length function. Defaults to 500, the value that used to be
            hard-coded here.
        chunk_overlap: Amount of overlap between consecutive chunks.
            Defaults to 20, the value that used to be hard-coded here.

    Returns:
        The chunks produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [9]:
# Chunk the loaded documents.
# NOTE(review): the saved execution count of this cell is out of sequence with
# the cells around it, so it was last run before text_split/extracted_data were
# (re)defined above — re-run it after those cells.
text_chunks = text_split(extracted_data)

In [31]:
# Report how many chunks text_split produced.
print(f'length of total chunks is {len(text_chunks)}')

length of total chunks is 5860


In [32]:
def load_embeddings():
    """Create the OpenAI embedding client used by this notebook.

    Returns:
        An ``OpenAIEmbeddings`` instance configured with the module-level
        ``OPENAI_API_KEY`` and the ``text-embedding-3-large`` model.
    """
    return OpenAIEmbeddings(
        model="text-embedding-3-large",
        api_key=OPENAI_API_KEY,
    )

In [33]:
# Shared embedding client; the probe query and both vector-store cells reuse it.
embedding = load_embeddings()

In [34]:
# Probe call: embed one sample query so the next cell can inspect its length.
# Despite the plural name, this holds the result of a single embed_query call.
vectors = embedding.embed_query("What is the capital of ind?")

In [35]:
# Length of the embedding returned for the probe query.
# NOTE(review): the Pinecone index presumably has to be created with this same
# dimension — confirm against the index configuration.
len(vectors)

3072

In [36]:
# Pinecone client built from the API key.
# NOTE(review): `pc` is not referenced by any later cell visible here.
pc = PineconeClient(api_key=PINECONE_API_KEY)
# Name of the index that the vector-store cells write to and read from.
index_name = PINECONE_INDEX_NAME

In [37]:
# Embed every chunk and upload it to the Pinecone index.
#
# from_documents is used instead of from_texts on page_content alone so that
# each chunk's metadata is stored alongside its text; otherwise the source
# documents returned by the QA chain carry no provenance.
#
# NOTE(review): re-running this cell uploads the chunks again (no ids are
# supplied, so presumably new ones are generated each time — confirm). Run it
# once per index; afterwards connect with from_existing_index instead.
#
# The variable name `dosearch` is kept as-is for compatibility; the retrieval
# cells use `docsearch`, which is created from the existing index.
dosearch = PineconeVectorStore.from_documents(
    text_chunks,
    embedding=embedding,
    index_name=index_name,
)

In [38]:
# Connect to the existing index by name, using the same embedding client.
# This is the vector-store handle that the search and QA cells below use.
docsearch = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embedding)

In [39]:
# Display the vector-store object to confirm it was created.
docsearch

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x12e075d10>

In [None]:
# Direct retrieval check, bypassing the LLM: fetch the 4 most similar chunks
# for a sample question and print them.
query = "what is acne?"
docs = docsearch.similarity_search(query, k=4)
print(docs)

In [41]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders; the model is told to answer only from the supplied context and
# to say it doesn't know otherwise.
prompt_template="""
Use only the following pieces of information to answer the user's question.
If you don't get the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [42]:
# Wrap the raw template text in a PromptTemplate, declaring its two
# placeholders, then package it in the form the QA chain factory expects.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [44]:
# Chat model used by the QA chain; only the API key and model name are set
# here, so every other setting is left at the client's defaults.
llm = ChatOpenAI(api_key=OPENAI_API_KEY, model="gpt-4o")

In [None]:
# Display the configured chat model object.
llm

In [46]:
# Build the retrieval QA chain: the retriever is asked for k=4 chunks per
# query, the custom prompt is supplied through chain_type_kwargs, and the
# chain is configured to return the retrieved source documents as well.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=docsearch.as_retriever(search_kwargs=dict(k=4)),
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [47]:
# Ask the chain a question that the indexed documents are expected to cover.
res = qa.invoke({"query": "how to preven is  acne?"})

In [48]:
# Print only the generated answer text from the chain's output.
print(res["result"])

There are no sure ways to prevent acne, but the following steps may be taken to minimize flare-ups: gentle washing of affected areas once or twice every day, avoid abrasive cleansers, use noncomedogenic makeup and moisturizers, shampoo often and wear hair off face, eat a well-balanced diet avoiding foods that trigger flare-ups, give dry pimples a limited amount of sun exposure unless told otherwise, do not pick or squeeze blemishes, and reduce stress.


In [49]:
# Negative check: a question unrelated to the indexed documents. The prompt
# instructs the model to say it doesn't know rather than invent an answer.
res_should_unknown = qa.invoke({"query": "what is captal of india?"})

In [50]:
# Show the answer text for the negative check.
res_should_unknown['result']

"I don't know."

pip install pydantic==2.10.1 chainlit
chainlit hello  (sanity check: it starts without errors, so Chainlit is working)