In [1]:
import os

In [2]:
%pwd

'f:\\llam2_langchain\\research'

In [3]:
# The notebook starts inside `research/` (see the first `%pwd` output), while the
# relative paths used below (e.g. `learning_python/`) resolve from its parent.
# Guarded on the folder name so re-running this cell does not keep climbing
# the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir('../')

In [4]:
%pwd

'f:\\llam2_langchain'

In [99]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [121]:
def load_pdf(data, max_docs=200):
    """Load the PDF files in a directory into LangChain documents.

    Args:
        data: Path of the directory to scan. Files matching the ``*.pdf``
            glob are loaded, each through ``PyPDFLoader``.
        max_docs: Maximum number of documents to return, taken from the front
            of the loaded list. Defaults to 200 (the previously hard-coded
            cap). Pass ``None`` to return every loaded document.

    Returns:
        A list holding at most ``max_docs`` of the loaded documents, in the
        order the loader produced them.

    Raises:
        ValueError: If ``max_docs`` is negative.
    """
    # A negative stop index would silently drop documents from the end.
    if max_docs is not None and max_docs < 0:
        raise ValueError("max_docs must be None or a non-negative integer")

    loader=DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )

    documents=loader.load()

    # A stop index of None keeps the whole list.
    return documents[:max_docs]

In [122]:
# Load the PDF(s) from the `learning_python/` folder, resolved relative to the
# current working directory.
extracted_data=load_pdf('learning_python/')

In [102]:
# Number of documents returned by the loader.
# NOTE(review): the recorded output (1213) exceeds the 200-document cap applied
# inside `load_pdf`, so it appears to come from an earlier version of that
# function; a fresh run would show at most 200.
len(extracted_data)

1213

In [103]:
# Work with the first 200 documents only.
# NOTE(review): `load_pdf` already truncates its result to 200 documents, so
# this slice is currently a no-op.
docs=extracted_data[0:200]

In [104]:
# Confirm the size of the working subset.
len(docs)

200

In [105]:
def text_split(docs, chunk_size=1000, chunk_overlap=100):
    """Split documents into overlapping chunks.

    Args:
        docs: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` forwarded to the splitter. Defaults to
            1000, the value that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter. Defaults
            to 100, the value that was previously hard-coded.

    Returns:
        Whatever ``split_documents`` returns for ``docs`` (the chunked
        documents).
    """
    text_splitter=RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)

In [106]:
# Split the selected documents into chunks; these are what get embedded and
# indexed further down.
text_chunk=text_split(docs)

In [107]:
# Number of chunks produced by the splitter.
len(text_chunk)

550

In [108]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook output and bloats the saved file.
text_chunk[:3]

[Document(metadata={'source': 'learning_python\\Learning_Python.pdf', 'page': 1}, page_content='Learning Python'),
 Document(metadata={'source': 'learning_python\\Learning_Python.pdf', 'page': 3}, page_content='FOURTH EDITION\nLearning Python\nMark Lutz\nBeijing • Cambridge • Farnham • Köln • Sebastopol • Taipei • Tokyo'),
 Document(metadata={'source': 'learning_python\\Learning_Python.pdf', 'page': 4}, page_content='Learning Python, Fourth Edition\nby Mark Lutz\nCopyright © 2009 Mark Lutz. All rights reserved.\nPrinted in the United States of America.\nPublished by O’Reilly Media, Inc., 1005 Gravenstein Highway North, Sebastopol, CA 95472.\nO’Reilly books may \nbe purchased for educational, business, or sales promotional use. Online editions\nare also available for most titles ( http://my.safaribooksonline.com). For more information, contact our\ncorporate/institutional sales department: (800) 998-9938 or corporate@oreilly.com.\nEditor: Julie Steele\nProduction Editor: Sumita Mukherji

In [109]:
from langchain.embeddings import HuggingFaceEmbeddings

def download_huggingface_embeddings():
    """Return a ``HuggingFaceEmbeddings`` object for all-MiniLM-L6-v2.

    The model is identified by its sentence-transformers name; constructing
    the wrapper presumably fetches the weights on first use (hence the
    function name).
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_id)

In [111]:
# Build the embedding model once; the same object is reused for indexing and
# for retrieval in the cells below.
embeddings=download_huggingface_embeddings()

In [112]:
# Embed a probe string to check the vector size. The recorded output is 384,
# which is the `dimension` the Pinecone index is created with below.
result=embeddings.embed_query("hello world")
len(result)

384

In [113]:
from dotenv import load_dotenv
# Load variables from a local `.env` file (python-dotenv) so the API key can be
# read from the environment on the next line.
load_dotenv()

# Pinecone key taken from the environment; this is None when the variable is unset.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [114]:
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC as Pinecone

In [115]:
# Import the client under an unambiguous name: a later cell rebinds `Pinecone`
# to LangChain's vector-store class, so re-running this cell after that one
# would otherwise construct the wrong object.
from pinecone.grpc import PineconeGRPC

pc=PineconeGRPC(api_key=PINECONE_API_KEY)
index_name="llama"

# Create the index only when it is missing, so the cell can be re-run safely;
# an unconditional `create_index` fails once the index already exists.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the embedding length (384 in the check above)
        metric='cosine',
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [116]:
import os

# Fail with a clear message when the key is missing. Assigning None to
# os.environ raises an opaque "str expected, not NoneType" TypeError.
if not PINECONE_API_KEY:
    raise RuntimeError(
        "PINECONE_API_KEY is not set; define it in the environment or in the .env file"
    )

# Export the key, presumably so the LangChain Pinecone wrapper used below
# (which is not passed a key explicitly) can read it from the environment.
os.environ["PINECONE_API_KEY"]=PINECONE_API_KEY

In [117]:
from langchain.vectorstores import Pinecone

In [118]:
# Embed the chunks with `embeddings` and store them in the `index_name`
# Pinecone index through LangChain's vector-store wrapper.
# NOTE(review): re-running this cell presumably inserts the same chunks again.
docs_search = Pinecone.from_documents(
    documents=text_chunk,
    embedding=embeddings,
    index_name=index_name,
)

In [119]:
# Re-attach to the already-populated index without uploading anything; this
# rebinds `docs_search`, replacing the object from the previous cell.
docs_search = Pinecone.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
)

In [120]:
# Similarity-search retriever over the vector store, configured with k=3.
retriever = docs_search.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [71]:
# Smoke-test the retriever on its own, before wiring it into the chain.
retriever.invoke("what is python")

[Document(metadata={'page': 1.0, 'source': 'learning_python\\Learning_Python.pdf'}, page_content='Learning Python'),
 Document(metadata={'page': 59.0, 'source': 'learning_python\\Learning_Python.pdf'}, page_content='For more details on companies using Python today, see Python’s website at http://www\n.python.org.\nWhat Can I Do with Python?\nIn addition to \nbeing a well-designed programming language, Python is useful for ac-\ncomplishing real-world tasks—the sorts of things developers do day in and day out.\nIt’s commonly used in a variety of domains, as a tool for scripting other components\nand implementing standalone programs. In fact, as a general-purpose language,\nPython’s roles are virtually unlimited: you can use it for everything from website de-\nvelopment and gaming to robotics and spacecraft control.\nHowever, the most common Python roles currently seem to fall into a few broad cat-\negories. The next few sections describe some of Python’s most common applications\ntoday, 

In [78]:
from dotenv import load_dotenv
# NOTE(review): `.env` was already loaded in an earlier cell; presumably this is
# repeated so the Google API key is present before the LLM is created — confirm.
load_dotenv()

True

In [80]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Use the parameter names the class actually defines. The previous spellings
# (`temprature`, `max_token`) match no model field, so the intended sampling
# temperature and output-length limit were never applied.
llm=ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0.3,max_output_tokens=100)

In [90]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System message for the QA chain. The pieces are joined by implicit string
# concatenation, so each fragment must carry its own trailing space; without
# it, words fuse together ("tasks.use", "answerthe", "youdon't").
# `{context}` is the placeholder that receives the retrieved documents.
system_prompt=(
    "you are an assistant for question-answering tasks. "
    "use the following pieces of retrieved context to answer "
    "the question. if you do not know the answer, say that you "
    "don't know. use three sentence maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Two-message chat template: the system instructions (with the `{context}`
# slot) followed by the user's question in `{input}`.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [91]:
# Answering step: combines the LLM with the prompt; the retrieved documents are
# presumably inserted into the prompt's `{context}` placeholder.
question_answer_chain=create_stuff_documents_chain(llm,prompt)
# Full pipeline: retriever first, then the answering step.
# NOTE(review): `reg_chain` is presumably meant to read `rag_chain`; the name is
# kept because the next cell refers to it.
reg_chain=create_retrieval_chain(retriever,question_answer_chain)

In [98]:
# Run the retrieval chain on a sample question; the model's reply is read from
# the 'answer' key of the returned mapping.
response=reg_chain.invoke({"input":"what is python"})
print(response['answer'])

Python is a popular, open-source programming language used for both standalone programs and scripting applications. It's known for being free, portable, powerful, easy to use, and emphasizes developer productivity and software quality.  You can use Python for a wide variety of tasks, from web development and gaming to robotics and spacecraft control.

