In [3]:
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai.llms import GoogleGenerativeAI
from langchain.document_loaders import PyPDFDirectoryLoader
import pinecone
import langchain
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups for the API keys further down can find them.
load_dotenv()

True

In [4]:

# Configure Google Generative AI.
# The key is taken from the environment; stop immediately with a clear message
# when it is absent or empty instead of failing later inside an API call.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise EnvironmentError("GOOGLE_API_KEY not found. Please set it in your .env file.")

genai.configure(api_key=GOOGLE_API_KEY)

# Initialize Language Model (the chat model reused by the cells below).
llm = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model='gemini-1.5-flash',
)

# Read PDF

In [21]:
# read file
def read_doc(directory):
    """Load the PDF files found under ``directory``.

    Args:
        directory: Path handed straight to ``PyPDFDirectoryLoader``.

    Returns:
        Whatever ``PyPDFDirectoryLoader(directory).load()`` returns. In this
        notebook's captured output that is a list of ``Document`` objects whose
        metadata carries ``source`` and ``page``.
    """
    return PyPDFDirectoryLoader(directory).load()

In [27]:
# Load every PDF under documents/ into `docs`.
docs = read_doc('documents/')
# Preview only the first entry; displaying the whole list dumps the full text
# of every loaded page into the cell output.
docs[:1]

[Document(metadata={'source': 'documents\\peft.pdf', 'page': 0}, page_content='1\nParameter-Efficient Fine-Tuning Methods for\nPretrained Language Models: A Critical\nReview and Assessment\nLingling Xu, Haoran Xie, Si-Zhao Joe Qin, Xiaohui Tao, Fu Lee Wang\nAbstract—With the continuous growth in the number of\nparameters of transformer-based pretrained language models\n(PLMs), particularly the emergence of large language models\n(LLMs) with billions of parameters, many natural language\nprocessing (NLP) tasks have demonstrated remarkable success.\nHowever, the enormous size and computational demands of\nthese models pose significant challenges for adapting them\nto specific downstream tasks, especially in environments with\nlimited computational resources. Parameter Efficient Fine-Tuning\n(PEFT) offers an effective solution by reducing the number\nof fine-tuning parameters and memory usage while achieving\ncomparable performance to full fine-tuning. The demands for\nfine-tuning PLMs, e

In [28]:
# Number of Document objects returned by read_doc.
len(docs)

20

# Chunks of PDF

In [29]:
def chunk_doc(docs, chunk_size=300, chunk_overlap=50):
    """Split documents into smaller, overlapping chunks.

    Args:
        docs: Documents to split; passed unchanged to ``split_documents``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to ``RecursiveCharacterTextSplitter``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents(docs)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return splitter.split_documents(docs)

In [30]:
# Chunk the pages loaded into `docs`. (`doc` is not defined by any visible cell,
# so passing it here fails with NameError on a fresh kernel.)
documents = chunk_doc(docs=docs)
# Preview a few chunks rather than dumping every chunk into the cell output.
documents[:3]

[Document(metadata={'source': 'documents\\peft.pdf', 'page': 0}, page_content='1\nParameter-Efficient Fine-Tuning Methods for\nPretrained Language Models: A Critical\nReview and Assessment\nLingling Xu, Haoran Xie, Si-Zhao Joe Qin, Xiaohui Tao, Fu Lee Wang\nAbstract—With the continuous growth in the number of\nparameters of transformer-based pretrained language models'),
 Document(metadata={'source': 'documents\\peft.pdf', 'page': 0}, page_content='(PLMs), particularly the emergence of large language models\n(LLMs) with billions of parameters, many natural language\nprocessing (NLP) tasks have demonstrated remarkable success.\nHowever, the enormous size and computational demands of\nthese models pose significant challenges for adapting them'),
 Document(metadata={'source': 'documents\\peft.pdf', 'page': 0}, page_content='to specific downstream tasks, especially in environments with\nlimited computational resources. Parameter Efficient Fine-Tuning\n(PEFT) offers an effective solution by

# Embeddings

In [32]:
# Embedding model used for both indexing and querying. The API key is passed
# explicitly, the same way the chat model is configured, instead of relying on
# the client picking it up implicitly from the environment.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)


# Vectorizer

In [34]:
# Sanity check: embed a sample query and show both the container type and the
# vector length. Only a cell's last expression is displayed, so a bare
# `type(e)` on its own line in the middle of the cell would show nothing.
e = embeddings.embed_query("how are you?")
type(e), len(e)

768

# Pinecone Vector DB

In [49]:
# Read the Pinecone key from the environment and fail early with a clear
# message when it is missing, mirroring the GOOGLE_API_KEY check.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise EnvironmentError("PINECONE_API_KEY not found. Please set it in your .env file.")

In [50]:
from pinecone import Pinecone, ServerlessSpec
import time

# Pinecone client; the API key is read from the environment.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

# Serverless deployment target, overridable through environment variables.
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)

index_name = "rag-project"

if index_name not in pc.list_indexes().names():
    # GoogleGenerativeAIEmbeddings does not expose a `dimension` attribute, so
    # measure the vector length from a real embedding instead of reading one.
    embedding_dimension = len(embeddings.embed_query("dimension probe"))
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric="cosine",
        spec=spec
    )
    # Wait for index to be ready
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# Show the index statistics before anything is upserted by this run.
print("Index before upsert:")
print(pc.Index(index_name).describe_index_stats())
print("\n")


Index before upsert:
{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}




In [52]:
!pip install langchain-pinecone


Collecting langchain-pinecone
  Using cached langchain_pinecone-0.2.0-py3-none-any.whl (11 kB)
Installing collected packages: langchain-pinecone
Successfully installed langchain-pinecone-0.2.0


You should consider upgrading via the 'E:\LangChain\LangChain\End-End-GenAI-Education-Industry-Project\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [54]:
from langchain_pinecone import PineconeVectorStore

namespace = "wondervector5000"

# Embed the chunked documents and upsert them into the Pinecone index.
# `documents` is the output of chunk_doc; the name `doc` is not defined by any
# visible cell and raises NameError on a fresh kernel.
# NOTE(review): the stored IDs look auto-generated, so re-running this cell
# presumably inserts duplicates - confirm before re-executing.
docsearch = PineconeVectorStore.from_documents(
    documents=documents,
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace
)

# Give the index a moment before reading its stats. The captured output of this
# cell still reported 0 vectors after this delay, so the count printed below
# may lag behind the upsert.
time.sleep(5)

# See how many vectors have been upserted
print("Index after upsert:")
print(pc.Index(index_name).describe_index_stats())
print("\n")
time.sleep(2)


Index after upsert:
{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}




### Indexing in Pinecone

In [55]:
index = pc.Index(index_name)
namespace = "wondervector5000"

# Options shared by every lookup below: ask for the single best match together
# with its vector values and stored metadata.
lookup_options = dict(top_k=1, include_values=True, include_metadata=True)

# Only element [0] of each item yielded by index.list(...) is used as the query
# id, so exactly one record is printed per yielded item.
for ids in index.list(namespace=namespace):
    query = index.query(id=ids[0], namespace=namespace, **lookup_options)
    print(query)
    print("\n")


{'matches': [{'id': '09fdca80-b308-4305-8874-3fc4e282126b',
              'metadata': {'page': 4.0,
                           'source': 'documents\\peft.pdf',
                           'text': '5\n'
                                   'Multi-Head Attention\n'
                                   'Layer Normalization\n'
                                   'Layer Normalization\n'
                                   'Feed-Forward Network\n'
                                   '+\n'
                                   '+\n'
                                   'Adapter Network\n'
                                   'Adapter Network\n'
                                   'Nonlinear \n'
                                   'Activation\n'
                                   '+\n'
                                   'Down-projection\n'
                                   'Up-projection\n'
                                   'kd\n'
                                   'k d\n'
                                   

In [56]:
def retiver_query(query, k=2, store=None):
    """Return the ``k`` stored chunks most similar to ``query``.

    Args:
        query: Text to search for.
        k: Number of results to request.
        store: Object exposing ``similarity_search(query, k=...)``. When
            omitted, the notebook-level ``docsearch`` vector store is used.

    Returns:
        The result of ``store.similarity_search(query, k=k)``.
    """
    if store is None:
        # Search through the LangChain vector store: the notebook-level `index`
        # is the raw Pinecone index handle, which has no `similarity_search`.
        store = docsearch
    return store.similarity_search(query, k=k)

# Chain

In [63]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub

In [64]:


# Prompt pulled from the LangChain hub.
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# Retriever interface over the vector store held in `docsearch`.
retriever = docsearch.as_retriever()

# Combine the LLM and prompt into a documents chain, then pair that chain with
# the retriever to obtain the final retrieval chain.
combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=retrieval_qa_chat_prompt)
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=combine_docs_chain,
)




In [66]:
# Baseline: send the question straight to the chat model, without going through
# the retrieval chain.
query1 = "What are the peft?"
answer1_without_knowledge = llm.invoke(input=query1)
baseline_text = answer1_without_knowledge.content

print("Query 1:", query1)
print("\nAnswer without knowledge:\n\n", baseline_text)
print("\n")
time.sleep(2)




Query 1: What are the peft?

Answer without knowledge:

 PEFT stands for **Parameter-Efficient Fine-Tuning**.  It's a collection of techniques in machine learning, specifically for large language models (LLMs), that aim to adapt a pre-trained model to a new task or dataset with minimal changes to the model's parameters.  Instead of fine-tuning all the parameters of the massive model (which is computationally expensive and resource-intensive), PEFT methods focus on modifying only a small subset of parameters, or adding a small number of new parameters.

This makes PEFT techniques significantly more efficient than full fine-tuning in terms of:

* **Computational cost:** Requires less computing power and time.
* **Memory usage:** Needs less memory to train and deploy.
* **Storage space:** The adapted model requires less storage.

Several popular PEFT methods exist, including:

* **Adapter modules:**  These add small, task-specific modules to the pre-trained model, allowing the model to le

In [67]:
# Baseline for a second question, again sent straight to the chat model.
# NOTE(review): this reuses `query1` / `answer1_without_knowledge` and the
# "Query 1:" label, overwriting the values from the previous question - confirm
# whether a separate `query2` was intended.
query1 = "Explain Full Fine-tuning of PLMs?"
answer1_without_knowledge = llm.invoke(input=query1)
baseline_text = answer1_without_knowledge.content

print("Query 1:", query1)
print("\nAnswer without knowledge:\n\n", baseline_text)
print("\n")
time.sleep(2)




Query 1: Explain Full Fine-tuning of PLMs?

Answer without knowledge:

 Full fine-tuning, in the context of Pre-trained Language Models (PLMs), refers to a training approach where **all** the parameters of the pre-trained model are updated during the fine-tuning process.  This contrasts with other methods like adapter-based fine-tuning or parameter-efficient fine-tuning, which only adjust a small subset of the parameters.

Here's a breakdown of full fine-tuning:

**How it works:**

1. **Start with a pre-trained PLM:**  You begin with a large language model (like BERT, RoBERTa, GPT-3, etc.) that has already been trained on a massive text corpus.  This pre-training provides the model with a strong foundation in language understanding.

2. **Prepare a downstream task dataset:** You gather a dataset specific to the task you want the model to perform (e.g., sentiment analysis, question answering, text classification). This dataset needs to be properly formatted and prepared for training.

3