In [13]:
!pip install llama_index==0.10.19 llama_index_core==0.10.19 torch llama-index-embeddings-huggingface peft optimum bitsandbytes





In [1]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings,SimpleDirectoryReader,VectorStoreIndex
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from transformers import AutoModelForCausalLM,AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#to globally set whatever resources we are going to use
Settings.embed_model=HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.llm=None
Settings.chunk_size=256   #here we are assuning 5000 chunck we are doing 5000/256=~20 so we diving documents into 20 chunks
Settings.chunk_overlap=15

LLM is explicitly disabled. Using MockLLM.


In [4]:
import os
print("Current Working Directory:", os.getcwd())


Current Working Directory: /Users/samjosephbritto/LLMs RAG/content


In [8]:
pip install nbconvert

Collecting nbconvert
  Downloading nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)
Collecting bleach!=5.0.0 (from nbconvert)
  Downloading bleach-6.2.0-py3-none-any.whl.metadata (30 kB)
Collecting defusedxml (from nbconvert)
  Downloading defusedxml-0.7.1-py2.py3-none-any.whl.metadata (32 kB)
Collecting jupyterlab-pygments (from nbconvert)
  Downloading jupyterlab_pygments-0.3.0-py3-none-any.whl.metadata (4.4 kB)
Collecting mistune<4,>=2.0.3 (from nbconvert)
  Downloading mistune-3.0.2-py3-none-any.whl.metadata (1.7 kB)
Collecting nbclient>=0.5.0 (from nbconvert)
  Downloading nbclient-0.10.0-py3-none-any.whl.metadata (7.8 kB)
Collecting nbformat>=5.7 (from nbconvert)
  Downloading nbformat-5.10.4-py3-none-any.whl.metadata (3.6 kB)
Collecting pandocfilters>=1.4.1 (from nbconvert)
  Downloading pandocfilters-1.5.1-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting tinycss2 (from nbconvert)
  Downloading tinycss2-1.4.0-py3-none-any.whl.metadata (3.0 kB)
Collecting webencodings (from b

In [9]:
documents=SimpleDirectoryReader("/Users/samjosephbritto/LLMs RAG/content").load_data()
print(len(documents))
for doc in documents:
    if len(doc.text)==0:
        documents.remove(doc)
        continue
print(len(documents))    

51
51


In [16]:
#creating a vector store
index=VectorStoreIndex.from_documents(documents)

#set number of docs to retrieve

top_k=2
#let configure retrieve

retriever=VectorIndexRetriever(
    index=index,
    similarity_top_k=top_k,
)

In [17]:
#assembling the query engine

query_engine=RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.5)]#50% similar to it will retrived by query engine
)

In [19]:
query="what's all this text about? "
response=query_engine.query(query)
print(response)

context="Context: \n"

for i in range(top_k):
    context=context+ response.source_nodes[i].text+"\n\n"
print(context)    

Context information is below.
---------------------
page_label: 29
file_path: /Users/samjosephbritto/LLMs RAG/content/2307.06435v10.pdf

LAMBADA [335], LCSTS [336], AdGen [337], E2E [338], CHID [339], CHID-
FC [312]
Physical Knowledge and
World UnderstandingPIQA [340], TriviaQA [341], ARC [342], ARC-Easy [342], ARC-Challenge [342], PROST [343], Open-
BookQA [344], WebNLG [345], DogWhistle Insider & Outsider [346]
Contextual Language
UnderstandingRACE [347], RACE-Middle [347], RACE-High [347], QuAC [348], StrategyQA [349], Quiz Bowl [350],
cMedQA [351],cMedQA2 [352], MATINF-QA [353]
Commonsense Reasoning WinoGrande [354], HellaSwag [355], COPA [356], WSC [357], CSQA [358], SIQA [359], C3[360],
CLUEWSC2020 [311], CLUEWSC [311], CLUEWSC-FC [312],

page_label: 44
file_path: /Users/samjosephbritto/LLMs RAG/content/2307.06435v10.pdf

29
[362] P. Rajpurkar, J. Zhang, K. Lopyrev, P. Liang, Squad: 100,000 +questions
for machine comprehension of text, arXiv preprint arXiv:1606.05250
(2016). 29, 

In [23]:
#load model

model_name="Qwen/Qwen2.5-1.5B-Instruct"
model=AutoModelForCausalLM.from_pretrained(model_name,
                           trust_remote_code=False,
                           revision="main",
                           #device_map='cuda:0'  who are having gpu can use
                           )
#load tokenizer
tokenizer=AutoTokenizer.from_pretrained(model_name,use_fast=True)

In [25]:
prompt_template_with_context=lambda context,query : f"""I am an AI assiantant tasked with answering question based on the provided PDF content.
please analyze the following except from PDF and answer the question
PDF content:
{context}

Question:{query}


Instructions:

-Answer only based on the information provided in the PDF content above.
-If the answer cannot be found in the provided content,say I cannot find the answer to the question and provide a PDF documnets
-Be concise and specifice
-Include relevant quote or references from the PDF when applicable
Answer:"""

In [30]:
comment="what is the long context-finetuning?"
prompt=prompt_template_with_context(context,query)

inputs=tokenizer(prompt,return_tensors='pt')
outputs=model.generate(input_ids=inputs["input_ids"],max_new_tokens=280)
print(tokenizer.batch_decode(outputs)[0])

I am an AI assiantant tasked with answering question based on the provided PDF content.
please analyze the following except from PDF and answer the question
PDF content:
Context: 
LAMBADA [335], LCSTS [336], AdGen [337], E2E [338], CHID [339], CHID-
FC [312]
Physical Knowledge and
World UnderstandingPIQA [340], TriviaQA [341], ARC [342], ARC-Easy [342], ARC-Challenge [342], PROST [343], Open-
BookQA [344], WebNLG [345], DogWhistle Insider & Outsider [346]
Contextual Language
UnderstandingRACE [347], RACE-Middle [347], RACE-High [347], QuAC [348], StrategyQA [349], Quiz Bowl [350],
cMedQA [351],cMedQA2 [352], MATINF-QA [353]
Commonsense Reasoning WinoGrande [354], HellaSwag [355], COPA [356], WSC [357], CSQA [358], SIQA [359], C3[360],
CLUEWSC2020 [311], CLUEWSC [311], CLUEWSC-FC [312],

29
[362] P. Rajpurkar, J. Zhang, K. Lopyrev, P. Liang, Squad: 100,000 +questions
for machine comprehension of text, arXiv preprint arXiv:1606.05250
(2016). 29, 31
[363] C. Clark, K. Lee, M.-W. Chang, T.