# **Install the required packages**

In [1]:
!pip install pypdf
!pip install python-dotenv
!pip install -q transformers einops accelerate langchain bitsandbytes
!pip install sentence_transformers
!pip install llama-index

Collecting pypdf
  Downloading pypdf-3.14.0-py3-none-any.whl (269 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/269.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/269.8 kB[0m [31m2.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m266.2/269.8 kB[0m [31m3.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.8/269.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-3.14.0
Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# **Import The installed packages**

In [2]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import HuggingFaceLLM

# **Load The Required Documents**

In [3]:
documents = SimpleDirectoryReader("/content/Data/").load_data() #the path of folder having the pdf files

In [4]:

from llama_index.prompts.prompts import SimpleInputPrompt


system_prompt = "You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided."

# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")


# **Authenticate your google colab with HuggingFace**

*--copy paste your Hugging Face Access token to authenticate your Google Colab with HF*

In [None]:
#!huggingface-cli login

from huggingface_hub import login
login()


In [None]:
import torch

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.3, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    # uncomment this if using CUDA to reduce memory usage
    model_kwargs={"torch_dtype": torch.float16 , "load_in_8bit":True}
)

In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding, ServiceContext

embed_model = LangchainEmbedding(
  HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)


In [8]:
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model
)

In [9]:
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [11]:
    query_engine = index.as_query_engine()
    response = query_engine.query("what is /PROTECTION parameter")

    print("PDF-GPT:",response)

CustomGPT: The /PROTECTION parameter is a literal PSQL parameter that is passed to the Db.select() method when the PSL compiler generates code for a DA TA -QWIK column protection scheme. The value of the /PROTECTION parameter determines the level of protection applied to the data item, with the following options:
* "/PROTECTION=0": ignores the data item protection scheme.
* "/PROTECTION=1": includes code that maintains the protection values of the individual columns, if at least one table specified in the Db.select() method has a data item for which protection is defined.
* "/PROTECTION=2": implies that neither access to individual column values nor access through aggregates is allowed when the column is protected. In this case, ResultSet.next() will replace the column value by NULL when access to the column is not granted.
Therefore, the /PROTECTION parameter determines the level of protection applied to the data item, and the PSL compiler must be able to detect this at compile time, 