In [None]:
### since we will be working with pdf we have to install the "pypdf"

!pip install pypdf
!pip install pypdf2
!pip install transformers
!pip install langchain
!pip install sentence_transformers
!pip install llama_index
!pip install llama-index-llms-huggingface
!pip install huggingface_hub
!pip install -U langchain-community
!pip install llama-index-embeddings-langchain
!pip install langchain
!pip install datasets
!pip install fitz
!pip install docx2txt
!pip install -q transformers einops accelerate langchain bitsandbytes
!pip install gradio



Collecting pypdf
  Downloading pypdf-4.3.1-py3-none-any.whl.metadata (7.4 kB)
Downloading pypdf-4.3.1-py3-none-any.whl (295 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/295.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-4.3.1
Collecting pypdf2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf2
Successfully installed pypdf2-3.0.1
Collecting langchain
  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<0.3.0,>=0.2.23 (from langchain)
  Downloading langchain_core-0.2.24-py3-none-any.whl.metadata (6.2 kB)
Collecting la

In [16]:
# Check current disk usage
!df -h

# List files in the root directory
!ls -lh /

# List files in the /opt/bin/.nvidia directory
!ls -lh /opt/bin/.nvidia

# List files in the /content directory (where Colab typically stores user files)
!ls -lh /content

# Remove unnecessary files and directories (uncomment and modify as needed)
# For example, if there are large files in /content, you can remove them:
# !rm -rf /content/your-unnecessary-file-or-directory

# Clear pip cache
!rm -rf ~/.cache/pip

# Clear apt cache
!sudo apt-get clean

# Clear temporary files in /tmp directory
!rm -rf /tmp/*

# Clear other unnecessary caches or temporary files (adjust paths as necessary)
!rm -rf /root/.cache
!rm -rf /var/lib/apt/lists/*

# Check disk usage again to see the changes
!df -h

Filesystem      Size  Used Avail Use% Mounted on
overlay          79G   31G   48G  39% /
tmpfs            64M     0   64M   0% /dev
shm             5.7G  4.0K  5.7G   1% /dev/shm
/dev/root       2.0G  1.2G  820M  59% /usr/sbin/docker-init
/dev/sda1       119G   96G   24G  81% /kaggle/input
tmpfs           6.4G  6.7M  6.4G   1% /var/colab
tmpfs           6.4G     0  6.4G   0% /proc/acpi
tmpfs           6.4G     0  6.4G   0% /proc/scsi
tmpfs           6.4G     0  6.4G   0% /sys/firmware
total 112K
lrwxrwxrwx   1 root root    7 Oct  4  2023 bin -> usr/bin
drwxr-xr-x   2 root root 4.0K Apr 18  2022 boot
drwxr-xr-x   1 root root 4.0K Jul 29 04:31 content
-rw-r--r--   1 root root 4.3K Nov 10  2023 cuda-keyring_1.0-1_all.deb
drwxr-xr-x   1 root root 4.0K Jul 25 13:38 datalab
drwxr-xr-x   6 root root  460 Jul 29 04:14 dev
drwxr-xr-x   1 root root 4.0K Jul 29 04:14 etc
drwxr-xr-x   2 root root 4.0K Apr 18  2022 home
drwxr-xr-x   3 root root 4.0K Jul 29 04:14 kaggle
lrwxrwxrwx   1 root root    7

In [None]:
import os
import warnings
import torch
import gradio as gr
from huggingface_hub import login
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext, Document
from llama_index.core import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

warnings.filterwarnings('ignore')

# Login to HuggingFace
def login_huggingface(token):
    login(token)

# Load data from the uploaded file
def load_data(directory):
    document = SimpleDirectoryReader(directory).load_data()
    return document

# Initialize the LLM
def initialize_llm(system_prompt, query_template, context_template):
    query_wrapper_prompt = PromptTemplate(query_template)
    context_template_prompt = PromptTemplate(context_template)

    llm = HuggingFaceLLM(
        context_window=4096,
        max_new_tokens=256,
        generate_kwargs={"temperature": 0.0, "do_sample": False},
        system_prompt=system_prompt,
        query_wrapper_prompt=query_wrapper_prompt,
        tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
        model_name="meta-llama/Llama-2-7b-chat-hf",
        device_map="auto",
        model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
    )

    return llm

# Initialize the embedding model
def initialize_embedding_model():
    embed_model = LangchainEmbedding(
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    )
    return embed_model

# Create service context
def create_service_context(llm, embed_model, chunk_size=1024):
    service_context = ServiceContext.from_defaults(
        chunk_size=chunk_size,
        llm=llm,
        embed_model=embed_model
    )
    return service_context

# Create vector store index
def create_vector_store_index(document, service_context):
    index = VectorStoreIndex.from_documents(document, service_context=service_context, show_progress=True)
    return index

# Query the index
def query_index(index, question):
    query_engine = index.as_query_engine()
    response = query_engine.query(question)
    return response

# Main function to process the input document and query
def process_input(file_path, question):
    token = "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"  # Replace with your HuggingFace token
    login_huggingface(token)

    # Print the file path
    print(f"Uploaded file path: {file_path}")

    # Pass the directory containing the file to load_data
    document = load_data(os.path.dirname(file_path))

    system_prompt = 'You are a Q&A assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided.'
    query_template = "\n" + system_prompt + "</s>\n{query_str}</s>"
    context_template = "We have provided context information below. \n\n{context_str}\n\nGiven this information, please answer the question: {query_str}\n"

    llm = initialize_llm(system_prompt, query_template, context_template)

    embed_model = initialize_embedding_model()

    service_context = create_service_context(llm, embed_model)

    index = create_vector_store_index(document, service_context)

    response = query_index(index, question)

    return response

# Create the Gradio interface
def create_gradio_interface():
    iface = gr.Interface(
        fn=process_input,
        inputs=[
            gr.File(label="Upload Document", type="filepath"),
            gr.Textbox(label="Enter your question")
        ],
        outputs="text",
        title="Document-based Q&A System",
        description="Upload a document and enter your question. The system will provide an answer based on the document."
    )
    return iface

if __name__ == "__main__":
    iface = create_gradio_interface()
    iface.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://fc77198f75c991a44f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful
Uploaded file path: /tmp/gradio/a3eec1c3a195f2654eff7598c4c7ea8725cc7cf7/SAYANTAN-RAY-2348057-Synopsis.docx


config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1923, in process_api
    result = await self.call_function(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1508, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/usr/local/lib/python3.10/dist-packages/anyio/to_thread.py", line 33, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 877, in run_sync_in_worker_thread
    return await future
  File "/usr/local/lib/python3.10/dist-packages/anyio/_backends/_asyncio.py", line 8

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://fc77198f75c991a44f.gradio.live
