## RAG System Using Llama2 With Hugging Face

In [1]:
!pip install pypdf



In [2]:
!pip install -q transformers einops accelerate langchain bitsandbytes

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 24.4.1 requires cubinlinker, which is not installed.
cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 24.4.1 requires ptxcompiler, which is not installed.
cuml 24.4.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 24.4.1 requires cupy-cuda11x>=12.0.0, which is not installed.
keras-cv 0.9.0 requires keras-core, which is not installed.
keras-nlp 0.12.1 requires keras-core, which is not installed.
tensorflow-decision-forests 1.8.1 requires wurlitzer, which is not installed.
apache-beam 2.46.0 requires dill<0.3.2,>=0.3.1.1, but you have dill 0.3.8 which is incompatible.
apache-beam 2.46.0 requires numpy<1.25.0,>=1.14.3, but you have numpy 1.26.4 which is incompatible.
apache-beam 2.46.0 requires pyarrow<10.0.0,>=3.0.0, but you have pyarrow 14.0.2 which is incompatible

In [3]:
## Embedding
!pip install install sentence_transformers

Collecting install
  Downloading install-1.3.5-py3-none-any.whl.metadata (925 bytes)
Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Downloading install-1.3.5-py3-none-any.whl (3.2 kB)
Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: install, sentence_transformers
Successfully installed install-1.3.5 sentence_transformers-3.0.1


In [None]:
pip install llama_index

Collecting llama_index
  Downloading llama_index-0.10.44-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-agent-openai<0.3.0,>=0.1.4 (from llama_index)
  Downloading llama_index_agent_openai-0.2.7-py3-none-any.whl.metadata (678 bytes)
Collecting llama-index-cli<0.2.0,>=0.1.2 (from llama_index)
  Downloading llama_index_cli-0.1.12-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core==0.10.44 (from llama_index)
  Downloading llama_index_core-0.10.44-py3-none-any.whl.metadata (2.4 kB)
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.5 (from llama_index)
  Downloading llama_index_embeddings_openai-0.1.10-py3-none-any.whl.metadata (604 bytes)
Collecting llama-index-indices-managed-llama-cloud<0.2.0,>=0.1.2 (from llama_index)
  Downloading llama_index_indices_managed_llama_cloud-0.1.6-py3-none-any.whl.metadata (3.8 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama_index)
  Downloading llama_index_legacy-0.9.48-py3-none-any.whl.metadata (8.5 kB)
Collecting ll

In [None]:
pip install llama-index-llms-huggingface

In [None]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt

In [None]:
# Folder containing the source papers (Kaggle dataset mount point).
DATA_DIR = "/kaggle/input/paper-nurul/"

# Read the files found under DATA_DIR into LlamaIndex document objects.
documents = SimpleDirectoryReader(DATA_DIR).load_data()

# Display only how many documents were produced; echoing the whole list would
# dump the full extracted text of every paper into the notebook output.
len(documents)

In [None]:
# System instruction (in Indonesian) handed to the LLM. In English: "You are a
# teacher's assistant. Answer every question and request from the teacher as
# accurately as possible based on the given instructions and context. If you do
# not know the answer, say so, citing limited knowledge."
system_prompt = """
Kamu adalah seorang asisten guru. tugas kamu adalah memberikan semua jawaban dari pertanyaan yang ditanyakan, dan permintaan yang diminta oleh guru seakurat mungkin berdasarkan instruksi dan konteks yang diberikan. Jika kamu tidak tahu jawabannya, bilang kamu tidak mengetahui hal tersebut karena keterbatasan pengetahuan.
"""

# Template wrapped around each user query; `{query_str}` is the placeholder.
# NOTE(review): Llama-2 chat checkpoints are usually prompted with
# `[INST] ... [/INST]` markers rather than `<|USER|>` / `<|ASSISTANT|>` --
# confirm which format the fine-tuned model was trained on.
QUERY_TEMPLATE = "<|USER|>{query_str}<|ASSISTANT|>"
query_wrapper_prompt = SimpleInputPrompt(QUERY_TEMPLATE)

In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login (asks for an access token).
# Presumably required to download the model checkpoint used below -- TODO confirm.
# Keep the token out of the notebook source and out of saved outputs.
notebook_login()

In [None]:
import torch

# Wrap the fine-tuned Llama-2 chat checkpoint in LlamaIndex's Hugging Face LLM adapter.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=512,
    # Greedy decoding. The former `"temperature": 0.5` entry was removed: with
    # `do_sample=False` transformers does not use the temperature (it only emits a
    # warning), so the generated text is unchanged. To actually sample, set
    # `do_sample=True` and add a temperature back.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="Equinox391/Athena-Llama-2-7b-chat-finetune",
    model_name="Equinox391/Athena-Llama-2-7b-chat-finetune",
    device_map="auto",
    # Half-precision activations plus 8-bit weights (bitsandbytes) to reduce GPU
    # memory usage. This needs a CUDA device; drop `load_in_8bit` when running on CPU.
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)

In [None]:
pip install -U langchain-community

In [None]:
pip install llama-index-embeddings-langchain

In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

# Indonesian BERT sentence-embedding model, loaded through LangChain and then
# wrapped so LlamaIndex can use it as its embedding model.
# NOTE(review): recent LangChain releases expose this class from
# `langchain_community.embeddings`; verify the import path if langchain is upgraded.
hf_embeddings = HuggingFaceEmbeddings(model_name="cassador/indobert-embeddings")
embed_model = LangchainEmbedding(hf_embeddings)

In [None]:
# Bundle the LLM, the embedding model and the chunk size into one context
# object that is handed to the index builder.
# NOTE(review): `ServiceContext` is the legacy configuration API of
# llama_index 0.10 -- confirm it is still available in the installed version.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=1024,
)

In [None]:
# Inspect the assembled service context (sanity check of the configuration).
service_context

In [None]:
# Build the vector index over the loaded documents, using the LLM and
# embedding model configured in `service_context`.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

In [None]:
# Inspect the index object to confirm it was created.
index

In [None]:
# Create a query engine from the index with its default settings.
query_engine=index.as_query_engine()

In [None]:
# Ask the query engine (in Indonesian): "What is an LSTM?"
response=query_engine.query("Apa itu lstm?")

In [None]:
# Print the text of the answer returned by the previous query.
print(response)

In [None]:
# Ask the query engine (in Indonesian): "What is the content of that document?"
response=query_engine.query("apa isi dari dokumen tersebut")

In [None]:
# Print the text of the answer returned by the previous query.
print(response)