# Simple RAG for GitHub issues using Hugging Face Zephyr and LangChain

## Import

In [1]:
!pip install -q torch transformers accelerate bitsandbytes transformers sentence-transformers faiss-gpu-cu12

In [2]:
# # If running in Google Colab, you may need to run this cell to make sure you're using UTF-8 locale to install LangChain
# import locale
# locale.getpreferredencoding = lambda: "UTF-8"

In [3]:
!pip install -q langchain langchain-community

## Prepare the data

In [1]:
from getpass import getpass
ACCESS_TOKEN = getpass("YOUR_GITHUB_PERSONAL_TOKEN")


In [2]:
from langchain.document_loaders import GitHubIssuesLoader

loader = GitHubIssuesLoader(
    repo="huggingface/peft",
    access_token=ACCESS_TOKEN,
    include_prs=False,
    state="all",
)

docs = loader.load()

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=30)
chunked_docs = splitter.split_documents(docs)

## Create the embeddings + retriever

In [4]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

db = FAISS.from_documents(chunked_docs, HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"))

  db = FAISS.from_documents(chunked_docs, HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"))
  from tqdm.autonotebook import tqdm, trange


In [5]:
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4}
)

## Load quantized model

In [6]:
import torch
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM

model_name = 'HuggingFaceH4/zephyr-7b-beta'

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(model_name)

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 8/8 [04:07<00:00, 30.89s/it]


## Setup the LLM chain

In [13]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser

text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "questions"],
    template = prompt_template,
)

llm_chain = prompt | llm | StrOutputParser()

In [14]:
from langchain_core.runnables import RunnablePassthrough

retriever = db.as_retriever()

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

## Compare the results

In [15]:
question = "How do you combine multiple adapters?"

In [18]:
llm_chain_result = llm_chain.invoke({"context":"", "question": question})

In [27]:
rag_chain_result = rag_chain.invoke(question)

In [22]:
print(llm_chain_result)


<|system|>
Answer the question based on your knowledge. Use the following context to help:



</s>
<|user|>
How do you combine multiple adapters?
</s>
<|assistant|>

  To combine multiple adapters, you need to ensure that they are compatible with each other and with the devices you want to connect. Here's how you can do it:

1. Identify the types of connections required: Determine which types of connectors you need for both the source device and the destination device. For example, if you want to connect a USB-C laptop to an HDMI monitor, you'll need a USB-C to HDMI adapter.

2. Choose the right adapters: Look for adapters that have the appropriate connectors at both ends. For instance, you might need a USB-C to Lightning adapter to connect your iPhone to a USB-C charger.

3. Connect the adapters: Plug one adapter into another until all the necessary connections are made. Make sure that the connectors fit securely into place.

4. Test the connection: Turn on your devices and check whe

In [28]:
print(rag_chain_result)


<|system|>
Answer the question based on your knowledge. Use the following context to help:

[Document(metadata={'url': 'https://github.com/huggingface/peft/issues/1802', 'title': 'Issues when switching between multiple adapters LoRAs ', 'creator': 'JhonDan1999', 'created_at': '2024-05-26T19:18:13Z', 'comments': 8, 'state': 'closed', 'labels': [], 'assignee': None, 'milestone': None, 'locked': False, 'number': 1802, 'is_pull_request': False}, page_content='The documentation does not mention the need to perform a merge when switching adapters. Additionally, the methods add_adapter, set_adapter, and enable_adapters do not appear to work\r\n\r\nPlease provide clarification on how to correctly switch between adapters'), Document(metadata={'url': 'https://github.com/huggingface/peft/issues/1045', 'title': 'add_weighted_adapter() is unusable, throws error: "Invalid type <class \'list\'> found in target_modules"', 'creator': 'Vectorrent', 'created_at': '2023-10-22T21:42:32Z', 'comments': 6, '