# RAG using Langchain

## Packages loading & import

In [1]:
# !pip --version
# !pip install langchain
# !pip install langchain_community
# !pip install langchain_huggingface
# !pip install langchain_text_splitters
# !pip install langchain_chroma
# !pip install rank-bm25
# !pip install huggingface_hub

In [21]:
import os
import json
import bs4
import nltk
import torch
import pickle
import numpy as np

# from pyserini.index import IndexWriter
# from pyserini.search import SimpleSearcher
from numpy.linalg import norm
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize

from langchain_community.llms import Ollama
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer, util
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.embeddings import JinaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer

from tqdm import tqdm

In [3]:
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

## Hugging face login
- Please apply the model first: https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct
- If you haven't been granted access to this model, you can use other LLM model that doesn't have to apply.
- You must save the hf token otherwise you need to regenrate the token everytime.
- When using Ollama, no login is required to access and utilize the llama model.

In [4]:
from huggingface_hub import login

hf_token = "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"
login(token=hf_token, add_to_git_credential=True)

In [5]:
!huggingface-cli whoami

Ethanzzzzz


## TODO1: Set up the environment of Ollama

### Introduction to Ollama
- Ollama is a platform designed for running and managing large language models (LLMs) directly **on local devices**, providing a balance between performance, privacy, and control.
- There are also other tools support users to manage LLM on local devices and accelerate it like *vllm*, *Llamafile*, *GPT4ALL*...etc.

### Launch colabxterm

In [6]:
# TODO1-1: You should install colab-xterm and launch it.
# Write your commands here.
# Implement on local machine, so xterm is not necessary
# !pip install colab-xterm
# %load_ext colabxterm

In [7]:
# TODO1-2: You should install Ollama.
# Install the by OllamaSetup.exe
# You may need root privileges if you use a local machine instead of Colab.
# %xterm
# curl -fsSL https://ollama.com/install.sh | sh

In [8]:
# TODO1-3: Pull Llama3.2:1b via Ollama and start the Ollama service in the xterm
# Write your commands in the xterm (local machine terminal, powershell)
# ollama pull llama3.2:1b (load the llm to local)
# ollama serve (start running ollama)

## Ollama testing
You can test your Ollama status with the following cells.

In [9]:
# Setting up the model that this tutorial will use
MODEL = "llama3.2:1b" # https://ollama.com/library/llama3.2:3b
# EMBED_MODEL = "jinaai/jina-embeddings-v2-base-en"
EMBED_MODEL = "nomic-ai/nomic-embed-text-v1"

In [10]:
# Initialize an instance of the Ollama model
# llm = Ollama(model=MODEL)
# temperaturea stands for model creativity
llm = Ollama(model=MODEL, temperature=0, top_k=1, top_p=0)
# Invoke the model to generate responses
response = llm.invoke("What is the capital of Taiwan?")
print(response)

  llm = Ollama(model=MODEL, temperature=0, top_k=1, top_p=0)


The capital of Taiwan is Taipei.


## Build a simple RAG system by using LangChain

### TODO2: Load the cat-facts dataset and prepare the retrieval database

In [11]:
# !wget https://huggingface.co/ngxson/demo_simple_rag_py/resolve/main/cat-facts.txt
# With local machine, just copy the content and paste it to the created 'cat-facts.txt'

In [12]:
# TODO2-1: Load the cat-facts dataset (as `refs`, which is a list of strings for all the cat facts)
# Write your code here
with open("cat-facts.txt", "r", encoding="utf-8") as f:
    refs = f.read().split('\n')
    print(refs)
    print(len(refs))

['On average, cats spend 2/3 of every day sleeping. That means a nine-year-old cat has been awake for only three years of its life.', 'Unlike dogs, cats do not have a sweet tooth. Scientists believe this is due to a mutation in a key taste receptor.', 'When a cat chases its prey, it keeps its head level. Dogs and humans bob their heads up and down.', 'The technical term for a cat’s hairball is a “bezoar.”', 'A group of cats is called a “clowder.”', 'Female cats tend to be right pawed, while male cats are more often left pawed. Interestingly, while 90% of humans are right handed, the remaining 10% of lefties also tend to be male.', 'A cat can’t climb head first down a tree because every claw on a cat’s paw points the same way. To get down from a tree, a cat must back down.', 'Cats make about 100 different sounds. Dogs make only about 10.', 'A cat’s brain is biologically more similar to a human brain than it is to a dog’s. Both humans and cats have identical regions in their brains that 

In [13]:
from langchain_core.documents import Document
docs = [Document(page_content=doc, metadata={"id": i}) for i, doc in enumerate(refs)]

In [14]:
# Create an embedding model
model_kwargs = {'trust_remote_code': True}
encode_kwargs = {'normalize_embeddings': False}
embeddings_model = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

  state_dict = loader(resolved_archive_file)
<All keys matched successfully>


In [15]:
# TODO2-2: Prepare the retrieval database
# You should create a Chroma vector store.
# search_type can be “similarity” (default), “mmr”, or “similarity_score_threshold”
vector_store = Chroma.from_documents(
    # Write your code here
    documents=docs,
    embedding=embeddings_model
)
retriever = vector_store.as_retriever(
    # Write your code here
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 5}
)

### Prompt setting

In [16]:
# TODO3: Set up the `system_prompt` and configure the prompt.
system_prompt = (
    "Only provide the exact answer from the context."
    "Do not add extra explanations."
    "Context: {context}"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

- For the vectorspace, the common algorithm would be used like Faiss, Chroma...(https://python.langchain.com/docs/integrations/vectorstores/) to deal with the extreme huge database.

In [17]:
# TODO4: Build and run the RAG system
# TODO4-1: Load the QA chain
# You should create a chain for passing a list of Documents to a model.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# TODO4-2: Create retrieval chain
# You should create retrieval chain that retrieves documents and then passes them on.
chain = create_retrieval_chain(retriever, question_answer_chain)


In [18]:
# Question (queries) and answer pairs
# Please do not modify this cell.
queries = [
    "How much of a day do cats spend sleeping on average?",
    "What is the technical term for a cat's hairball?",
    "What do scientists believe caused cats to lose their sweet tooth?",
    "What is the top speed a cat can travel over short distances?",
    "What is the name of the organ in a cat's mouth that helps it smell?",
    "Which wildcat is considered the ancestor of all domestic cats?",
    "What is the group term for cats?",
    "How many different sounds can cats make?",
    "What is the name of the first cat in space?",
    "How many toes does a cat have on its back paws?"
]
answers = [
    "2/3",
    "Bezoar",
    "a mutation in a key taste receptor",
    ["31 mph", "49 km"],
    "Jacobson’s organ",
    "the African Wild Cat",
    "clowder",
    "100",
    ["Felicette", "Astrocat"],
    "four",
]

In [19]:
counts = 0
err = []
for i, query in enumerate(queries):
    # TODO4-3: Run the RAG system
    response = chain.invoke({"input": query})
    print(f"Query: {query}\nResponse: {response['answer']}\n")
    # The following lines perform evaluations.
    # if the answer shows up in your response, the response is considered correct.
    if type(answers[i]) == list:
        for answer in answers[i]:
            if answer.lower() in response['answer'].lower():
                counts += 1
                break
            else:
                err.append({"Ans": answers[i], "Response": response['answer']})
    else:
        if answers[i].lower() in response['answer'].lower():
            counts += 1
        else:
            err.append({"Ans": answers[i], "Response": response['answer']})

# TODO5: Improve to let the LLM correctly answer the ten questions.
print(f"Correct numbers: {counts}")
print(f"Wrong pairs: {err}" if err else "All Correct!")

Query: How much of a day do cats spend sleeping on average?
Response: 2/3

Query: What is the technical term for a cat's hairball?
Response: bezoar.

Query: What do scientists believe caused cats to lose their sweet tooth?
Response: A mutation in a key taste receptor.

Query: What is the top speed a cat can travel over short distances?
Response: 31 mph (49 km)

Query: What is the name of the organ in a cat's mouth that helps it smell?
Response: Jacobson’s organ

Query: Which wildcat is considered the ancestor of all domestic cats?
Response: African Wild Cat

Query: What is the group term for cats?
Response: clowder

Query: How many different sounds can cats make?
Response: 100

Query: What is the name of the first cat in space?
Response: Felicette (a.k.a. "Astrocat")

Query: How many toes does a cat have on its back paws?
Response: Four.

Correct numbers: 9
Wrong pairs: [{'Ans': 'the African Wild Cat', 'Response': 'African Wild Cat'}]


In [32]:
# Get this method from ChatGPT
model = SentenceTransformer('all-MiniLM-L6-v2')
counts = 0
err = []

for i, query in enumerate(queries):
    response = chain.invoke({"input": query})
    print(f"Query: {query}\nResponse: {response['answer']}")

    # Compute semantic similarity
    if type(answers[i]) == list:
        ans = ' '.join(answers[i])
    else:
        ans = answers[i]
    answer_embeddings = model.encode(ans, convert_to_tensor=True)
    response_embedding = model.encode(response['answer'], convert_to_tensor=True)

    similarity_scores = util.pytorch_cos_sim(response_embedding, answer_embeddings)
    if similarity_scores >= 0.8:
        counts += 1
    else:
        err.append({"Ans": answers[i], "Response": response['answer']})
    
    print(f'Score: {similarity_scores.item()}\n')

print(f"Correct numbers: {counts}")
print(f"Wrong pairs: {err}" if err else "All Correct!")


Query: How much of a day do cats spend sleeping on average?
Response: 2/3
Score: 0.9999999403953552

Query: What is the technical term for a cat's hairball?
Response: bezoar.
Score: 0.9782807230949402

Query: What do scientists believe caused cats to lose their sweet tooth?
Response: A mutation in a key taste receptor.
Score: 0.992026686668396

Query: What is the top speed a cat can travel over short distances?
Response: 31 mph (49 km)
Score: 0.9811943769454956

Query: What is the name of the organ in a cat's mouth that helps it smell?
Response: Jacobson’s organ
Score: 1.0

Query: Which wildcat is considered the ancestor of all domestic cats?
Response: African Wild Cat
Score: 0.9873355627059937

Query: What is the group term for cats?
Response: clowder
Score: 0.9999999403953552

Query: How many different sounds can cats make?
Response: 100
Score: 1.0

Query: What is the name of the first cat in space?
Response: Felicette (a.k.a. "Astrocat")
Score: 0.9213581085205078

Query: How many to