### Authored by Omar Hoang

# Installing required libraries

In [None]:
!pip install chromadb langchain transformers sentence_transformers langchain-community transformers bitsandbytes accelerate

Collecting chromadb
  Downloading chromadb-0.5.5-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain
  Downloading langchain-0.2.11-py3-none-any.whl.metadata (7.1 kB)
Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.10-py3-none-any.whl.metadata (2.7 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.111.1-py3-none-any.whl.metadata (26 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.30.4-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.5.0-py2.py3-none-any.whl.metada

# Setting up the LLaMA model

In [None]:
# -*- coding: utf-8 -*-
"""
Authored by Leon Garza

Original file is located at
    https://colab.research.google.com/drive/1VjzeOv58SFkjrvhHMghdf8BjL80af8gt


# RAG with LLaMa 7B

"""
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
import os

# Make the Hugging Face access token available as SECRET_TOKEN without storing
# it in the notebook: reuse the variable if it is already set, otherwise prompt
# for it. A token that was ever committed in plain text should be revoked.
if not os.environ.get('SECRET_TOKEN'):
    from getpass import getpass
    os.environ['SECRET_TOKEN'] = getpass('Hugging Face access token: ')

# Sentence-embedding model used to vectorise the text chunks and the queries.
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

# Use the current CUDA device when a GPU is available, otherwise the CPU.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Number of texts encoded per batch.
l6_batch_size = 32

embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': l6_batch_size}
)

  from tqdm.autonotebook import tqdm, trange
  warn_deprecated(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

## Mounting Google Drive to colab notebook

In [None]:
from google.colab import drive
# Run this cell to mount your Google Drive at /content/drive.
# Later cells read the book and CSV from, and write the Chroma database and
# result files to, paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


## Using text file and csv files for Chroma Database

In [None]:
"""## Building the Vector Index

We now need to use the embedding pipeline to build our embeddings and store them in a Chroma vector index. To begin we'll initialize our persistent chroma collection
Code for generating this persistent collection is in another file.
This consistent collection is going to be stored in your computer once you run it.

"""
import chromadb

# Where the Chroma database is persisted on the mounted Drive, and the name of
# the collection this notebook works with.
database_path = '/content/drive/MyDrive/CS4371/Source_code'
collection_name = "GOT1"

# Persistent client backed by the files stored under `database_path`.
chroma_client = chromadb.PersistentClient(path=database_path)

# Open the collection when it already exists; otherwise create it.
collection = chroma_client.get_or_create_collection(collection_name)

In [None]:
# ONLY RUN CELL IF RESETTING COLLECTION!!!!
#chroma_client.delete_collection(name='GOT1')

### Game of Thrones

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
# Chunk size handed to the splitter below.
textsplitter_size = 250

# Load the whole book from the .txt file on the mounted Drive.
book_path = '/content/drive/MyDrive/CS4371/Source_code/BOOKS/A GAME OF THRONES.txt'
with open(book_path, encoding='utf-8') as file:
    book_text = file.read()

# Break the book into chunks with no overlap between neighbouring chunks,
# then report how many chunks were produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=textsplitter_size,
    chunk_overlap=0,
)
texts = text_splitter.split_text(book_text)
print(len(texts))

8790


In [None]:
# embed_documents takes the list of chunks and converts each chunk into a
# numerical vector that captures its semantic meaning.
embeddings = embed_model.embed_documents(texts)

# `embeddings` is a list of lists (one vector per chunk), all held in memory.
# The original author warns that running this cell might crash the runtime —
# presumably from memory pressure; TODO confirm.
print(len(embeddings))

8790


In [None]:
# Collection ids: the position of each chunk in `texts`, as a string ("0", "1", ...).
id_lst = list(map(str, range(len(texts))))
print(len(id_lst))

8790


In [None]:
# Write each chunk and its corresponding embedding to the persistent collection.
# `upsert` is used instead of `add` so that re-running the notebook is idempotent:
# `add` leaves records whose ids already exist untouched, so vectors written by
# an earlier run (different chunking or embedding model) would silently survive.
# NOTE(review): the recorded self-query further down returns id '0' at distance
# ~0.62 rather than ~0, which is consistent with such stale vectors — confirm.
collection.upsert(ids=id_lst, embeddings=embeddings, documents=texts)



In [None]:
# Sanity check that the chunks are in the collection: query with the embedding
# of the first chunk and print the 5 nearest records (ids, distances, documents).
# NOTE(review): in the recorded output id '0' comes back at distance ~0.62 from
# its own embedding; an exact stored copy would be expected at ~0. Verify that
# the collection does not still hold vectors from an earlier run.
res = collection.query(query_embeddings=embeddings[0], n_results=5)
print(res)

{'ids': [['0', '6', '8', '11', '3']], 'distances': [[0.6249571442604065, 0.6430827975273132, 0.6481829285621643, 0.668796181678772, 0.7051805257797241]], 'metadatas': [[None, None, None, None, None]], 'embeddings': None, 'documents': [['A GAME OF THRONES\n\nCONTENTS\n\nCOVER\n\nTITLE PAGE', 'JON\n\nEDDARD\n\nCATELYN\n\nSANSA\n\nEDDARD\n\nTYRION', 'TYRION\n\nEDDARD\n\nCATELYN\n\nJON\n\nTYRION\n\nEDDARD', 'TYRION\n\nSANSA\n\nEDDARD\n\nCATELYN\n\nJON\n\nDAENERYS', 'BRAN\n\nTYRION\n\nJON\n\nDAENERYS\n\nEDDARD\n\nTYRION']], 'uris': None, 'data': None, 'included': ['metadatas', 'documents', 'distances']}


In [None]:
#Load llama2
from torch import cuda, bfloat16
import transformers
import bitsandbytes as bnb
import accelerate

# Chat-tuned Llama 2 7B checkpoint on the Hugging Face Hub.
model_id = 'meta-llama/Llama-2-7b-chat-hf'

# Current CUDA device if a GPU is available, otherwise the CPU. This value is
# only used for the status message below; actual weight placement is decided
# by `device_map='auto'`.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# Quantization configuration (requires the `bitsandbytes` library): load the
# weights in 4-bit NF4 with double quantization and compute in bfloat16, so the
# model needs less GPU memory.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# The Hugging Face items below need an access token. It is read from the
# environment (or prompted for) instead of being hard-coded in the notebook;
# a token that was ever committed in plain text should be revoked.
import os
from getpass import getpass

hf_auth = (
    os.environ.get('SECRET_TOKEN')
    or os.environ.get('HF_TOKEN')
    or getpass('Hugging Face access token: ')
)

# `token` replaces the deprecated `use_auth_token` argument.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

# `trust_remote_code` is intentionally not enabled: Llama 2 is implemented
# inside transformers, so no code from the model repository has to be executed.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    token=hf_auth
)
# Inference only: switch off training-mode behaviour such as dropout.
model.eval()
print(f"Model loaded on {device}")



config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Model loaded on cuda:0


In [None]:
"""The pipeline requires a tokenizer which handles the translation of human readable plaintext to LLM readable token IDs. The Llama 2 7B models were trained using the Llama 2 7B tokenizer, which we initialize like so:"""

# Tokenizer matching the model checkpoint; `token` replaces the deprecated
# `use_auth_token` argument.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)

# Generation settings passed to the pipeline below.
llama_temp = 0.4      # sampling temperature
llama_tokens = 50     # maximum number of newly generated tokens per answer
llama_penalty = 1.7   # repetition penalty

# Now we're ready to initialize the HF pipeline. There are a few additional
# parameters that we must define here; they are explained inline.
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # the output contains the prompt followed by the generated text
    task='text-generation',
    # we pass model parameters here too
    temperature=llama_temp,  # lower values make outputs more deterministic, higher values more random; only used when sampling is enabled
    max_new_tokens=llama_tokens,  # max number of tokens to generate in the output
    repetition_penalty=llama_penalty  # without this the output begins repeating; helps to reduce repetition in the generated text
)



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
"""
We have to put our llama2 pipeline in langchain pipeline in order to be used in RetrievalQA
"""

from langchain_community.llms import HuggingFacePipeline

# Wrap the transformers text-generation pipeline in LangChain's LLM interface;
# the RetrievalQA chain built in a later cell receives it as `llm`.
llm = HuggingFacePipeline(pipeline=generate_text)

  warn_deprecated(


In [None]:
"""
Now we have to Initialize a RetrievalQA Chain and put our collection in a langchain vectorstore.
"""

from langchain_community.vectorstores import Chroma

# Expose the existing Chroma collection through LangChain's vector-store wrapper.
# The collection name comes from `collection_name` (defined where the Chroma
# client is created) rather than a repeated literal, so the wrapper can never
# silently attach to a different collection than the one that was populated.
vectorstore = Chroma(client=chroma_client,
    collection_name=collection_name,
    embedding_function=embed_model)

from langchain.chains import RetrievalQA

# "stuff" chain: the k=10 retrieved chunks are inserted into a single prompt
# together with the question. Source documents are not returned.
rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=vectorstore.as_retriever(search_kwargs={"k": 10}),
    return_source_documents=False)

  warn_deprecated(


In [None]:
#question = "Who is the one the call 'King Slayer'?"
#result = rag_pipeline(question)

In [None]:
#print(result['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

now called the Kingslayer, and a second, Ser

king,

king … although, come to think on it, the king

the Kingsguard,

who is king here?”

Commander of the Kingsguard.

the kingsroad.”

the King, oh, what a great man that one is, him

Sack of King’s Landing,

who is it died, if not the king?”

Question: Who is the one the call 'King Slayer'?
Helpful Answer: You seem to be referring to George R.R. Martin's book series "A Song of Ice and Fire." In this series, the one who is called "Kingslayer" is Jaime Lannister.


# Questions

In [None]:
import pandas as pd

# Read the Talos dataset CSV from Google Drive into a DataFrame.
# Later cells extract questions and ground-truth answers from its 'text' column.
df_sec = pd.read_csv('/content/drive/MyDrive/CS4371/Source_code/talos_dataset.csv')

# Function to extract the text between two markers
def extract_between_markers(answer, start_marker, end_marker):
    """Return the text located between two markers, stripped of surrounding whitespace.

    Args:
        answer: The text to search. Non-string values (for example NaN from an
            empty CSV cell) are treated as containing no markers.
        start_marker: Marker that precedes the wanted text.
        end_marker: Marker that follows the wanted text.

    Returns:
        The stripped text between the first `start_marker` and the first
        `end_marker` that occurs after it, or None when `answer` is not a
        string or either marker cannot be found in that order.
    """
    # Non-string cells cannot contain markers; avoid a TypeError inside .apply().
    if not isinstance(answer, str):
        return None

    marker_pos = answer.find(start_marker)
    if marker_pos == -1:
        return None
    start_index = marker_pos + len(start_marker)

    # Look for the end marker only after the start marker, so an earlier
    # occurrence of the end marker cannot produce an empty or wrong slice.
    end_index = answer.find(end_marker, start_index)
    if end_index == -1:
        return None

    return answer[start_index:end_index].strip()

In [None]:
# The question is whatever sits between the [INST] and [/INST] tags of each row.
df_sec['question'] = df_sec['text'].apply(
    lambda row_text: extract_between_markers(row_text, "[INST]", "[/INST]")
)
print(df_sec['question'])

0       What is Netgear RAX30 JSON Parsing getblocksch...
1       What is NVIDIA D3D10 Driver Shader Functionali...
2       What is Google Chrome Video Encoder Metrics de...
3       What is llama.cpp GGUF library header.n_kv hea...
4       What is llama.cpp GGUF library header.n_tensor...
                              ...                        
1799    What is Microsoft Windows FastFAT NumberOfFATs...
1800    What is Pidgin libpurple Gadu Gadu HTTP Conten...
1801    What is Pidgin libpurple Mxit Emoticon Name Le...
1802    What is Pidgin libpurple SIP/SIMPLE Content-Le...
1803    What is Pidgin for Windows URL Handling Remote...
Name: question, Length: 1804, dtype: object


In [None]:
# The ground-truth answer is whatever sits between [/INST] and </s> in each row.
df_sec['groundtruth'] = df_sec['text'].apply(
    lambda row_text: extract_between_markers(row_text, "[/INST]", "</s>")
)
print(df_sec['groundtruth'])

0       A stack-based buffer overflow vulnerability ex...
1       An out-of-bounds read vulnerability exists in ...
2       A denial of service vulnerability exists in th...
3       A heap-based buffer overflow vulnerability exi...
4       A heap-based buffer overflow vulnerability exi...
                              ...                        
1799    An exploitable local privileged code execution...
1800    An exploitable remote code execution vulnerabi...
1801    An exploitable remote code execution vulnerabi...
1802    An exploitable remote code execution vulnerabi...
1803    An exploitable remote code execution vulnerabi...
Name: groundtruth, Length: 1804, dtype: object


In [None]:
import pandas as pd

# Game of Thrones evaluation questions, one string per question.
questions = [
    "Who is Ramsay Bolton?",
    "What House rules the Kingdom of the North?",
    "Who is the youngest Lannister sibling?",
    "What is the name of the Valyrian steel sword owned by Eddard Stark?",
    "Who pushes Bran Stark from the tower window?",
    "Who are the members of House Stark?",
    "What are the political implications of Eddard Stark's appointment as Hand of the King?",
    "Who is known as the Kingslayer?",
    "Which character is known as the Mother of Dragons?",
    "What is the name of the ancestral home of House Tully?",
    "Who is the father of Daenerys Targaryen?",
    "What is the name of the continent where most of the story takes place?",
    "What is the name of Jon Snow's direwolf?",
    "Who is known as the 'Queen of Thorns'?",
    "What is the name of the Wall made of ice?",
    "Who is the Captain of the ship that takes Tyrion Lannister to King's Landing after his capture by Catelyn Stark?",
    "Who is the Hound?",
    "What is the name of Jon Snow’s sword?",
    "How does Varys's role impact the power dynamics in King's Landing?",
    "Who is the most honorable in the story?",
    "What is the motto of House Stark?",
    "Who is the king that Daenerys Targaryen marries?",
    "Who is the main steward of the Night's Watch and a close friend to Jon Snow?",
    "What is the name of the wolf pup that Arya Stark adopts?",
    "Who is the youngest Stark son?",
    "How does the relationship between Theon Greyjoy and Ramsay Bolton affect Theon's character development?",
    "How does the character of Petyr Baelish use his intelligence and manipulation to climb the political ladder?",
    "Who is the first character to discover the existence of White Walkers in the prologue?",
    "What is the full name of Littlefinger?",
    "What are the names of Daenerys’s three dragons?",
    "What clan are referred to as horselords?",
    "What House rules The Riverlands?",
    "What is the significance of Winterfell?",
    "What are the consequences of Daenerys Targaryen's decisions in her quest to reclaim the Iron Throne?",
    "Who mentors Jon Snow at the Wall?",
    "Who is the youngest child of Eddard and Catelyn Stark?",
    "What are the implications of Tyrion Lannister's marriage to Sansa Stark?",
    "Who is appointed as Hand of the King after Jon Arryn's death?",
    "What is the role of the maesters?",
    "What is the name of the continent where the Dothraki live?",
    "How does the power struggle between the houses affect the stability of the Seven Kingdoms?",
    "Who is the ruler of the Vale of Arryn?",
    "Who is Khal Drogo?",
    "Who is the King of the Seven Kingdoms when the story begins?",
    "What are the significant political ramifications of Tyrion Lannister's trial by combat?",
    "What is the name of Tyrion Lannister's lover from Essos?",
    "Who trains Arya Stark in Braavos?",
    "What is the significance of the character development of Jaime Lannister throughout the series?",
    "Are dragons mentioned?",
    "What is the Iron Throne?",
    "Who is the mother of Joffrey, Myrcella, and Tommen?",
    "Who is the evilest character in the story?",
    "How do the events at the Red Wedding change the political alliances in Westeros?",
    # Add more questions as needed
]
print(len(questions))

# One-column DataFrame holding the questions; answers are added in a later cell.
df = pd.DataFrame({"question": questions})

53


In [None]:
# Function to apply rag_pipeline to a question
def get_answer(question, answer_only=False):
    """Run one question through the RAG chain and return its answer text.

    Args:
        question: The question string passed to the chain as its "query" input.
        answer_only: When False (the default) the chain's "result" text is
            returned unchanged. Because the generation pipeline is created with
            return_full_text=True, that text contains the prompt and retrieved
            context as well as the answer. When True, only the text after the
            first "Helpful Answer:" marker is returned (stripped); if the
            marker is absent the full text is returned.

    Returns:
        The chain's "result" string, optionally reduced to the answer part.
    """
    # `invoke` replaces calling the chain object directly, which is deprecated.
    result = rag_pipeline.invoke({"query": question})
    answer_text = result["result"]  # Adjust this if your pipeline's return format is different

    if answer_only:
        _, marker, tail = answer_text.partition("Helpful Answer:")
        if marker:
            return tail.strip()
    return answer_text

In [None]:
# Answer every question with the RAG pipeline and keep the text in a new column.
df["answer"] = df["question"].map(get_answer)
print(df)

                                             question  \
0                               Who is Ramsay Bolton?   
1          What House rules the Kingdom of the North?   
2              Who is the youngest Lannister sibling?   
3   What is the name of the Valyrian steel sword o...   
4        Who pushes Bran Stark from the tower window?   
5                 Who are the members of House Stark?   
6   What are the political implications of Eddard ...   
7                     Who is known as the Kingslayer?   
8   Which character is known as the Mother of Drag...   
9   What is the name of the ancestral home of Hous...   
10           Who is the father of Daenerys Targaryen?   
11  What is the name of the continent where most o...   
12           What is the name of Jon Snow's direwolf?   
13             Who is known as the 'Queen of Thorns'?   
14          What is the name of the Wall made of ice?   
15  Who is the Captain of the ship that takes Tyri...   
16                             

In [None]:
# Keep only the first 53 Talos rows (presumably to match the 53 Game of Thrones
# questions above — TODO confirm) and answer each one with the RAG pipeline.
# .copy() makes the slice an independent DataFrame, so adding the column below
# neither triggers SettingWithCopyWarning nor depends on view/copy ambiguity.
df_sec = df_sec.iloc[:53].copy()
df_sec["answer"] = df_sec["question"].apply(get_answer)

  warn_deprecated(
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [None]:
# Save the DataFrame (`df_sec`, including the columns added in earlier cells)
# to a CSV file in Google Drive; index=False leaves the row index out of the file.
save_path = '/content/drive/MyDrive/CS4371/Source_code/talos_run5.csv'
df_sec.to_csv(save_path, index=False)

In [None]:
# Save the same DataFrame to a pickle file in Google Drive.
# Note that `save_path` is reassigned here to the .pkl location.
# Pickle files should only be loaded back from a trusted location.
save_path = '/content/drive/MyDrive/CS4371/Source_code/talos_run5.pkl'
df_sec.to_pickle(save_path)