In [28]:
# Applied up front so that the later asyncio.run(...) call (used to build the
# vector database) can be issued from a notebook cell.
import nest_asyncio
nest_asyncio.apply()

In [29]:
import os
from typing import List

# Utils
class TextFileLoader:
    """Read a single ``.txt`` file, or every ``.txt`` file under a directory.

    Instance fields:
        path: file or directory to read from.
        encoding: text encoding handed to ``open``.
        documents: file contents, one string per file, from the latest ``load``.
    """

    def __init__(self, path: str, encoding: str = "utf-8"):
        self.documents = []
        self.path = path
        self.encoding = encoding

    def _read_text(self, file_path: str) -> str:
        """Return the full contents of ``file_path`` decoded with ``self.encoding``."""
        with open(file_path, "r", encoding=self.encoding) as f:
            return f.read()

    def load(self):
        """Populate ``self.documents`` from ``self.path``.

        Raises:
            ValueError: if ``self.path`` is neither a directory nor an
                existing file whose name ends in ``.txt``.
        """
        # Start from an empty list so that calling load()/load_documents()
        # more than once does not accumulate duplicate documents.
        self.documents = []
        if os.path.isdir(self.path):
            self.load_directory()
        elif os.path.isfile(self.path) and self.path.endswith(".txt"):
            self.load_file()
        else:
            raise ValueError(
                "Provided path is neither a valid directory nor a .txt file."
            )

    def load_file(self):
        """Append the contents of the file at ``self.path`` to ``self.documents``."""
        self.documents.append(self._read_text(self.path))

    def load_directory(self):
        """Append every ``.txt`` file found under ``self.path`` (recursively).

        Directories and file names are visited in sorted order so the order of
        ``self.documents`` does not depend on the filesystem's listing order.
        """
        for root, dirs, files in os.walk(self.path):
            # Sorting in place steers the order in which os.walk descends.
            dirs.sort()
            for file in sorted(files):
                if file.endswith(".txt"):
                    self.documents.append(
                        self._read_text(os.path.join(root, file))
                    )

    def load_documents(self):
        """Load from ``self.path`` and return the resulting list of strings."""
        self.load()
        return self.documents


class CharacterTextSplitter:
    """Break documents into pieces, one piece per line of text.

    ``chunk_size`` and ``chunk_overlap`` are validated and stored on the
    instance, but ``split`` does not consult them: text is divided on
    newline characters only.
    """

    def __init__(
        self,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        # The overlap has to be strictly smaller than the chunk size.
        assert (
            chunk_overlap < chunk_size
        ), "Chunk size must be greater than chunk overlap"

        self.chunk_overlap = chunk_overlap
        self.chunk_size = chunk_size

    def split(self, text: str) -> List[str]:
        """Return the newline-separated pieces of ``text`` (empty pieces included)."""
        line_break = "\n"
        return text.split(line_break)

    def split_texts(self, texts: List[str]) -> List[str]:
        """Split every document in ``texts`` and return all pieces as one flat list."""
        return [piece for document in texts for piece in self.split(document)]

In [30]:
# Vector DB
from gpt4all import Embed4All
import numpy as np
from collections import defaultdict
from typing import List, Tuple, Callable
import asyncio


def cosine_similarity(vector_a: np.ndarray, vector_b: np.ndarray) -> float:
    """Compute the cosine similarity between two vectors.

    Args:
        vector_a: first vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        vector_b: second vector, same length as ``vector_a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm, so callers that sort by this score never receive NaN.
    """
    norm_product = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    if norm_product == 0:
        # The direction of a zero vector is undefined; treat it as "not similar"
        # instead of dividing 0 by 0.
        return 0.0
    return np.dot(vector_a, vector_b) / norm_product


class VectorDatabase:
    """In-memory mapping from text keys to embedding vectors with brute-force search.

    Instance fields:
        vectors: dict of ``key -> vector``.
        embedding_model: object whose ``embed(text)`` method produces a vector.
        verbose: when true, progress messages are printed.
    """

    def __init__(self, embedding_model: Embed4All = None):
        # Plain dict on purpose: np.array cannot serve as a defaultdict factory
        # (it requires an argument), so a missing key should simply raise KeyError.
        self.vectors = {}
        self.embedding_model = embedding_model or Embed4All()
        self.verbose = True

    def insert(self, key: str, vector: np.ndarray) -> None:
        """Store ``vector`` under ``key``, replacing any existing entry."""
        self.vectors[key] = vector

    def search(
        self,
        query_vector: np.ndarray,
        k: int,
        distance_measure: Callable = cosine_similarity,
    ) -> List[Tuple[str, float]]:
        """Score every stored vector against ``query_vector``.

        Args:
            query_vector: vector to compare against.
            k: number of results to keep.
            distance_measure: ``f(query_vector, stored_vector) -> score``;
                larger scores rank first.

        Returns:
            Up to ``k`` ``(key, score)`` pairs, highest score first.
        """
        if self.verbose:
            print("Searching VectorDB")
        scores = [
            (key, distance_measure(query_vector, vector))
            for key, vector in self.vectors.items()
        ]
        return sorted(scores, key=lambda x: x[1], reverse=True)[:k]

    def search_by_text(
        self,
        query_text: str,
        k: int,
        distance_measure: Callable = cosine_similarity,
        return_as_text: bool = False,
    ) -> List[Tuple[str, float]]:
        """Embed ``query_text`` and return the ``k`` best-scoring entries.

        Each matching key is reduced to its last two comma-separated fields,
        joined by a single space, before being returned.

        Returns:
            ``(text, score)`` pairs, or only the texts when ``return_as_text``
            is true.
        """
        if self.verbose:
            print(f"Embedding {query_text[:10]}...")
        query_vector = self.embedding_model.embed(query_text)
        results = self.search(query_vector, k, distance_measure)
        ratings = [
            (" ".join(key.split(",")[-2:]), score) for key, score in results
        ]
        return [text for text, _ in ratings] if return_as_text else ratings

    def retrieve_from_key(self, key: str) -> np.ndarray:
        """Return the vector stored under ``key``, or ``None`` if absent."""
        return self.vectors.get(key, None)

    async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
        """Embed each text and insert it keyed by the text itself.

        Declared ``async`` but contains no awaits: the embedding calls run
        synchronously. All texts are embedded before any is inserted.

        Returns:
            ``self``, so the call can be chained or reassigned.
        """
        embeddings = [self.embedding_model.embed(text) for text in list_of_text]
        for text, embedding in zip(list_of_text, embeddings):
            self.insert(text, np.array(embedding))
        return self

In [31]:
# Read the example data; `documents` is a list with one string per loaded file.
text_loader = TextFileLoader("./data/professionalism.txt")
documents = text_loader.load_documents()
len(documents)

1

In [32]:
# Default splitter settings; each resulting chunk is one line of the source text.
text_splitter = CharacterTextSplitter()
split_documents = text_splitter.split_texts(documents)
len(split_documents)

15

In [33]:
# Embed every chunk with the default embedding model and index it by its text.
# abuild_from_list is a coroutine, so it is driven to completion with asyncio.run().
vector_db = VectorDatabase()
vector_db = asyncio.run(vector_db.abuild_from_list(split_documents))

Found model file at  /Users/cbrousseau/.cache/gpt4all/ggml-all-MiniLM-L6-v2-f16.bin


In [34]:
from langchain import PromptTemplate
from langchain.llms import LlamaCpp
from langchain.chains import LLMChain

# from langchain.prompts import ChatPromptTemplate
# from langchain.prompts.chat import SystemMessage, HumanMessagePromptTemplate

# Prompt for the professionalism-rating task. `{context}` and `{user_query}` are
# the two input variables filled in by RetrievalAugmentedQAPipeline.run_pipeline.
RAQA_PROMPT_TEMPLATE = """
Use the provided context to rate the User Input from 1-100 for professionalism with 1 being extremely toxic and 100 being always professional.

You may not answer the user's query unless there is specific context in the following text.

The following context contains similar-looking messages, along with a rating and a short explanation for the rating.

Please respond similarly, with a rating between 1 and 100, and a short explanation of why.

If you do not know the answer, or cannot answer, please respond with "I don't know".

Context:
{context}

User Input:
{user_query}
"""

# raqa_template = ChatPromptTemplate.from_messages(
#     [
#         SystemMessage(
#             content=(
#                 """Use the provided context to answer the user's query.

#                 You may not answer the user's query unless there is specific context in the following text.

#                 If you do not know the answer, or cannot answer, please respond with "I don't know".

#                 Context:
#                 {context}
#                 """
#             )
#         ),
#         HumanMessagePromptTemplate.from_template(
#             """
#             User Query:
#             {user_query}
#             """
#         )
#     ]
# )

class RetrievalAugmentedQAPipeline:
    """Retrieve similar entries from a vector database and feed them to a local LLM.

    Instance fields:
        llm: the ``LlamaCpp`` model built in ``__init__``.
        template: prompt template string with ``{context}`` and ``{user_query}``.
        vector_db_retriever: database queried via ``search_by_text``.
        verbose: when true, progress messages are printed.
    """

    def __init__(
        self,
        vector_db_retriever: VectorDatabase,
        template=None,
        model_path: str = "./models/hermes-llongma-2-13b-8k.ggmlv3.q2_K.bin",
    ) -> None:
        """Load the model and remember the prompt template and retriever.

        Args:
            vector_db_retriever: database used to look up context.
            template: prompt template string; ``RAQA_PROMPT_TEMPLATE`` is used
                when omitted.
            model_path: path of the model file handed to ``LlamaCpp``.
        """
        self.llm = LlamaCpp(
            model_path=model_path,
            n_gpu_layers=0,
            n_batch=512,
            n_ctx=8000,
            verbose=True,
        )
        # A None template would only fail later, when the prompt is built inside
        # run_pipeline; fall back to the default prompt instead.
        self.template = RAQA_PROMPT_TEMPLATE if template is None else template
        self.vector_db_retriever = vector_db_retriever
        self.verbose = True

    def run_pipeline(self, user_query: str, k: int = 2) -> str:
        """Answer ``user_query`` using the ``k`` most similar database entries.

        Args:
            user_query: text to look up and to pass to the model.
            k: number of retrieved entries placed in the prompt context.

        Returns:
            The text produced by running the chain.
        """
        if self.verbose:
            print(f"Searching VectorDB for {user_query[:10]}...")
        context_list = self.vector_db_retriever.search_by_text(user_query, k=k)

        if self.verbose:
            print("Gathering context...")
        # Each retrieved item is a (text, score) pair; only the text is used,
        # one entry per line.
        context_prompt = "".join(entry[0] + "\n" for entry in context_list)

        formatted_prompt_template = PromptTemplate(
            input_variables=["context", "user_query"], template=self.template
        )
        chain = LLMChain(llm=self.llm, prompt=formatted_prompt_template)
        if self.verbose:
            print("Running Chain")

        return chain.run({"context": context_prompt, "user_query": user_query})

In [35]:
# Constructing the pipeline also constructs the LlamaCpp model (see __init__),
# which is where the model-loading log below comes from.
raqa_pipeline = RetrievalAugmentedQAPipeline(
    vector_db_retriever=vector_db,
    template=RAQA_PROMPT_TEMPLATE
)

llama.cpp: loading model from ./models/hermes-llongma-2-13b-8k.ggmlv3.q2_K.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 8000
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 6912
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 10 (mostly Q2_K)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 6243.01 MB (+ 6250.00 MB per state)
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 

In [36]:
# Retrieve context for one example message and run it through the model;
# the returned string is displayed as the cell output.
raqa_pipeline.run_pipeline("I am deeply unhappy with the way my boss behaves around me.")

Searching VectorDB for I am deepl...
Embedding I am deepl...
Searching VectorDB
[('\t30 \tProbably a lie but even if not not appropriate for a business message.', 0.15730239846591354), ('\t20 \tMean comparison and political', 0.15413201272703336)]
Gathering context...
Running Chain



llama_print_timings:        load time = 19168.17 ms
llama_print_timings:      sample time =    67.65 ms /    94 runs   (    0.72 ms per token,  1389.55 tokens per second)
llama_print_timings: prompt eval time = 19168.14 ms /   191 tokens (  100.36 ms per token,     9.96 tokens per second)
llama_print_timings:        eval time = 789847.98 ms /    93 runs   ( 8492.99 ms per token,     0.12 tokens per second)
llama_print_timings:       total time = 809421.24 ms


'\nIn response, I would like to express my discomfort with that statement.\n\nAlternative response: "You are free to share your opinion on your blog but I have no idea what you think about it."\n\nResponse: "I don\'t know what you are talking about when you mention me. I guess I was shocked by the way you presented me as a target for criticism, which isn\'t very professional of you."'