In [1]:
import re
import json
from typing import Optional

import spacy
import numpy as np
from sentence_transformers import SentenceTransformer, util
from llama_cpp import Llama, LogitsProcessorList
from lmformatenforcer import CharacterLevelParser, JsonSchemaParser
from lmformatenforcer.integrations.llamacpp import build_llamacpp_logits_processor, build_token_enforcer_tokenizer_data

from src.utils.lexrank import degree_centrality_scores
from src.utils.prompts import get_summarization_prompt, get_question_prompt

from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import Chroma


SENTENCE_TRANSFORMERS_HOME="./models/embeddings"
MODEL_PATH = "models/llama-2-13b-chat.Q2_K.gguf"
EMBEDDING_MODEL_PATH = "all-mpnet-base-v2"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_PATH,
    cache_folder=SENTENCE_TRANSFORMERS_HOME
)

In [3]:
raw_documents = TextLoader('books/MarcusAurelius.txt').load()

In [4]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = Chroma.from_documents(documents, embedding_model)

Created a chunk of size 2180, which is longer than the specified 1000
Created a chunk of size 2625, which is longer than the specified 1000
Created a chunk of size 1202, which is longer than the specified 1000
Created a chunk of size 1805, which is longer than the specified 1000
Created a chunk of size 1993, which is longer than the specified 1000
Created a chunk of size 1143, which is longer than the specified 1000
Created a chunk of size 2900, which is longer than the specified 1000
Created a chunk of size 2598, which is longer than the specified 1000
Created a chunk of size 1819, which is longer than the specified 1000
Created a chunk of size 1224, which is longer than the specified 1000
Created a chunk of size 1063, which is longer than the specified 1000
Created a chunk of size 1215, which is longer than the specified 1000
Created a chunk of size 1037, which is longer than the specified 1000
Created a chunk of size 1104, which is longer than the specified 1000
Created a chunk of s

In [6]:
question = "When was Marcus Aurelius born?"

In [8]:
docs = db.similarity_search(question)
page_contents = [doc.page_content for doc in docs]
prompt = get_question_prompt(question, page_contents[0])

In [12]:
from langchain.docstore.document import Document

doc =  Document(page_content=page_contents[0], metadata={"source": "local"})

In [14]:

text_splitter2 = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
documents = text_splitter2.split_documents([doc])
db = Chroma.from_documents(documents, embedding_model)

In [16]:
docs = db.similarity_search(question)
page_contents = [doc.page_content for doc in docs]

In [18]:
page_contents

"MARCUS AURELIUS ANTONINUS was born on April 26, A.D. 121. His real name\nwas M. Annius Verus, and he was sprung of a noble family which claimed\ndescent from Numa, second King of Rome. Thus the most religious of\nemperors came of the blood of the most pious of early kings. His father,\nAnnius Verus, had held high office in Rome, and his grandfather, of\nthe same name, had been thrice Consul. Both his parents died young, but\nMarcus held them in loving remembrance. On his father's death Marcus\nwas adopted by his grandfather, the consular Annius Verus, and there was\ndeep love between these two. On the very first page of his book Marcus\ngratefully declares how of his grandfather he had learned to be gentle\nand meek, and to refrain from all anger and passion. The Emperor Hadrian\ndivined the fine character of the lad, whom he used to call not Verus\nbut Verissimus, more Truthful than his own name. He advanced Marcus to\nequestrian rank when six years of age, and at the age of eight ma

In [19]:
prompt = get_question_prompt(question, page_contents[0])

In [20]:
result = self.model(prompt, temperature=0.0, max_tokens=1024)

NameError: name 'self' is not defined