In [4]:
%pip install llama-index

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
from llama_index import VectorStoreIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext, StorageContext, load_index_from_storage

from langchain.llms.llamacpp import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import chromadb
from llama_index import SimpleDirectoryReader, StorageContext, VectorStoreIndex, ServiceContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings

database_path = "/home/tpllmws23/Chatbot-LLama-Pruefungsamt/Chatbot-Jan/databases/sentence-transformers/all-mpnet-base-v2/context_1024_chunk_128_overlap_0.db"
database_collection = "Pruefungsamt"
# model_path = "/home/tpllmws23/llms/llama-2-13b-chat.Q4_K_M.gguf"
model_path = "/home/tpllmws23/llms/mistral-7b-instruct-v0.2.Q4_K_M.gguf"

n_ctx = 4096

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(model_path=model_path,
        n_gpu_layers=-1,
        n_batch=512,
        n_ctx=n_ctx,
        f16_kv=True,
        verbose=False,
        temperature=0.0,
        top_p=1,
        callback_manager=callback_manager,
        return_full_text=False
)

from langchain_community.embeddings import LlamaCppEmbeddings
embedding = LlamaCppEmbeddings(model_path=model_path,
        n_gpu_layers=-1,
        n_batch=512,
        n_ctx=n_ctx,
        f16_kv=True,
        verbose=False
        )

embed_model = HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)

# check if storage already exists
if not os.path.exists(database_path):
    # load the documents and create the index
    documents = SimpleDirectoryReader("./main_data").load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)
    # store it for later
    index.storage_context.persist()
else:
    # load the existing index
    db2 = chromadb.PersistentClient(path=database_path)
    chroma_collection = db2.get_or_create_collection("Pruefungsamt")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)

                return_full_text was transferred to model_kwargs.
                Please confirm that return_full_text is what you intended.
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from /home/tpllmws23/llms/mistral-7b-instruct-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32   

In [5]:
from llama_index.retrievers import BaseRetriever as LIBaseRetriever
from langchain_core.retrievers import BaseRetriever as LCBaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from typing import List
from langchain_community.retrievers.llama_index import LlamaIndexRetriever

from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import LLMChain
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.chains import create_retrieval_chain
from langchain.globals import set_debug
from langchain.chains.combine_documents import create_stuff_documents_chain
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores.chroma import Chroma

set_debug(True)


class CustomRetriever(LCBaseRetriever):
    li_retriever: LIBaseRetriever | None = None

    def set_retriever(self, index: LIBaseRetriever):
        self.li_retriever = index

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        if self.li_retriever is None:
            raise Exception("No retriever is set")
        
        docs : List[Document] = []
        
        llama_index_nodes = self.li_retriever.retrieve(query)
        for node in llama_index_nodes:
            docs.append(Document(page_content = node.get_text(), metadata = node.metadata))

        return docs

custom_retriever = CustomRetriever()
custom_retriever.set_retriever(index.as_retriever())

template = """Answer the following question based only on the provided context. Also return the source in APA style:
{context}

Question: {input}"""

prompt = ChatPromptTemplate.from_template(template)


def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

chain = (
    {"context": custom_retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)


In [6]:
custom_retriever.get_relevant_documents("What are the requirements for studying MSI in masters degree at the HTWG?")

[Document(page_content='3)  Lehrveranstaltungen sind aus dem Wahlpflichtangeb ot aller drei Vertiefungsrichtungen und dem \nveröffentlichten Wahlpflichtkatalog für den Studiengan g MSI und anderer Masterprogramme der Hochschule \nKonstanz zu wählen.', metadata={'page_label': '4', 'file_name': 'SPO_MSI_SPONr5_Senat_10122019.pdf', 'file_path': '/home/tpllmws23/llms/main_data/SPO_MSI_SPONr5_Senat_10122019.pdf', 'file_type': 'application/pdf', 'file_size': 170767, 'creation_date': '2023-11-20', 'last_modified_date': '2023-11-20', 'last_accessed_date': '2024-03-09'}),
 Document(page_content='3)  Lehrveranstaltungen sind aus dem Wahlpflichtangeb ot aller drei Vertiefungsrichtungen und dem \nveröffentlichten Wahlpflichtkatalog für den Studiengan g MSI und anderer Masterprogramme der Hochschule \nKonstanz zu wählen.', metadata={'page_label': '2', 'file_name': 'SPO_MSI_SPONr5_Senat_10122019.pdf', 'file_path': '/home/tpllmws23/llms/main_data/SPO_MSI_SPONr5_Senat_10122019.pdf', 'file_type': 'appl

In [5]:
chain.invoke("What are the requirements for studying MSI in masters degree at the HTWG?")

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What are the requirements for studying MSI in masters degree at the HTWG?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,input>] Entering Chain run with input:
[0m{
  "input": "What are the requirements for studying MSI in masters degree at the HTWG?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,input> > 3:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "input": "What are the requirements for studying MSI in masters degree at the HTWG?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:chain:RunnableParallel<context,input> > 4:chain:RunnablePassthrough] Entering Chain run with input:
[0m{
  "input": "What are the requirements for studying MSI in masters degree at the HTWG?"
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:ch



Answer: To study MSI (Master Informatik) in a master's degree at the HTWG, you must have already chosen your preferred field of study during the application process. The allocation of places is based on the chosen field of study. For more information, please visit the official website of the HTWG Konstanz.

Source: Hochschule Konstanz | Brauneggerstr . 55 | 78462 Konstanz | www.htwg-konstanz.de[36;1m[1;3m[llm/end][0m [1m[1:chain:RunnableSequence > 7:llm:LlamaCpp] [5.39s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "\n\nAnswer: To study MSI (Master Informatik) in a master's degree at the HTWG, you must have already chosen your preferred field of study during the application process. The allocation of places is based on the chosen field of study. For more information, please visit the official website of the HTWG Konstanz.\n\nSource: Hochschule Konstanz | Brauneggerstr . 55 | 78462 Konstanz | www.htwg-konstanz.de",
        "generation_info": 

"\n\nAnswer: To study MSI (Master Informatik) in a master's degree at the HTWG, you must have already chosen your preferred field of study during the application process. The allocation of places is based on the chosen field of study. For more information, please visit the official website of the HTWG Konstanz.\n\nSource: Hochschule Konstanz | Brauneggerstr . 55 | 78462 Konstanz | www.htwg-konstanz.de"

: 