In [8]:
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_ollama import OllamaEmbeddings,ChatOllama
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import faiss
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

In [9]:

# Model setup: embeddings are served by Ollama, text generation by a local GGUF model via llama.cpp.
# Alternative backends (disabled):
#llm = ChatOllama(model="llama3.2:latest", temperature=0)
#embeddings_model = LlamaCppEmbeddings(model_path="models/mxbai-embed-large-v1.Q8_0.gguf")

embeddings_model = OllamaEmbeddings(model="mxbai-embed-large")

llm = LlamaCpp(
    model_path="models/Llama-3.2-3B-Instruct-Q6_K_L.gguf",
    n_gpu_layers=-1,  # -1 asks llama.cpp to offload every layer to the GPU
    # The context window has to hold the system prompt, all retrieved chunks
    # (the splitter emits chunks of up to 1000 characters and the retriever
    # returns several of them) and the generated tokens. 1024 tokens leaves
    # almost no headroom for that, so a longer set of chunks would overflow it.
    n_ctx=4096,
    temperature=0
)


llama_model_loader: loaded meta data with 35 key-value pairs and 255 tensors from models/Llama-3.2-3B-Instruct-Q6_K_L.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.2 3B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.2
llama_model_loader: - kv   5:                         general.size_label str              = 3B
llama_model_loader: - kv   6:                            general.license str              = llama3.2
llama_model_loader: - kv   7:              

In [10]:
# PDFs to index; paths are relative to the notebook's working directory.
pdf_paths = [
    "test-data/Dijkstra-Algorithmus.pdf",
    "test-data/Prog1_K06.pdf"
]

# Collect the Documents produced by each PDF loader. The files are read one
# after another anyway, so the synchronous API is used: it yields the same
# documents as the async iterator but does not depend on IPython's top-level
# `await` support, so the cell also runs as a plain Python script.
pages = []
for path in pdf_paths:
    pages.extend(PyPDFLoader(path).load())

# Add every *.txt file found in test-data/ as additional documents.
loader = DirectoryLoader(path="test-data", glob="*.txt", loader_cls=TextLoader)
pages.extend(loader.load())

Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)
Ignoring wrong pointing object 44 0 (offset 0)
Ignoring wrong pointing object 75 0 (offset 0)
Ignoring wrong pointing object 90 0 (offset 0)
Ignoring wrong pointing object 115 0 (offset 0)


In [11]:
# Break the loaded documents into retrieval-sized chunks: at most 1000 units of
# the splitter's length function (characters by default), with no overlap
# between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(pages)

In [12]:
# Build an in-memory FAISS index over the chunks.
# Alternative (disabled): a Chroma store persisted under ./v-store.

# Embed a throwaway query once to learn the vector dimensionality the index needs.
embedding_dim = len(embeddings_model.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

vector_store = FAISS(
    embedding_function=embeddings_model,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Embeds every chunk and adds it to the index and the docstore.
vector_store.add_documents(documents=splits)

# Alias kept because other cells refer to the store as `vectorstore`.
vectorstore = vector_store

# The number of results must be passed through `search_kwargs`; a bare `k=`
# keyword is not a retriever setting and does not reach the search call, so
# the retriever would silently fall back to its default.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

In [13]:
# Retrieval sanity check: fetch the six nearest chunks for a single keyword.
# Each hit is a (Document, score) pair; the score comes straight from the FAISS
# index (presumably a distance, i.e. lower means closer - confirm for the index type used).
sim_question = "Datenkapselung"
docs = vectorstore.similarity_search_with_score(query=sim_question, k=6)

print(len(docs))
docs[0]

6


(Document(metadata={'source': 'test-data/Prog1_K06.pdf', 'page': 25}, page_content='Wintersemester 2022/23                  Prof. Dr. Carsten SinzHochschule Karlsruhe                  Vorlesung Programmieren 1\nZusammenfassung\n20\n•Datenkapselung: •Erlaubt Daten / Methoden vor dem Zugriﬀ von außen zu verbergen\n•Zugriﬀ nur über deﬁnierte Schnittstelle\u2028•Sichtbarkeit: •Sichtbarkeits-Modiﬁkatoren (public / private) und Pakete unterstützen die Datenkapselung'),
 np.float32(0.45864785))

In [14]:


# Instructions for the answering model. The {context} placeholder is filled by
# the stuff-documents chain with the text of the retrieved chunks.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise. "
    "Answer only with the information and not with any kind of chat!"
    "\n\n{context}"
)

# Two-message template: fixed system instructions plus the user's question ({input}).
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

# Pipeline: retriever -> chunks stuffed into the prompt -> LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask one sample question; the result is a dict that echoes the input and the
# retrieved context documents alongside the model output.
rag_chain.invoke({"input": "Warum sollte ich Daten kapseln?"})


llama_perf_context_print:        load time =   22686.57 ms
llama_perf_context_print: prompt eval time =       0.00 ms /   683 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   255 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   59340.96 ms /   938 tokens


{'input': 'Warum sollte ich Daten kapseln?',
 'context': [Document(metadata={'source': 'test-data/Prog1_K06.pdf', 'page': 25}, page_content='Wintersemester 2022/23                  Prof. Dr. Carsten SinzHochschule Karlsruhe                  Vorlesung Programmieren 1\nZusammenfassung\n20\n•Datenkapselung: •Erlaubt Daten / Methoden vor dem Zugriﬀ von außen zu verbergen\n•Zugriﬀ nur über deﬁnierte Schnittstelle\u2028•Sichtbarkeit: •Sichtbarkeits-Modiﬁkatoren (public / private) und Pakete unterstützen die Datenkapselung'),
  Document(metadata={'source': 'test-data/Prog1_K06.pdf', 'page': 0}, page_content='Vorlesung Programmieren Kapitel 6: Konvertierung, Datenkapselung, SichtbarkeitProf. Dr. Carsten Sinz\n24.10.2022'),
  Document(metadata={'source': 'test-data/Prog1_K06.pdf', 'page': 6}, page_content='Wintersemester 2022/23                  Prof. Dr. Carsten SinzHochschule Karlsruhe                  Vorlesung Programmieren 1\nDatenkapselung\n7\n•Datenkapselung: •Verbergen von Daten/Informa