# Experiment: question answering over a CSV file with a FAISS vector store and a local GPT4All model

In [6]:
# Install langchain
!pip install langchain

# Install vectorStore
!pip install faiss-cpu

# Install gpt4all
!pip install gpt4all

# Install huggingfaceHub
!pip install huggingface-hub

# Install PyPdf for working with PDFs
!pip install pypdf


Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Downloading faiss_cpu-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


Quellen
- https://medium.com/@vikastiwari708409/how-to-use-gpt4all-llms-with-langchain-to-work-with-pdf-files-f0f0becadcb6

In [9]:
from langchain.document_loaders import CSVLoader
from langchain import PromptTemplate, LLMChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.callbacks.base import BaseCallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from pathlib import Path

# --- Configuration: everything a reader might want to tune -------------------
CSV_PATH = './Resources/customers-100.csv'
INDEX_PATH = "./Output/customers-100_faiss.db"
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
LLM_MODEL_PATH = f"{Path.home()}/Models/mistral-7b-openorca.Q4_0.gguf"
CHUNK_SIZE = 1024      # maximum characters per chunk
CHUNK_OVERLAP = 64     # characters shared between neighbouring chunks
TOP_K = 4              # number of chunks retrieved as context for the question
N_THREADS = 8          # CPU threads handed to GPT4All

# --- Build and persist the vector index --------------------------------------
# Use load() rather than load_and_split(): the latter already applies a default
# text splitter, so the documents would be split twice. Here they are split
# exactly once, with the explicit chunk settings below.
documents = CSVLoader(CSV_PATH).load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
texts = text_splitter.split_documents(documents)
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
faiss_index = FAISS.from_documents(texts, embeddings)
faiss_index.save_local(INDEX_PATH)

# --- Retrieve the chunks most similar to the question ------------------------
# Set your query here manually
question = "Wie lautet die Mail Adresse von Greg Mata"
matched_docs = faiss_index.similarity_search(question, k=TOP_K)
# Every chunk is followed by the separator (including the last one), so the
# resulting context string is the same as appending chunk by chunk.
context = "".join(doc.page_content + " \n\n " for doc in matched_docs)

# --- Prompt and local LLM ----------------------------------------------------
template = """
Please use the following context to answer questions.
Context: {context}
 - -
Question: {question}
Answer: Let's think step by step."""

# Streaming callback manager; it is not passed to the LLM below, so the answer
# is only shown by the final print().
callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
llm = GPT4All(model=LLM_MODEL_PATH, n_threads=N_THREADS)
# The retrieved context is bound up front, so the chain takes only the question.
prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)
llm_chain = LLMChain(prompt=prompt, llm=llm)
print(llm_chain.run(question))

 First, we need to find the customer with the first name "Greg" and last name "Mata". In our context, this is Customer Id F8Aa9d6DfcBeeF8. Then, we look for their email address which is jaredjuarez@carroll.org. So, Greg Mata's mail adress is jaredjuarez@carroll.org.
