In [3]:
!pip install -q langchain_mistralai

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain 0.1.16 requires langchain-core<0.2.0,>=0.1.42, but you have langchain-core 0.2.1 which is incompatible.
langchain-community 0.0.34 requires langchain-core<0.2.0,>=0.1.45, but you have langchain-core 0.2.1 which is incompatible.
langchain-text-splitters 0.0.1 requires langchain-core<0.2.0,>=0.1.28, but you have langchain-core 0.2.1 which is incompatible.
llama-index-llms-huggingface 0.1.4 requires huggingface-hub<0.21.0,>=0.20.3, but you have huggingface-hub 0.23.0 which is incompatible.


In [10]:

from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain

In [12]:
# Load data
directory = "../Data/mini_data.csv"
loader = CSVLoader(file_path=directory)
docs = loader.load()

In [13]:
docs

[Document(page_content='context: The credit card with the id of 1 is a SuperiorCard with the number of 33332664695310 and the expiration date of 11/2006 and it was last modified on 2013-07-29\n: ', metadata={'source': '../Data/mini_data.csv', 'row': 0}),
 Document(page_content='context: The credit card with the id of 2 is a Distinguish with the number of 55552127249722 and the expiration date of 8/2005 and it was last modified on 2013-12-05\n: ', metadata={'source': '../Data/mini_data.csv', 'row': 1}),
 Document(page_content='context: The credit card with the id of 3 is a ColonialVoice with the number of 77778344838353 and the expiration date of 7/2005 and it was last modified on 2014-01-14\n: ', metadata={'source': '../Data/mini_data.csv', 'row': 2}),
 Document(page_content='context: The credit card with the id of 4 is a ColonialVoice with the number of 77774915718248 and the expiration date of 7/2006 and it was last modified on 2013-05-20\n: ', metadata={'source': '../Data/mini_data.

In [14]:
# Split text into chunks 
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)

In [25]:
# Define the embedding model
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

embed_model_Path = "..\models\sentence-transformers/all-mpnet-base-v2"
embed_model_Name = "sentence-transformers/all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=embed_model_Name)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [26]:
# Create the vector store 
vector = FAISS.from_documents(documents, embeddings)

In [27]:
# Define a retriever interface
retriever = vector.as_retriever()

In [28]:
from langchain_community.llms import LlamaCpp

# Make sure the model path is correct for your system!
model = LlamaCpp(
    model_path="..\models\mistral-7b-instruct-v0.2.Q4_K_M.gguf",
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    verbose=True,  # Verbose is required to pass to the callback manager
)

llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from ..\models\mistral-7b-instruct-v0.2.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:        

llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32000]   = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32000]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
llama_model_loader: - kv  16:                tokenizer.ggml.bos_token_id u32              = 1
llama_model_loader: - kv  17:                tokenizer.ggml.eos_token_id u32              = 2
llama_model_loader: - kv  18:            tokenizer.ggml.unknown_token_id u32              = 0
llama_model_loader: - kv  19:            tokenizer.ggml.padding_token_id u32              = 0
llama_model_loader: - kv  20:               tokenizer.ggml.add_bos_token bool             = true
llama_model_loader: - kv  21:               tokenizer.ggml.add_eos_token bool             = false
llama_model_loader: - kv  22:                    tokenizer.chat_template str              = {{ bos_token }}{% for message in mess...
llama_model_loader: - kv  23: 

In [29]:
# Define prompt template
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

In [30]:
# Create a retrieval chain to answer questions
document_chain = create_stuff_documents_chain(model, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [32]:

response = retrieval_chain.invoke({"input": "What is the type of the card with the id 1?"})
print(response["answer"])

Llama.generate: prefix-match hit

llama_print_timings:        load time =    1409.88 ms
llama_print_timings:      sample time =       4.15 ms /    19 runs   (    0.22 ms per token,  4575.01 tokens per second)
llama_print_timings: prompt eval time =   39018.79 ms /   288 tokens (  135.48 ms per token,     7.38 tokens per second)
llama_print_timings:        eval time =    2957.66 ms /    18 runs   (  164.31 ms per token,     6.09 tokens per second)
llama_print_timings:       total time =   42099.42 ms /   306 tokens




Answer: The card with the id 1 is a SuperiorCard.
