## docs

https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#langchain_ollama.chat_models.ChatOllama


## installs

In [None]:
%pip install -qU langchain-ollama


## imports

In [None]:
from langchain_core.documents import Document
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings

# InMemoryVectorStore takes its embedding model at construction time, so the
# model must exist before the store is built. Defining it in this cell keeps
# the notebook runnable from top to bottom.
embeddings = OllamaEmbeddings(model="llama3.2:latest")

# Vector store shared by the indexing and search cells that follow.
vector_store = InMemoryVectorStore(embeddings)

## Indexing and Retrieval


Initializes an OllamaEmbeddings instance with the specified model.

The `OllamaEmbeddings` class generates text embeddings using a model served by Ollama (here `llama3.2:latest`).


In [None]:

from langchain_ollama import OllamaEmbeddings

# Ollama tag of the model used to produce the embeddings.
embedding_model_name = "llama3.2:latest"

embeddings = OllamaEmbeddings(model=embedding_model_name)

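Populates the in-memory vector store, which was initialized with the embeddings.
The `InMemoryVectorStore` is a simple vector store implementation that stores the embeddings in memory.
The cell below creates the sample documents, adds them to the store, and exposes the store as a retriever.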


In [5]:

# Sample corpus as (page_content, metadata) rows, listed in id order.
sample_rows = [
    (
        "Jose is an AI engineer",
        {"Jose": "AI engineer"},
    ),
    (
        "LangGraph is a library for building stateful, multi-actor applications with LLMs",
        {"LangGraph": "LLMs"},
    ),
    (
        "Python is a versatile programming language",
        {"Python": "programming language"},
    ),
    (
        "Machine learning is a subset of artificial intelligence",
        {"Machine learning": "AI"},
    ),
    (
        "Natural language processing deals with interactions between computers and human language",
        {"NLP": "human language"},
    ),
    (
        "LlamaIndex is a data framework for LLM-based applications",
        {"LlamaIndex": "data framework"},
    ),
    (
        "Caching is a scalability pattern that stores and reuses the results of expensive operations",
        {"Scalability": "caching"},
    ),
    (
        "Parallelism is a scalability pattern that executes multiple tasks simultaneously",
        {"Scalability": "parallelism"},
    ),
    (
        "Routing is a scalability pattern that directs requests to appropriate resources",
        {"Scalability": "routing"},
    ),
    (
        "Asynchrony is a scalability pattern that allows non-blocking operations",
        {"Scalability": "asynchrony"},
    ),
    (
        "Decoupling is a scalability pattern that separates system components to reduce dependencies",
        {"Scalability": "decoupling"},
    ),
    (
        "Linear regression is a supervised learning algorithm used for predicting continuous values",
        {"Machine Learning": "Linear Regression"},
    ),
    (
        "Logistic regression is used for binary classification problems in supervised learning",
        {"Machine Learning": "Logistic Regression"},
    ),
    (
        "Decision trees are a type of supervised learning algorithm used for both classification and regression tasks",
        {"Machine Learning": "Decision Trees"},
    ),
    (
        "Random forests are an ensemble learning method that constructs multiple decision trees",
        {"Machine Learning": "Random Forests"},
    ),
    (
        "Gradient boosting is a machine learning technique for regression and classification problems",
        {"Machine Learning": "Gradient Boosting"},
    ),
    (
        "Feature engineering is the process of creating new features from existing data to improve model performance",
        {"Machine Learning": "Feature Engineering"},
    ),
    (
        "Feature selection is the process of selecting a subset of relevant features for use in model construction",
        {"Machine Learning": "Feature Selection"},
    ),
]

# Each document id is its 1-based position rendered as a string: "1" .. "18".
documents = [
    Document(id=str(position), page_content=text, metadata=meta)
    for position, (text, meta) in enumerate(sample_rows, start=1)
]

# Keep the individual document_N names bound, in case other cells refer to them.
(
    document_1, document_2, document_3, document_4, document_5, document_6,
    document_7, document_8, document_9, document_10, document_11, document_12,
    document_13, document_14, document_15, document_16, document_17, document_18,
) = documents

# Index the whole corpus in the vector store.
vector_store.add_documents(documents=documents)

# Use the vectorstore as a retriever
retriever = vector_store.as_retriever()


Search:


In [6]:
# Search text: used for the similarity search and later sent to the chat model as the human message.
query = "Feature engineering"

Performs a similarity search on the vector store and prints the top 3 most similar documents along with their similarity scores.


In [None]:
results = await vector_store.asimilarity_search_with_score(query=query, k=3,fetch_k=5)
for doc,score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")

Creates a single text string by joining the page contents of the search results, separated by commas.



In [None]:
# Collect the page content of every (document, score) hit and join the
# pieces into one comma-separated string.
page_texts = (hit.page_content for hit, _score in results)
context = ", ".join(page_texts)

# Last expression of the cell, so the notebook displays the joined text.
context

In [None]:
# Number of characters in the context string that gets embedded in the system prompt.
len(context)

## Invokes a ChatOllama model with the provided context and query, and returns the model's response.


In [None]:
from langchain_ollama import ChatOllama

# Chat model served by Ollama.
llm = ChatOllama(model="llama3.1", temperature=0.8)

# The system message embeds the retrieved context and tells the model to
# answer only from it; the human message is the original search query.
system_prompt = f"You answer questions strictly based on this context provided and nothing else. context: {context}. If the answer is not in the context, respond with 'I don't know.'"
messages = [("system", system_prompt), ("human", query)]

# Last expression of the cell: the text content of the model's reply.
response = llm.invoke(messages)
response.content