### Setup

Pull the model with Ollama and install the `langchain-ollama` package so the model can be used from LangChain.

In [None]:
# cmd:
# >>ollama run deepseek-r1:1.5b
! ollama pull deepseek-r1:1.5b
! pip install -U langchain-ollama

# https://python.langchain.com/api_reference/ollama/chat_models/langchain_ollama.chat_models.ChatOllama.html#langchain_ollama.chat_models.ChatOllama

### Model Instance

In [6]:
! pip install -qU langchain_ollama 

In [3]:
from langchain_ollama import ChatOllama

# Chat model handle for the deepseek-r1:1.5b Ollama model, sampling at temperature 0.8.
llm = ChatOllama(model="deepseek-r1:1.5b", temperature=0.8)

**Usage**

**1. Sync invoke**

In [None]:
# Conversation as (role, content) pairs: a system instruction followed by
# the sentence to translate.
messages = [
    (
        "system",
        "You are a helpful translator. Translate the user sentence to Hindi.",
    ),
    ("human", "I love programming"),
]

# Synchronous call; the reply text is read from its `content` attribute.
result = llm.invoke(messages)
print(result.content)

**2. Sync stream**

In [None]:
# Conversation as (role, content) pairs: a system instruction followed by
# the sentence to translate.
messages = [
    ("system", "You are a helpful translator. Translate the user sentence to Hindi."),
    ("human", "How are you?"),
]

# Print each chunk as it is yielded: end='' keeps the pieces on one line and
# flush=True writes them out immediately instead of waiting for a newline.
for chunk in llm.stream(messages):
    print(chunk.content, end='', flush=True)

**3. Async**

In [None]:
import asyncio

# Define multiple messages
messages_list = [
    [("system", "Translate the user sentence to Hindi."), ("human", "How are you?")],
    [("system", "Translate the user sentence to French."), ("human", "What is your name?")],
    [("system", "Translate the user sentence to Spanish."), ("human", "Where do you live?")],
]

# Simple async function to process the messages
async def process_requests(messages_list):
    for messages in messages_list:
        response = await llm.ainvoke(messages)
        print(response.content)

# Run the async function
await process_requests(messages_list)


**4. Async stream**

In [None]:
# Define multiple messages
messages_list = [
    [("system", "Translate the user sentence to Hindi."), ("human", "How are you?")],
    [("system", "Translate the user sentence to French."), ("human", "What is your name?")],
    [("system", "Translate the user sentence to Spanish."), ("human", "Where do you live?")],
]

# Async function to stream messages
async def stream_requests(messages_list):
    for messages in messages_list:
        async for chunk in llm.astream(messages):
            print(chunk.content,end='',flush=True)

# Run the async streaming function
await stream_requests(messages_list)

**5. Async batch**

In [None]:
# Define the list of messages
messages_list = [
    [("system", "Translate the user sentence to Hindi."), ("human", "How are you?")],
    [("system", "Translate the user sentence to French."), ("human", "What is your name?")],
    [("system", "Translate the user sentence to Spanish."), ("human", "Where do you live?")],
]

# Send multiple messages asynchronously in a batch
response = await llm.abatch(messages_list)

# Loop through the responses and print each content
for res in response:
    print(res.content)


In [5]:
# Pull the llama3.2:1b model with the Ollama CLI.
! ollama pull llama3.2:1b

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest 
pulling 74701a8c35f6... 100% ▕████████████████▏ 1.3 GB                         
pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB                         
pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB                         
pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB                         
pulling 4f659a1e86d7... 100% ▕████████████████▏  485 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [8]:
import ollama

# Ask Ollama for its model listing, then print it.
installed_models = ollama.list()
print(installed_models)

[Model(model='llama3.2:1b', modified_at=datetime.datetime(2025, 1, 27, 21, 19, 16, 493866, tzinfo=TzInfo(+05:30)), digest='baf6a787fdffd633537aa2eb51cfd54cb93ff08e28040095462bb63daf552878', size=1321098329, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='1.2B', quantization_level='Q8_0')),
 Model(model='nomic-embed-text:latest', modified_at=datetime.datetime(2025, 1, 26, 22, 53, 34, 471628, tzinfo=TzInfo(+05:30)), digest='0a109f422b47e3a30ba2b10eca18548e944e8a23073ee3f3e947efcf3c45e59f', size=274302450, details=ModelDetails(parent_model='', format='gguf', family='nomic-bert', families=['nomic-bert'], parameter_size='137M', quantization_level='F16')),
 Model(model='deepseek-r1:1.5b', modified_at=datetime.datetime(2025, 1, 25, 16, 40, 52, 746978, tzinfo=TzInfo(+05:30)), digest='a42b25d8c10a841bd24724309898ae851466696a7d7f3a0a408b895538ccbc96', size=1117322599, details=ModelDetails(parent_model='', format='gguf', family='qwen2', fam