In [2]:
import ollama
# Send a single user prompt to the "deepseek-r1:1.5b" model through the
# ollama client, then print only the text of the reply message.
chat_request = {
    "model": "deepseek-r1:1.5b",
    "messages": [
        {"role": "user", "content": "Explain Newton's second law of motion"},
    ],
}
response = ollama.chat(**chat_request)

reply_message = response["message"]
print(reply_message["content"])

<think>
Okay, so I need to explain Newton's Second Law of Motion. Let me see... From what I remember in physics class, there are three laws named after him: first, third, and second. The second one talks about the relationship between force, mass, and acceleration.

Hmm, how does that work? So if a force is applied on an object, it causes the object to accelerate. That means more force equals more acceleration or something like that? But I need to explain all of this clearly.

Wait, force is equal to mass times acceleration. Oh yeah, F = ma. So when you apply a greater force, the acceleration increases, making the object speed up faster. Or if there's less force, it doesn't accelerate as much.

But why is that? Like, how does force cause acceleration? I think it's because of inertia. Inertia is the resistance to change in motion, right? So an object will keep moving at its current speed unless something stops it or makes it go faster. So if there's a force, you need more mass or apply 

In [3]:
import json 
import requests 
from typing import Any, Dict, List, Mapping, Optional, Union, Iterator
from langchain_core.callbacks.manager import CallbackManager
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    BaseMessage, 
    AIMessage,
    HumanMessage,
    SystemMessage
)
from langchain_core.outputs import ChatGeneration, ChatResult

In [None]:
import json
from typing import Any, Dict, Iterator, List, Mapping, Optional, Union

import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    HumanMessage,
    SystemMessage,
)
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult

class OllamaChat(BaseChatModel):
    """LangChain chat model that talks to the Ollama HTTP ``/api/chat`` endpoint.

    Attributes:
        model_name: Model tag sent as ``model`` in every request.
        temperature: Sampling temperature, sent inside the request ``options``.
        streaming: When ``False``, ``_stream`` falls back to one blocking
            request and yields the whole reply as a single chunk.
        base_url: Root URL of the Ollama server (a trailing slash is tolerated).
        request_timeout: Timeout in seconds handed to ``requests``; ``None``
            (the default) waits indefinitely, as before.
    """

    model_name: str = "deepseek-r1"
    temperature: float = 0.0
    streaming: bool = False
    base_url: str = "http://localhost:11434"
    request_timeout: Optional[float] = None

    def _convert_to_ollama_messages(self, messages: List[BaseMessage]) -> List[Dict[str, Any]]:
        """Convert LangChain messages to Ollama ``{"role", "content"}`` dicts.

        Human, AI and system messages map to ``user``, ``assistant`` and
        ``system``; any other message type is sent as ``user``.
        """
        role_by_type = (
            (HumanMessage, "user"),
            (AIMessage, "assistant"),
            (SystemMessage, "system"),
        )
        ollama_messages = []
        for message in messages:
            role = next(
                (name for kind, name in role_by_type if isinstance(message, kind)),
                "user",  # Default fallback for unrecognised message types
            )
            ollama_messages.append({"role": role, "content": message.content})
        return ollama_messages

    def _build_payload(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]],
        stream: bool,
        extra: Dict[str, Any],
    ) -> Dict[str, Any]:
        """Assemble the JSON body shared by the blocking and streaming calls.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences.
            stream: Whether Ollama should answer with newline-delimited chunks.
            extra: Caller keyword arguments; merged into the top level of the
                payload, except ``options`` (merged into the generated options,
                caller values winning) and ``stream`` (ignored, because the
                calling method decides how the response is parsed).
        """
        # Ollama reads sampling parameters from the nested "options" object;
        # top-level "temperature"/"stop" keys are not part of the chat API.
        options: Dict[str, Any] = {"temperature": self.temperature}
        if stop:
            options["stop"] = stop

        extra = dict(extra)  # never mutate the caller's kwargs
        extra.pop("stream", None)
        options.update(extra.pop("options", None) or {})

        payload: Dict[str, Any] = {
            "model": self.model_name,
            "messages": self._convert_to_ollama_messages(messages),
            "stream": stream,
            "options": options,
        }
        payload.update(extra)
        return payload

    def _post_chat(self, payload: Dict[str, Any], stream: bool) -> requests.Response:
        """POST ``payload`` to ``<base_url>/api/chat`` and return the raw response."""
        endpoint = f"{self.base_url.rstrip('/')}/api/chat"
        return requests.post(
            endpoint,
            json=payload,
            stream=stream,
            timeout=self.request_timeout,
        )

    def _final_generation_info(self, result: Dict[str, Any]) -> Dict[str, Any]:
        """Build ``generation_info`` from a completed (``done``) Ollama response."""
        # Ollama reports the stop cause as "done_reason"; "finish_reason" is
        # still honoured so servers/proxies using that key keep working.
        finish_reason = result.get("done_reason") or result.get("finish_reason", "stop")
        return {"model": self.model_name, "finish_reason": finish_reason}

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        """Generate a complete chat response with one blocking request.

        Args:
            messages: Conversation to send.
            stop: Optional stop sequences.
            run_manager: Accepted for interface compatibility; unused here.
            **kwargs: Extra request fields (see ``_build_payload``).

        Returns:
            A ``ChatResult`` holding a single ``ChatGeneration``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        payload = self._build_payload(messages, stop, stream=False, extra=kwargs)
        response = self._post_chat(payload, stream=False)
        response.raise_for_status()
        result = response.json()

        generation = ChatGeneration(
            message=AIMessage(content=result["message"]["content"]),
            generation_info=self._final_generation_info(result),
        )
        return ChatResult(generations=[generation])

    def _stream(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[ChatGenerationChunk]:
        """Stream the chat response as incremental ``ChatGenerationChunk`` objects.

        Each yielded chunk carries only the newly received text; LangChain
        concatenates chunks itself, so yielding accumulated text would
        duplicate content.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            RuntimeError: If a streamed line carries an ``error`` field.
        """
        if not self.streaming:
            # Streaming disabled: do one blocking call and emit it as one chunk.
            generation = self._generate(messages, stop, run_manager, **kwargs).generations[0]
            yield ChatGenerationChunk(
                message=AIMessageChunk(content=generation.message.content),
                generation_info=generation.generation_info,
            )
            return

        payload = self._build_payload(messages, stop, stream=True, extra=kwargs)
        # The context manager releases the connection even if the consumer
        # stops iterating early or an exception is raised mid-stream.
        with self._post_chat(payload, stream=True) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue  # skip keep-alive blank lines
                data = json.loads(line)
                if "error" in data:
                    raise RuntimeError(f"Ollama streaming error: {data['error']}")

                token = data.get("message", {}).get("content", "")
                done = data.get("done", False)
                chunk = ChatGenerationChunk(
                    message=AIMessageChunk(content=token),
                    # Only the last chunk carries metadata: string values in
                    # generation_info are concatenated when chunks are merged.
                    generation_info=self._final_generation_info(data) if done else None,
                )
                if run_manager:
                    run_manager.on_llm_new_token(token, chunk=chunk)
                yield chunk

                if done:
                    break

    @property
    def _llm_type(self) -> str:
        """Return the type of LLM."""
        return "ollama-chat"