In [1]:
# | default_exp llms.client

In [2]:
# | export
import os
from typing import Dict, Any, List, Optional, Iterator
from openai import OpenAI
from rich.console import Console
from rich.markdown import Markdown

In [3]:
# | export
from agentic.configs.loader import get_model_config, get_settings_config
from agentic.llms.response_processor import ResponseProcessor
from agentic.llms.streaming_handler import StreamingHandler

In [4]:
# | export
class LLMClient:
    """Client for an OpenAI-compatible chat-completions endpoint.

    Bundles an ``OpenAI`` client with a ``ResponseProcessor`` (used for
    non-streaming responses) and a ``StreamingHandler`` (used for streamed
    responses). Constructor arguments that are not supplied fall back to the
    mapping returned by ``get_model_config()`` and then to built-in defaults.
    """

    # True once an MLflow setup attempt has been made (successful or not), so
    # the setup cost and any warning are incurred at most once per process.
    _tracing_configured = False

    def __init__(self, model: Optional[str] = None, base_url: Optional[str] = None,
                 api_key: Optional[str] = None):
        """Build the client and probe the endpoint once.

        Args:
            model: Model name; defaults to the config's ``name`` entry, then
                ``'qwen3:8b'``.
            base_url: Endpoint URL; defaults to the config's ``url`` entry,
                then ``'http://localhost:11434/v1'``.
            api_key: API key; defaults to the config's ``api_key`` entry, then
                ``'ollama'``.

        A failed probe only prints a warning; construction still succeeds.
        """
        model_config = get_model_config()

        # Explicit argument > config value > hard-coded default. Note that
        # ``or`` also treats an empty string as "not supplied".
        self.model = model or model_config.get('name', 'qwen3:8b')
        self.base_url = base_url or model_config.get('url', 'http://localhost:11434/v1')
        self.api_key = api_key or model_config.get('api_key', 'ollama')

        self.client = OpenAI(base_url=self.base_url,
                             api_key=self.api_key)

        self.response_processor = ResponseProcessor()
        self.streaming_handler = StreamingHandler()

        self._validate_connection()

    def _validate_connection(self) -> bool:
        """Send a one-token test request to check that the endpoint answers.

        Returns:
            True if the request completed, False if it raised (the error is
            printed as a warning rather than propagated).
        """
        try:
            # Smallest possible request: one token, 5 second timeout.
            self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": "test"}],
                max_tokens=1,
                timeout=5
            )
        except Exception as e:
            print(f"⚠️ LLM connection warning: {e}")
            return False
        return True

    @classmethod
    def _setup_tracing(cls) -> None:
        """Enable MLflow auto-tracing for OpenAI calls, at most once.

        Tracing is best-effort: if ``mlflow`` is not installed or the tracking
        server cannot be configured, a warning is printed and completions
        continue without tracing. The tracking URI and experiment name can be
        overridden through ``MLFLOW_TRACKING_URI`` and
        ``MLFLOW_EXPERIMENT_NAME``; otherwise the previous hard-coded values
        are used.
        """
        if cls._tracing_configured:
            return
        # Mark before trying so a failing setup is not retried on every call.
        cls._tracing_configured = True
        try:
            import mlflow

            mlflow.openai.autolog()
            mlflow.set_tracking_uri(
                os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000")
            )
            mlflow.set_experiment(
                os.environ.get("MLFLOW_EXPERIMENT_NAME", "tool_test")
            )
        except Exception as e:
            print(f"⚠️ MLflow tracing disabled: {e}")

    def create_completion(self, messages: List[Dict[str, Any]],
                         tools: Optional[List[Dict]] = None,
                         stream: bool = True, **kwargs) -> Any:
        """Create a chat completion, optionally offering tools to the model.

        Args:
            messages: Chat messages passed through as ``messages``.
            tools: Tool definitions. When non-empty they are sent together
                with ``tool_choice="auto"``; ``None`` or ``[]`` sends neither.
            stream: Passed through as ``stream``.
            **kwargs: Extra parameters forwarded to
                ``chat.completions.create`` unchanged.

        Returns:
            Whatever ``self.client.chat.completions.create`` returns.

        Raises:
            RuntimeError: If the completion request raises; the original
                exception is attached as ``__cause__``.
        """
        completion_params = {
            "model": self.model,
            "messages": messages,
            "stream": stream,
            **kwargs
        }

        if tools:
            completion_params["tools"] = tools
            completion_params["tool_choice"] = "auto"

        # Kept outside the try below so a tracing problem is never reported
        # as (or turned into) a completion failure.
        self._setup_tracing()

        try:
            return self.client.chat.completions.create(**completion_params)
        except Exception as e:
            raise RuntimeError(f"LLM completion failed: {e}") from e

    def process_response(self, response: Any, console: Optional[Console] = None) -> Dict[str, Any]:
        """Delegate a non-streaming response to ``self.response_processor``."""
        return self.response_processor.process_response(response, console)

    def handle_streaming_response(self, response: Iterator, console: Optional[Console] = None) -> Dict[str, Any]:
        """Delegate a streamed response to ``self.streaming_handler``."""
        return self.streaming_handler.handle_streaming_response(response, console)

    def get_model_info(self) -> Dict[str, Any]:
        """Describe the current configuration.

        Note: ``connection_valid`` is computed by ``_validate_connection()``,
        so every call issues a live test request to the endpoint.

        Returns:
            Dict with ``model``, ``base_url``, ``api_key_set`` (bool) and
            ``connection_valid`` (bool).
        """
        return {
            "model": self.model,
            "base_url": self.base_url,
            "api_key_set": bool(self.api_key),
            "connection_valid": self._validate_connection()
        }


In [5]:
# | hide
# Example usage 

# System message asking the model to answer in Markdown so the output renders
# well through `rich`.
system_prompt = {
    "role": "system",
    "content": (
        "You are a helpful assistant. "
        "Return all your responses using valid **Markdown syntax**, including:\n"
        "- Headers (`#`, `##`)\n"
        "- Bullet points\n"
        "- Code blocks (triple backticks)\n"
        "- Bold / italic text\n"
        "- Quotes and tables if needed\n\n"
        "Also, try to use formatting that works well with color rendering in terminals using `rich`."
    )
}

# `Console` comes from the import cell at the top of the notebook.
# Constructing the client sends a short test request to the configured endpoint.
client = LLMClient()

# Streamed completion without tools.
response = client.create_completion(
    messages=[
        system_prompt,
        {"role": "user", "content": "Explain how a neural network works."},
    ],
    stream=True,
)
result = client.handle_streaming_response(response, Console())
# Non-streaming variant: pass stream=False above and call
# client.process_response(response, Console()) instead.


[38;2;200;100;120m╭─────────────────────── 🤔 Thinking ───────────────────────╮[0m
[38;2;200;100;120m│ [38;2;200;100;120mWe[0m[38;2;200;100;120m need[0m[38;2;200;100;120m to[0m[38;2;200;100;120m explain[0m[38;2;200;100;120m how[0m[38;2;200;100;120m a[0m[38;2;200;100;120m neural[0m[38;2;200;100;120m network[0m[38;2;200;100;120m works[0m[38;2;200;100;120m,[0m[38;2;200;100;120m using[0m[38;2;200;100;120m Markdown[0m[38;2;200;100;120m.[0m[38;2;200;100;120m Should[0m[38;2;200;100;120m be[0m[38;2;200;100;120m comprehensive[0m[38;2;200;100;120m,[0m[38;2;200;100;120m but[0m[38;2;200;100;120m no[0m[38;2;200;100;120m fluff[0m[38;2;200;100;120m.[0m[38;2;200;100;120m Provide[0m[38;2;200;100;120m headers[0m[38;2;200;100;120m,[0m[38;2;200;100;120m bullet[0m[38;2;200;100;120m points[0m[38;2;200;100;120m,[0m[38;2;200;100;120m code[0m[38;2;200;100;120m blocks[0m[38;2;200;100;120m,[0m[38;2;200;100;120m tables[0m[38;2;200;100;120m,[0m[38

In [7]:
# Show the dict returned by client.handle_streaming_response(...) in the example cell.
result

{'content': 'The user asks: "Explain how a neural network works." They want an explanation. We need to output in Markdown format; presumably use some formatting. Should explain basic concept, layers, neurons, weights, activation, learning, backpropagation, gradient descent. Use bullet points and maybe tables. Use rich-friendly formatting: no special colors but use bold etc. Provide some code examples maybe. Keep it accessible. Let\'s produce a thorough but concise explanation. Use headings. Use lists. Use code blocks for sample network. Use tables for layers.\n\nLet\'s craft final answer.# How a Neural Network Works\n\nA neural network is a computational model inspired by the way biological brains process information.  \nIt learns to map inputs to outputs by adjusting numerical parameters (weights) so that its predictions match observed data.\n\nBelow is a step‑by‑step outline, from architecture to learning, with examples and key terminology.\n\n---\n\n## 1. The Basic Building Blocks\n