# 🚀 Ollama Python API Tutorial

**A comprehensive guide for the Slashdot Programming Club - IISER Kolkata**

Welcome to this hands-on tutorial where you'll learn to harness the power of Ollama's Python API for building intelligent applications!

## 🎯 What You'll Learn

- Setting up and connecting to Ollama
- Making basic API calls
- Implementing chat functionality
- Building a conversational AI assistant
- Advanced features like streaming responses
- Best practices and error handling

## 📋 Prerequisites

- Python 3.7+
- Ollama installed and running
- Basic understanding of Python
- A curious mind! 🧠

## 🛠️ Step 1: Environment Setup

First, let's install the required packages and verify our setup.

In [None]:
# Install required packages
!pip install requests aiohttp python-dotenv rich

# Optional: For advanced features
!pip install asyncio

In [None]:
# Import essential libraries
import requests
import json
import asyncio
import aiohttp
from typing import List, Dict, Optional, AsyncGenerator
from rich.console import Console
from rich.panel import Panel
from rich.text import Text
from rich.markdown import Markdown

# Shared Rich console; the cells below print all of their styled output through it
console = Console()

## 🔗 Step 2: Basic Connection & Health Check

Let's start by verifying that Ollama is running and accessible.

In [None]:
# Ollama API configuration: base URL (no trailing slash) that the helper
# functions in this step prepend to API paths such as /api/tags
OLLAMA_BASE_URL = "http://localhost:11434"

def check_ollama_status():
    """Report whether the Ollama server answers on its /api/tags endpoint.

    Prints a status line to the console and returns True only when the server
    replies with HTTP 200; any request failure or other status yields False.
    """
    try:
        reply = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
    except requests.exceptions.RequestException as e:
        console.print(f"❌ Failed to connect to Ollama: {e}", style="red")
        return False

    is_up = reply.status_code == 200
    if is_up:
        console.print("✅ Ollama server is running!", style="bold green")
    else:
        console.print(f"❌ Ollama server returned status code: {reply.status_code}", style="red")
    return is_up

# Test the connection: prints the outcome and evaluates to True/False
check_ollama_status()

In [None]:
def get_available_models():
    """Fetch the names of the models available on the Ollama server.

    Queries ``GET /api/tags`` and, on success, prints a summary panel.

    Returns:
        list[str]: The model names. An empty list is returned (after printing
        an error) when the request fails, the server answers with a non-200
        status, or the response body does not have the expected shape.
    """
    try:
        # Bound the wait so an unresponsive server cannot hang the notebook
        # (same limit as check_ollama_status).
        response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
    except requests.exceptions.RequestException as e:
        console.print(f"❌ Error fetching models: {e}", style="red")
        return []

    if response.status_code != 200:
        console.print(f"❌ Failed to fetch models: {response.status_code}", style="red")
        return []

    try:
        models_data = response.json()
        # `or []` also covers a payload whose "models" field is null.
        models = [model['name'] for model in models_data.get('models') or []]
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # Invalid JSON or an unexpected payload structure.
        console.print(f"❌ Error fetching models: {e}", style="red")
        return []

    console.print(Panel(
        f"📚 Available Models ({len(models)}): {', '.join(models) if models else 'None'}",
        title="🤖 Ollama Models",
        border_style="cyan"
    ))
    return models

# Get available models; the demo cells below check this list before running
available_models = get_available_models()

## 💬 Step 3: Basic Generate API

The Generate API is the simplest way to get completions from Ollama models.

In [None]:
class OllamaGenerator:
    """Simple wrapper for the Ollama Generate API (``POST /api/generate``)."""

    def __init__(self, base_url: str = "http://localhost:11434"):
        """Store the server address and derive the generate endpoint.

        Args:
            base_url: Root URL of the Ollama server; a trailing slash is ignored.
        """
        # Strip trailing slashes so the endpoint never contains "//api/...".
        self.base_url = base_url.rstrip('/')
        self.endpoint = f"{self.base_url}/api/generate"

    def generate(self, model: str, prompt: str, **kwargs) -> str:
        """Generate text using the specified model and prompt.

        Args:
            model: Name of the model to run.
            prompt: Prompt text sent to the model.
            **kwargs: Extra top-level fields merged into the request payload
                (they override the defaults set here).

        Returns:
            The generated text, or an empty string if the API returns a
            non-200 status or the request raises.
        """
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,  # Get complete response at once
            **kwargs
        }

        try:
            console.print(f"🤔 Thinking with {model}...", style="yellow")

            response = requests.post(
                self.endpoint,
                json=payload,
                headers={"Content-Type": "application/json"}
            )

            if response.status_code != 200:
                console.print(f"❌ API Error: {response.status_code}", style="red")
                return ""

            generated_text = response.json().get('response', '')

            # Display the result beautifully
            console.print(Panel(
                generated_text,
                title=f"🤖 {model} Response",
                border_style="green"
            ))
            return generated_text

        except Exception as e:
            # Tutorial-level best effort: report the failure instead of raising.
            console.print(f"❌ Generation failed: {e}", style="red")
            return ""

# Example usage: run one prompt through the first available model
if not available_models:
    console.print("⚠️ No models available. Download one with 'ollama pull llama2'", style="yellow")
else:
    generator = OllamaGenerator()

    # Use whichever model the server listed first
    model_name = available_models[0]
    prompt = "Write a haiku about programming:"

    result = generator.generate(model_name, prompt)

## 💬 Step 4: Chat API - Conversational Interface

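The Chat API is better suited to conversations: it accepts a list of role-tagged messages (`system`, `user`, `assistant`). Each request is independent, so the client keeps the conversation history and resends it with every call — which is exactly what the class below does.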

In [None]:
class OllamaChat:
    """Chat interface that keeps the conversation history on the client side.

    Every call to :meth:`chat` sends the optional system prompt, the stored
    history and the new user message to ``POST /api/chat``.
    """

    def __init__(self, model: str, base_url: str = "http://localhost:11434"):
        """Create a chat session.

        Args:
            model: Name of the model used for every message.
            base_url: Root URL of the Ollama server; a trailing slash is ignored.
        """
        self.model = model
        # Strip trailing slashes so the endpoint never contains "//api/...".
        self.base_url = base_url.rstrip('/')
        self.endpoint = f"{self.base_url}/api/chat"
        self.messages = []  # Conversation history (alternating user/assistant dicts)
        self.system_prompt = None

    def set_system_prompt(self, prompt: str):
        """Set the system prompt that defines the AI's behavior."""
        self.system_prompt = prompt
        # Only show an ellipsis when the preview is actually truncated.
        preview = prompt if len(prompt) <= 50 else f"{prompt[:50]}..."
        console.print(f"🎭 System prompt set: {preview}", style="blue")

    def chat(self, user_message: str, **kwargs) -> str:
        """Send a message and get a response while maintaining context.

        Args:
            user_message: Text of the new user turn.
            **kwargs: Extra top-level fields merged into the request payload
                (they override the defaults set here).

        Returns:
            The assistant's reply, or an empty string if the API returns a
            non-200 status or the request raises. The history is only
            extended when a reply was received.
        """
        # System prompt first (if any), then prior turns, then the new message.
        messages = []
        if self.system_prompt:
            messages.append({
                "role": "system",
                "content": self.system_prompt
            })
        messages.extend(self.messages)
        messages.append({
            "role": "user",
            "content": user_message
        })

        payload = {
            "model": self.model,
            "messages": messages,
            "stream": False,
            **kwargs
        }

        try:
            console.print(f"💭 You: {user_message}", style="cyan")
            console.print(f"🤔 {self.model} is thinking...", style="yellow")

            response = requests.post(
                self.endpoint,
                json=payload,
                headers={"Content-Type": "application/json"}
            )

            if response.status_code != 200:
                console.print(f"❌ Chat API Error: {response.status_code}", style="red")
                return ""

            ai_message = response.json()['message']['content']

            # Record the exchange so the next call carries this context.
            self.messages.append({"role": "user", "content": user_message})
            self.messages.append({"role": "assistant", "content": ai_message})

            console.print(Panel(
                ai_message,
                title=f"🤖 {self.model}",
                border_style="green"
            ))
            return ai_message

        except Exception as e:
            # Tutorial-level best effort: report the failure instead of raising.
            console.print(f"❌ Chat failed: {e}", style="red")
            return ""

    def clear_history(self):
        """Clear the conversation history (the system prompt is kept)."""
        self.messages = []
        console.print("🧹 Conversation history cleared!", style="blue")

    def get_conversation_length(self) -> int:
        """Get the number of message exchanges (user + assistant pairs)."""
        return len(self.messages) // 2

# Example: a two-turn conversation with a programming-tutor persona
if not available_models:
    console.print("⚠️ No models available for chat demo.", style="yellow")
else:
    chat = OllamaChat(model=available_models[0])

    # Persona for the assistant
    chat.set_system_prompt(
        "You are a friendly and knowledgeable programming tutor for the Slashdot Programming Club at IISER Kolkata. "
        "Help students learn programming concepts, debug code, and explore new technologies. "
        "Always be encouraging and provide practical examples."
    )

    # First turn
    response1 = chat.chat("Hi! Can you explain what an API is in simple terms?")

    # Second turn: the stored history is sent along, so the model sees the first exchange
    response2 = chat.chat("Can you give me a Python example of using an API?")

    console.print(f"\n📊 Conversation length: {chat.get_conversation_length()} exchanges", style="blue")

## 🌊 Step 5: Streaming Responses

For longer responses, streaming provides a better user experience by showing text as it's generated.

In [None]:
import time

class OllamaStreamer:
    """Handle streaming responses from Ollama."""

    def __init__(self, base_url: str = "http://localhost:11434"):
        """Store the server address; a trailing slash is ignored."""
        # Strip trailing slashes so request URLs never contain "//api/...".
        self.base_url = base_url.rstrip('/')

    def stream_generate(self, model: str, prompt: str, **kwargs):
        """Stream text generation chunk by chunk, echoing it as it arrives.

        Args:
            model: Name of the model to run.
            prompt: Prompt text sent to the model.
            **kwargs: Extra top-level fields merged into the request payload
                (they override the defaults set here).

        Returns:
            The concatenated response text, or an empty string if the API
            returns a non-200 status or the request raises.
        """
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": True,
            **kwargs
        }

        try:
            console.print(f"🌊 Streaming response from {model}...", style="yellow")
            console.print("\n" + "="*50, style="blue")

            chunks = []

            # The context manager releases the connection even when the loop
            # exits early (on "done") or an exception is raised.
            with requests.post(
                f"{self.base_url}/api/generate",
                json=payload,
                headers={"Content-Type": "application/json"},
                stream=True
            ) as response:
                # An error reply carries no "response" chunks; report it
                # instead of announcing a successful (empty) stream.
                if response.status_code != 200:
                    console.print(f"❌ API Error: {response.status_code}", style="red")
                    return ""

                # The body is consumed line by line; each non-empty line is
                # parsed as one JSON object.
                for line in response.iter_lines():
                    if not line:
                        continue
                    try:
                        data = json.loads(line.decode('utf-8'))
                    except json.JSONDecodeError:
                        # Skip lines that are not valid JSON.
                        continue

                    if 'response' in data:
                        token = data['response']
                        chunks.append(token)
                        print(token, end='', flush=True)

                        # Add slight delay for visual effect
                        time.sleep(0.02)

                    if data.get('done', False):
                        break

            full_response = "".join(chunks)

            console.print("\n" + "="*50, style="blue")
            console.print("✅ Streaming complete!", style="green")
            return full_response

        except Exception as e:
            # Tutorial-level best effort: report the failure instead of raising.
            console.print(f"❌ Streaming failed: {e}", style="red")
            return ""

# Example: stream a longer answer from the first available model
if not available_models:
    console.print("⚠️ No models available for streaming demo.", style="yellow")
else:
    streamer = OllamaStreamer()

    prompt = "Write a detailed explanation of how machine learning works, including key concepts and real-world applications:"

    result = streamer.stream_generate(available_models[0], prompt)

## ⚡ Step 6: Async Operations

For applications that need to handle multiple requests concurrently, async operations are essential.

In [None]:
class AsyncOllamaClient:
    """Asynchronous Ollama client for concurrent operations."""

    def __init__(self, base_url: str = "http://localhost:11434"):
        """Store the server address; a trailing slash is ignored."""
        # Strip trailing slashes so request URLs never contain "//api/...".
        self.base_url = base_url.rstrip('/')

    async def async_generate(self, session: "aiohttp.ClientSession", model: str, prompt: str, **kwargs) -> str:
        """Generate text asynchronously.

        Args:
            session: Open aiohttp session used to send the request.
            model: Name of the model to run.
            prompt: Prompt text sent to the model.
            **kwargs: Extra top-level fields merged into the request payload
                (they override the defaults set here).

        Returns:
            The generated text. Failures do not raise: a non-200 status is
            returned as ``"Error: <status>"`` and an exception as
            ``"Exception: <message>"``.
        """
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            **kwargs
        }

        try:
            async with session.post(
                f"{self.base_url}/api/generate",
                json=payload,
                headers={"Content-Type": "application/json"}
            ) as response:
                if response.status != 200:
                    return f"Error: {response.status}"
                result = await response.json()
                return result.get('response', '')
        except Exception as e:
            # Errors are returned as text so one failed prompt cannot abort a batch.
            return f"Exception: {e}"

    async def batch_generate(self, model: str, prompts: List[str]) -> List[str]:
        """Generate responses for multiple prompts concurrently.

        Args:
            model: Name of the model to run for every prompt.
            prompts: Prompts to send; one request is issued per prompt.

        Returns:
            One result string per prompt, in the same order as ``prompts``.
        """
        async with aiohttp.ClientSession() as session:
            tasks = [
                self.async_generate(session, model, prompt)
                for prompt in prompts
            ]

            console.print(f"🚀 Running {len(tasks)} tasks concurrently...", style="yellow")

            # perf_counter is monotonic, so the elapsed time cannot be skewed
            # by system clock adjustments.
            start_time = time.perf_counter()
            results = await asyncio.gather(*tasks)
            elapsed = time.perf_counter() - start_time

            console.print(f"✅ Completed {len(tasks)} tasks in {elapsed:.2f} seconds", style="green")

            return results

# Example: send several prompts at once and show each answer
async def demo_async_operations():
    """Run a fixed set of prompts concurrently and print one panel per answer.

    Prints a warning and returns early when no models are available.
    """
    if not available_models:
        console.print("⚠️ No models available for async demo.", style="yellow")
        return

    # Questions to process concurrently
    questions = [
        "Explain recursion in one sentence:",
        "What is the difference between a list and a tuple in Python?",
        "Write a simple Python function to check if a number is prime:",
        "What are the benefits of using Git for version control?"
    ]

    ollama = AsyncOllamaClient()
    answers = await ollama.batch_generate(available_models[0], questions)

    # Panels are numbered from 1
    for number, (question, answer) in enumerate(zip(questions, answers), start=1):
        body = f"**Question:** {question}\n\n**Answer:** {answer}"
        console.print(Panel(body, title=f"🤖 Result {number}", border_style="cyan"))

# Run the async demo.
# Top-level `await` works in Jupyter/IPython cells; in a plain Python script
# use `asyncio.run(demo_async_operations())` instead.
await demo_async_operations()

## 🛡️ Step 7: Error Handling & Best Practices

Production-ready code needs robust error handling and configuration management.

In [None]:
class RobustOllamaClient:
    """Ollama client with timeouts, retries with backoff, and error reporting.

    The client owns a ``requests.Session``. Call :meth:`close` (or use the
    client as a context manager) to release it deterministically.
    """

    def __init__(self, base_url: str = "http://localhost:11434",
                 timeout: int = 30, max_retries: int = 3):
        """Create the client.

        Args:
            base_url: Root URL of the Ollama server; a trailing slash is ignored.
            timeout: Per-request timeout in seconds.
            max_retries: Maximum number of attempts per request.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.max_retries = max_retries
        self.session = requests.Session()

        # Set default headers
        self.session.headers.update({
            'Content-Type': 'application/json',
            'User-Agent': 'Slashdot-Ollama-Client/1.0'
        })

    def __enter__(self):
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the session when leaving a ``with`` block."""
        self.close()

    def close(self):
        """Close the underlying HTTP session (safe to call more than once)."""
        # getattr guards against __init__ having failed before the session existed.
        session = getattr(self, 'session', None)
        if session is not None:
            session.close()

    @staticmethod
    def _build_options(**kwargs) -> dict:
        """Build the Ollama ``options`` object from caller keyword arguments.

        ``max_tokens`` is accepted for convenience and sent as ``num_predict``,
        which is the option name Ollama uses for the generation length limit.
        An explicit ``num_predict`` takes precedence over ``max_tokens``.
        """
        return {
            "temperature": kwargs.get('temperature', 0.7),
            "top_p": kwargs.get('top_p', 0.9),
            "num_predict": kwargs.get('num_predict', kwargs.get('max_tokens', 1000)),
        }

    def _make_request(self, endpoint: str, payload: dict) -> dict:
        """POST ``payload`` to ``endpoint`` with retry logic and error handling.

        Transient failures (timeouts, connection errors, other request errors,
        5xx responses) are retried with exponential backoff. Client errors
        (4xx) are raised immediately, because repeating the same request
        cannot succeed.

        Args:
            endpoint: API path starting with "/", e.g. "/api/generate".
            payload: JSON-serialisable request body.

        Returns:
            The decoded JSON response body.

        Raises:
            ValueError: The server answered 404 (model not found).
            RuntimeError: Another 4xx response, or every attempt failed.
        """
        url = f"{self.base_url}{endpoint}"
        last_error = None

        for attempt in range(self.max_retries):
            console.print(f"🔄 Attempt {attempt + 1}/{self.max_retries}", style="blue")

            try:
                response = self.session.post(
                    url,
                    json=payload,
                    timeout=self.timeout
                )
            except requests.exceptions.Timeout:
                last_error = f"Request timed out after {self.timeout} seconds"
                console.print(f"⏰ {last_error}", style="yellow")
            except requests.exceptions.ConnectionError:
                last_error = "Cannot connect to Ollama server"
                console.print(f"🔌 {last_error}", style="yellow")
            except requests.exceptions.RequestException as e:
                last_error = str(e)
                console.print(f"❌ {last_error}", style="red")
            else:
                status = response.status_code
                if status == 200:
                    return response.json()
                if status == 404:
                    raise ValueError(f"Model '{payload.get('model')}' not found")
                if 400 <= status < 500:
                    raise RuntimeError(f"HTTP {status}: {response.text}")

                # Anything else (typically 5xx) is treated as possibly transient and retried.
                if status == 500:
                    last_error = "Internal server error - check Ollama logs"
                else:
                    last_error = f"HTTP {status}: {response.text}"
                console.print(f"❌ {last_error}", style="red")

            if attempt < self.max_retries - 1:
                wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s, ...
                console.print(f"⏳ Waiting {wait_time}s before retry...", style="blue")
                time.sleep(wait_time)

        raise RuntimeError(f"All {self.max_retries} attempts failed. Last error: {last_error}")

    def safe_generate(self, model: str, prompt: str, **kwargs) -> Optional[str]:
        """Generate text, reporting any failure instead of raising.

        Args:
            model: Name of the model to run.
            prompt: Prompt text sent to the model.
            **kwargs: Optional sampling settings: ``temperature`` (default 0.7),
                ``top_p`` (default 0.9) and ``max_tokens`` / ``num_predict``
                (default 1000).

        Returns:
            The generated text, or None if the inputs are empty or the
            request ultimately fails.
        """
        # Validate inputs
        if not model or not prompt:
            console.print("❌ Model and prompt are required", style="red")
            return None

        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": self._build_options(**kwargs)
        }

        try:
            console.print(f"🎯 Generating with {model}...", style="cyan")

            result = self._make_request("/api/generate", payload)

            response_text = result.get('response', '')

            # Generation stats from the response; missing fields count as 0.
            eval_count = result.get('eval_count', 0)
            eval_duration = result.get('eval_duration', 0)

            # The division by 1e9 treats eval_duration as nanoseconds.
            console.print(Panel(
                f"Generated {eval_count} tokens in {eval_duration / 1e9:.2f}s",
                title="📊 Generation Stats",
                border_style="green"
            ))

            return response_text

        except Exception as e:
            console.print(f"💥 Generation failed: {e}", style="red")
            return None

    def __del__(self):
        """Clean up session on deletion (fallback when close() was not called)."""
        self.close()

# Example: run a few prompts through the retrying client and preview each answer
if not available_models:
    console.print("⚠️ No models available for robust client demo.", style="yellow")
else:
    robust_client = RobustOllamaClient(timeout=15, max_retries=2)

    # Prompts covering a few different topics
    test_prompts = [
        "Explain quantum computing in simple terms:",
        "Write a Python function to reverse a string:",
        "What are the key principles of good software design?"
    ]

    for prompt in test_prompts:
        result = robust_client.safe_generate(
            model=available_models[0],
            prompt=prompt,
            temperature=0.7
        )

        if not result:
            console.print("❌ Failed to generate response", style="red")
        else:
            # Show at most the first 200 characters of the answer
            console.print(Panel(
                (result[:200] + "...") if len(result) > 200 else result,
                title="✅ Response Preview",
                border_style="green"
            ))

        console.print("─" * 50, style="dim")

## 🎯 Step 8: Putting It All Together - Interactive Demo

Let's finish with a short demonstration that reuses the `OllamaChat` class from Step 4 to ask the model one last question. To make it interactive, replace the fixed question with your own.

In [None]:
# Final demonstration: one chat exchange with a club-assistant persona
if not available_models:
    console.print("\n⚠️ No models available for demo.", style="yellow")
    console.print("Download a model with 'ollama pull llama2' to try the examples!", style="cyan")
else:
    console.print("\n🎮 Quick Demo - Ask the AI a question!", style="bold cyan")

    demo_chat = OllamaChat(model=available_models[0])
    demo_chat.set_system_prompt(
        "You are a helpful AI assistant for the Slashdot Programming Club at IISER Kolkata. "
        "Be friendly, informative, and encourage learning about programming and technology."
    )

    demo_response = demo_chat.chat("What's the most exciting thing about AI programming?")

    console.print("\n✨ This concludes our comprehensive Ollama Python tutorial!", style="bold green")
    console.print("You now have all the tools to build amazing AI applications!", style="green")