# Object Detection with Vision Language Models Tutorial 👁️🤖

Welcome to an exciting journey into computer vision using AI! In this tutorial, we'll learn how to build an intelligent object detection system that can identify and analyze objects in images.

## What You'll Learn 🎯

In this comprehensive tutorial, you'll discover:

1. **👁️ Vision Language Models**: How AI can "see" and understand images
2. **🔍 Object Detection**: Identifying and counting objects in photos
3. **📊 Structured Outputs**: Getting organized data from AI responses
4. **🎨 Data Visualization**: Creating charts and graphs from detection results
5. **🌐 Web Applications**: Building interactive interfaces with Streamlit
6. **🏗️ System Architecture**: Organizing code into modular components

Let's dive into the fascinating world of AI vision! 🚀


In [None]:
# Install required packages
%pip install ollama pydantic streamlit pillow pandas matplotlib

# Import all necessary libraries
import ollama
import json
import os
from typing import List, Dict, Any
from pydantic import BaseModel
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt

print("✅ All packages installed and imported!")
print("🎯 Ready to build our object detection system!")

# Check that the local Ollama server is reachable and report which of the
# installed models look like vision models (name contains "vision").
try:
    models = ollama.list()
    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print(f"\n🤖 Available Ollama models: {len(models['models'])}")

    # Look for vision models
    vision_models = [m for m in models['models'] if 'vision' in m['model'].lower()]
    if vision_models:
        print(f"👁️ Vision models found: {[m['model'] for m in vision_models]}")
    else:
        print("⚠️ No vision models found. You may need to install one:")
        print("   Run: ollama pull llama3.2-vision")

except Exception as e:
    # Top-level notebook boundary: report the problem instead of aborting the cell.
    print(f"❌ Error connecting to Ollama: {e}")
    print("Make sure Ollama is running!")


In [None]:
# Let's test basic vision capabilities
def test_vision_model(image_path: str, model: str = "llama3.2-vision"):
    """
    Ask a vision model for a free-form description of one image.

    Args:
        image_path: Path to the image file
        model: Name of the vision model to use

    Returns:
        The text of the model's reply, or the string "Error: <reason>" when
        the request fails for any reason.
    """
    # Single user turn carrying both the instruction and the image.
    user_turn = {
        'role': 'user',
        'content': 'Describe what you see in this image in detail.',
        'images': [image_path],
    }

    try:
        reply = ollama.chat(model=model, messages=[user_turn])
        description = reply['message']['content']
    except Exception as exc:
        # Failures are reported in-band so the notebook cell keeps running.
        return f"Error: {exc}"

    return description

# Create a simple test image (if you don't have one handy)
def create_test_image():
    """
    Draw a red square, a yellow circle and a green triangle on a light-blue
    400x300 canvas, save it as "test_shapes.png" and return that path.
    """
    from PIL import Image, ImageDraw

    # Every shape shares the same black 2px border.
    border = {'outline': 'black', 'width': 2}

    canvas = Image.new('RGB', (400, 300), color='lightblue')
    pen = ImageDraw.Draw(canvas)

    pen.rectangle([50, 50, 150, 150], fill='red', **border)
    pen.ellipse([200, 100, 300, 200], fill='yellow', **border)
    pen.polygon([(320, 50), (370, 50), (345, 100)], fill='green', **border)

    # Written to the current working directory.
    test_path = "test_shapes.png"
    canvas.save(test_path)
    print(f"✅ Created test image: {test_path}")
    return test_path

# Create and test with a simple image
print("🎨 Creating a test image with basic shapes...")
test_image_path = create_test_image()

# Show the generated image inline, then ask the vision model to describe it.
try:
    test_img = Image.open(test_image_path)
    plt.figure(figsize=(8, 6))
    plt.imshow(test_img)
    plt.title("Our Test Image")
    plt.axis('off')
    plt.show()

    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n🤖 Testing vision model...")
    description = test_vision_model(test_image_path)
    print(f"👁️ AI Description: {description}")

except Exception as e:
    print(f"❌ Error testing vision: {e}")
    print("Make sure you have a vision model installed!")


In [None]:
# Define data structures for our object detection results
class Object(BaseModel):
    """
    One detected object type in an image.

    Pydantic model describing the shape each entry of the detection
    result's "objects" array is expected to have:
    - a clear name/label
    - the colors that are visible on it
    - how many instances are present
    """
    name: str                # Object label (e.g., "car", "person", "tree")
    color: List[str]         # Visible colors as a list (e.g., ["red", "blue"])
    count: int               # Number of instances of this object type (e.g., 3)

class ObjectDetectionResponse(BaseModel):
    """
    The complete response from our object detection system.

    Wraps the list of detected object types under a single "objects" key,
    matching the JSON layout the detection prompt asks the model to return.
    """
    objects: List[Object]    # One entry per detected object type

def detect_objects(image_path: str, model: str = "llama3.2-vision") -> str:
    """
    Detects objects in an image using a vision language model.

    Sends the image together with a structured prompt to Ollama, requests
    JSON output, and returns the raw JSON text of the reply.

    Args:
        image_path: Path to the image file to analyze
        model: Name of the vision model to use

    Returns:
        JSON string containing detected objects with their properties.
        If the request fails for any reason, a JSON string of the form
        {"objects": [], "error": "<reason>"} is returned instead, so callers
        can always parse the result.
    """
    try:
        # Send the image and structured prompt to the AI model.
        # The prompt is a plain triple-quoted string; the continuation lines'
        # leading whitespace is part of the text sent to the model.
        response = ollama.chat(
            model=model,
            messages=[
                {
                    'role': 'user',
                    'content': """Your task is to perform object detection on the image and return a structured output in JSON format. For each detected object, include the following attributes:
                    Name: The name of the detected object (e.g., 'cat', 'car', 'person').
                    Count: The total number of detected instances of this object type in the image.
                    Color: The dominant color or primary colors of the object.
                    
                    Return the results as a JSON object with an 'objects' array.""",
                    'images': [image_path]
                }
            ],
            format="json",          # This tells Ollama to return JSON
            options={'temperature': 0}  # Low temperature for consistent results
        )
        return response['message']['content']

    except Exception as e:
        # Return error information in the same structured format as a
        # successful response so downstream parsing code needs no special case.
        error_response = {
            "objects": [],
            "error": str(e)
        }
        return json.dumps(error_response)

# Test our structured detection
print("🔍 Testing structured object detection...")

try:
    # Use our test image from before
    detection_result = detect_objects(test_image_path)
    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n📊 Raw JSON Response:")
    print(detection_result)

    # Parse the JSON to make it more readable
    try:
        parsed_result = json.loads(detection_result)
        print("\n✨ Parsed Results:")

        if "objects" in parsed_result:
            for i, obj in enumerate(parsed_result["objects"], 1):
                print(f"  {i}. Object: {obj.get('name', 'Unknown')}")
                print(f"     Count: {obj.get('count', 0)}")
                print(f"     Colors: {', '.join(obj.get('color', []))}")
                print()
        else:
            print("No objects detected or error in response.")

    except json.JSONDecodeError:
        print("⚠️ Response was not valid JSON")

except Exception as e:
    print(f"❌ Error during detection: {e}")

print("\n💡 The power of structured outputs:")
print("✅ Consistent data format")
print("✅ Easy to process programmatically")
print("✅ Can be stored in databases")
print("✅ Perfect for building applications")


In [None]:
def create_object_count_chart(detection_data: str) -> "plt.Figure | None":
    """
    Create a bar chart showing the count of each detected object type.

    Args:
        detection_data: JSON string from our object detection function

    Returns:
        A matplotlib Figure containing the chart, or None when the data has
        no objects, is not valid JSON, or the chart could not be built
        (a short message is printed in those cases).
    """
    fig = None  # Tracked so a half-built figure can be released on failure
    try:
        # Parse the JSON data
        data = json.loads(detection_data)

        if "objects" not in data or not data["objects"]:
            print("No objects found to visualize")
            return None

        # Extract object names and counts
        objects = data["objects"]
        names = [obj.get("name", "Unknown") for obj in objects]
        counts = [obj.get("count", 0) for obj in objects]

        # Create the bar chart
        fig, ax = plt.subplots(figsize=(10, 6))
        bars = ax.bar(names, counts, color='skyblue', edgecolor='navy', linewidth=1.2)

        # Customize the chart
        ax.set_xlabel('Object Type', fontsize=12, fontweight='bold')
        ax.set_ylabel('Count', fontsize=12, fontweight='bold')
        ax.set_title('Detected Objects Count', fontsize=14, fontweight='bold')
        ax.grid(axis='y', alpha=0.3)

        # Add count labels just above each bar
        for bar, count in zip(bars, counts):
            ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height() + 0.05,
                    f'{count}', ha='center', va='bottom', fontweight='bold')

        # Operate on this figure's own axes rather than pyplot's "current" one
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        fig.tight_layout()
        return fig

    except Exception as e:
        # Do not leave a partially drawn figure registered with pyplot
        if fig is not None:
            plt.close(fig)
        print(f"Error creating chart: {e}")
        return None

def create_color_distribution_chart(detection_data: str) -> "plt.Figure | None":
    """
    Create a pie chart showing the distribution of colors in detected objects.

    Each color listed on an object is weighted by that object's count
    (defaulting to 1 when the count is missing). Color names are compared
    case-insensitively.

    Args:
        detection_data: JSON string from our object detection function

    Returns:
        A matplotlib Figure containing the pie chart, or None when there are
        no objects or colors to plot, the JSON is invalid, or the chart could
        not be built (a short message is printed in those cases).
    """
    fig = None  # Tracked so a half-built figure can be released on failure
    try:
        # Parse the JSON data
        data = json.loads(detection_data)

        if "objects" not in data or not data["objects"]:
            print("No objects found to visualize")
            return None

        # Count all colors mentioned
        color_counts = {}
        for obj in data["objects"]:
            # Models sometimes return null or a single string instead of a
            # list; a bare string must not be iterated character by character.
            colors = obj.get("color") or []
            if isinstance(colors, str):
                colors = [colors]
            count = obj.get("count", 1)

            for color in colors:
                color_lower = str(color).strip().lower()
                color_counts[color_lower] = color_counts.get(color_lower, 0) + count

        if not color_counts:
            print("No colors found to visualize")
            return None

        # Create the pie chart
        fig, ax = plt.subplots(figsize=(8, 8))
        colors_list = list(color_counts.keys())
        counts_list = list(color_counts.values())

        # Use actual colors where possible: each name maps to a hex value of
        # the matching hue so wedge colors agree with their labels.
        color_map = {
            'red': '#E74C3C', 'blue': '#3498DB', 'green': '#2ECC71',
            'yellow': '#F1C40F', 'orange': '#FF8C42', 'purple': '#9B59B6',
            'pink': '#FF69B4', 'brown': '#8B4513', 'black': '#2C3E50',
            'white': '#ECF0F1', 'gray': '#95A5A6', 'grey': '#95A5A6'
        }

        # Unknown color names fall back to a neutral light gray
        pie_colors = [color_map.get(color, '#BDC3C7') for color in colors_list]

        ax.pie(counts_list, labels=colors_list, colors=pie_colors,
               autopct='%1.1f%%', startangle=90)

        ax.set_title('Color Distribution in Detected Objects', fontsize=14, fontweight='bold')
        fig.tight_layout()
        return fig

    except Exception as e:
        # Do not leave a partially drawn figure registered with pyplot
        if fig is not None:
            plt.close(fig)
        print(f"Error creating color chart: {e}")
        return None

def display_detection_results(detection_data: str):
    """
    Display comprehensive results from object detection including charts.

    Prints a text report and then shows two charts:
    - Summary statistics (total objects, unique types)
    - Per-object listing with count and colors
    - Object count chart
    - Color distribution chart

    Args:
        detection_data: JSON string from our object detection function

    Returns:
        None. Problems (error field in the data, no objects, invalid JSON,
        any other failure) are reported by printing a message.
    """
    # "\n" (not "\\n") so a blank line precedes the header.
    print("\n📊 OBJECT DETECTION RESULTS")
    print("=" * 50)

    try:
        # Parse the detection data
        data = json.loads(detection_data)

        if "error" in data:
            print(f"❌ Error in detection: {data['error']}")
            return

        if "objects" not in data or not data["objects"]:
            print("🔍 No objects detected in the image.")
            return

        objects = data["objects"]

        # Summary statistics
        total_objects = sum(obj.get("count", 0) for obj in objects)
        unique_types = len(objects)

        print(f"📈 Summary:")
        print(f"   Total Objects: {total_objects}")
        print(f"   Unique Types: {unique_types}")
        print()

        # Detailed object list
        print("🔍 Detected Objects:")
        for i, obj in enumerate(objects, 1):
            name = obj.get("name", "Unknown")
            count = obj.get("count", 0)
            # A bare string must stay one color, not be joined letter by letter
            colors = obj.get("color") or []
            if isinstance(colors, str):
                colors = [colors]
            print(f"   {i}. {name.title()}")
            print(f"      Count: {count}")
            print(f"      Colors: {', '.join(map(str, colors)) if colors else 'Not specified'}")
            print()

        # Create and display charts
        print("📊 Creating visualizations...")

        # Object count chart
        count_fig = create_object_count_chart(detection_data)
        if count_fig:
            plt.figure(count_fig.number)  # make it the current figure before showing
            plt.show()

        # Color distribution chart
        color_fig = create_color_distribution_chart(detection_data)
        if color_fig:
            plt.figure(color_fig.number)
            plt.show()

    except json.JSONDecodeError:
        print("❌ Invalid JSON data received")
    except Exception as e:
        print(f"❌ Error displaying results: {e}")

# Test our visualization functions
print("🎨 Testing our visualization system...")

# Hand-written detection data matching the shapes in the first test image,
# so the visualizations can be demonstrated without calling the model.
sample_data = {
    "objects": [
        {"name": "rectangle", "count": 1, "color": ["red"]},
        {"name": "circle", "count": 1, "color": ["yellow"]},
        {"name": "triangle", "count": 1, "color": ["green"]}
    ]
}

sample_json = json.dumps(sample_data)
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n🧪 Using sample data for demonstration:")

# Display the results
display_detection_results(sample_json)


In [None]:
# Complete Streamlit Application Code
# This is the code that would go in your app.py file.
# It is kept here as a string so the notebook can show it without running it.
# The upload is saved under its base name only, and the temporary file is
# removed in a `finally` block so it is cleaned up even when analysis fails.

streamlit_app_code = '''
import streamlit as st
from PIL import Image
import os
import json
from detector import detect_objects
from visualize import display_detection_results

# --- Page Configuration ---
st.set_page_config(
    page_title="Object Detection",
    page_icon="🤖",
    layout="wide"
)

# --- App Title ---
st.title("🤖 Object Detection")
st.write("Upload an image to detect objects using a local vision model.")

# --- Sidebar with Information ---
st.sidebar.title("ℹ️ About")
st.sidebar.write("""
This application uses AI vision models to:
- 🔍 Detect objects in images
- 📊 Count instances of each object
- 🎨 Identify colors
- 📈 Create visualizations

**How to use:**
1. Upload an image (JPG, PNG, JPEG)
2. Click "Detect Objects"
3. View the results and charts!
""")

# --- File Uploader ---
uploaded_file = st.file_uploader(
    "Choose an image...", 
    type=["jpg", "png", "jpeg"]
)

if uploaded_file is not None:
    # Create two columns for layout
    col1, col2 = st.columns([1, 1])
    
    with col1:
        # Display the uploaded image
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)
    
    with col2:
        # Show image information
        st.write("**Image Information:**")
        st.write(f"- **Filename:** {uploaded_file.name}")
        st.write(f"- **Size:** {image.size}")
        st.write(f"- **Format:** {image.format}")
        st.write(f"- **Mode:** {image.mode}")

    # A button to trigger the analysis
    if st.button("🔍 Detect Objects", type="primary"):
        with st.spinner("Detecting objects... This may take a moment."):
            file_path = None
            try:
                # Save the uploaded file temporarily to pass its path to the model
                temp_dir = "temp"
                os.makedirs(temp_dir, exist_ok=True)
                
                # Keep only the base name so a crafted filename cannot escape temp_dir
                file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
                with open(file_path, "wb") as f:
                    f.write(uploaded_file.getbuffer())

                # Call the object detection function
                detection_data = detect_objects(file_path)
                
                # Parse and display results
                try:
                    data = json.loads(detection_data)
                    
                    if "error" in data:
                        st.error(f"Detection error: {data['error']}")
                    elif "objects" in data and data["objects"]:
                        st.success("✅ Objects detected successfully!")
                        
                        # Display summary
                        objects = data["objects"]
                        total_objects = sum(obj.get("count", 0) for obj in objects)
                        
                        col1, col2, col3 = st.columns(3)
                        with col1:
                            st.metric("Total Objects", total_objects)
                        with col2:
                            st.metric("Unique Types", len(objects))
                        with col3:
                            all_colors = []
                            for obj in objects:
                                all_colors.extend(obj.get("color", []))
                            st.metric("Colors Found", len(set(all_colors)))
                        
                        # Display detailed results
                        st.subheader("📋 Detected Objects")
                        for i, obj in enumerate(objects, 1):
                            with st.expander(f"{obj.get('name', 'Unknown').title()} ({obj.get('count', 0)} found)"):
                                st.write(f"**Count:** {obj.get('count', 0)}")
                                colors = obj.get('color', [])
                                if colors:
                                    st.write(f"**Colors:** {', '.join(colors)}")
                                else:
                                    st.write("**Colors:** Not specified")
                        
                        # Create visualizations (simplified for Streamlit)
                        st.subheader("📊 Visualizations")
                        
                        # Object counts as a simple chart
                        chart_data = {obj.get("name", "Unknown"): obj.get("count", 0) for obj in objects}
                        st.bar_chart(chart_data)
                        
                    else:
                        st.info("🔍 No objects detected in the image.")
                        
                except json.JSONDecodeError:
                    st.error("❌ Invalid response from detection model")

            except Exception as e:
                st.error(f"An error occurred during analysis: {e}")

            finally:
                # Clean up the temporary file, even when analysis failed
                if file_path and os.path.exists(file_path):
                    os.remove(file_path)

else:
    st.info("👆 Please upload an image file to get started.")
    
    # Show example images or instructions
    st.subheader("💡 Tips for Better Results")
    st.write("""
    - Use clear, well-lit images
    - Ensure objects are clearly visible
    - Try different types of images (indoor, outdoor, people, animals, etc.)
    - The AI works best with common objects it has been trained on
    """)
'''

# Summarize what the Streamlit app above provides and how to launch it.
print("🌐 Complete Streamlit Application Code:")
print("=" * 50)
print("This code creates a full web application with:")
print("✅ File upload interface")
print("✅ Image display and information")
print("✅ Object detection processing")
print("✅ Results visualization")
print("✅ Error handling")
print("✅ User-friendly interface")

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n💡 To run this as a web app:")
print("1. Save the code above to a file called 'app.py'")
print("2. Make sure you have the detector.py and visualize.py modules")
print("3. Run: streamlit run app.py")
print("4. Open your browser to the provided URL")

print("\n🎯 Key Features of Our Web App:")
features = [
    "📤 Drag-and-drop file upload",
    "🖼️ Image preview and metadata",
    "🔍 Real-time object detection",
    "📊 Interactive charts and metrics",
    "📱 Responsive design",
    "⚠️ Comprehensive error handling",
    "💡 User guidance and tips"
]

for feature in features:
    print(f"  {feature}")

# Create a simple demo function for notebook use
def analyze_image_simple(image_path: str):
    """
    Run detection on one image and print the results in the notebook.

    Args:
        image_path: Path to the image file to analyze

    Returns:
        The JSON string produced by detect_objects, or None if detection or
        display raised an exception (the error is printed).
    """
    file_name = os.path.basename(image_path)
    print(f"🔍 Analyzing image: {file_name}")

    try:
        # Detect first, then render the notebook-friendly report.
        detection_json = detect_objects(image_path)
        display_detection_results(detection_json)
    except Exception as err:
        print(f"❌ Error: {err}")
        return None

    return detection_json

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n🧪 You can test the detection system in this notebook using:")
print("analyze_image_simple('path_to_your_image.jpg')")


In [None]:
# Let's create different test scenarios to explore our system

def create_complex_test_image():
    """
    Create a more complex test image with multiple objects.

    Draws two houses, two trees, two cars, a sun and three cloud puffs on a
    600x400 light-gray canvas, saves it as "complex_scene.png" in the current
    working directory, and returns that path.
    """
    from PIL import Image, ImageDraw

    # Create a larger, more complex image
    img = Image.new('RGB', (600, 400), color='lightgray')
    draw = ImageDraw.Draw(img)

    # Shared styles
    outlined = {'outline': 'black', 'width': 2}
    canopy = {'fill': 'green', 'outline': 'darkgreen', 'width': 2}
    wheel = {'fill': 'black'}
    cloud = {'fill': 'white', 'outline': 'lightgray'}

    # (ImageDraw method, coordinates, style). Order matters: later shapes are
    # painted over earlier ones (roof over wall, canopy over trunk, wheels over body).
    shapes = [
        # Houses: wall, then roof
        ('rectangle', [50, 200, 120, 280], {'fill': 'brown', **outlined}),
        ('polygon', [(35, 200), (85, 150), (135, 200)], {'fill': 'red', **outlined}),
        ('rectangle', [200, 220, 270, 300], {'fill': 'yellow', **outlined}),
        ('polygon', [(185, 220), (235, 170), (285, 220)], {'fill': 'blue', **outlined}),
        # Trees: trunk, then canopy
        ('rectangle', [150, 250, 170, 320], {'fill': 'brown'}),
        ('ellipse', [130, 200, 190, 260], canopy),
        ('rectangle', [350, 240, 370, 310], {'fill': 'brown'}),
        ('ellipse', [330, 190, 390, 250], canopy),
        # Cars: body, then two wheels
        ('rectangle', [400, 280, 480, 320], {'fill': 'red', **outlined}),
        ('ellipse', [410, 310, 430, 330], wheel),
        ('ellipse', [460, 310, 480, 330], wheel),
        ('rectangle', [500, 270, 570, 310], {'fill': 'blue', **outlined}),
        ('ellipse', [510, 300, 530, 320], wheel),
        ('ellipse', [550, 300, 570, 320], wheel),
        # Sun
        ('ellipse', [500, 50, 550, 100], {'fill': 'yellow', 'outline': 'orange', 'width': 3}),
        # Clouds
        ('ellipse', [100, 80, 160, 120], cloud),
        ('ellipse', [120, 70, 180, 110], cloud),
        ('ellipse', [300, 60, 380, 110], cloud),
    ]

    for method_name, coords, style in shapes:
        getattr(draw, method_name)(coords, **style)

    complex_path = "complex_scene.png"
    img.save(complex_path)
    print(f"✅ Created complex test image: {complex_path}")
    return complex_path

def experiment_with_prompts():
    """
    Test different prompting strategies for object detection.

    Generates the complex test scene, shows it, then sends it to the
    "llama3.2-vision" model once per prompt strategy (Basic, Detailed,
    Category-focused) and prints each JSON reply.

    Returns:
        Path of the generated test image.
    """

    print("🧪 EXPERIMENT: Different Prompting Strategies")
    print("=" * 60)

    # Create our test image
    complex_image = create_complex_test_image()

    # Display the image (best effort: the experiment continues without it)
    try:
        img = Image.open(complex_image)
        plt.figure(figsize=(10, 6))
        plt.imshow(img)
        plt.title("Complex Test Scene")
        plt.axis('off')
        plt.show()
    except Exception:
        # Narrower than a bare except so Ctrl-C / SystemExit still propagate
        print("Could not display image")

    # Different prompting approaches
    prompts = {
        "Basic": """Detect and list all objects in this image. For each object, provide:
        - Name of the object
        - Count of how many you see
        - Primary colors
        Return as JSON.""",

        "Detailed": """You are an expert computer vision system. Analyze this image thoroughly and detect ALL visible objects. 
        For each object type, provide:
        - Specific name (e.g., 'house', 'car', 'tree' not just 'building', 'vehicle', 'plant')
        - Exact count of instances
        - All visible colors
        - Brief description if helpful
        Be comprehensive and accurate. Return structured JSON data.""",

        "Category-focused": """Analyze this image and categorize all objects you see into these groups:
        1. Buildings/Structures
        2. Vehicles
        3. Nature/Plants
        4. Weather/Sky elements
        5. Other objects
        
        For each object, specify its category, name, count, and colors. Return as JSON."""
    }

    # "\n" (not "\\n") so blank lines are printed instead of literal backslash-n.
    print("\n🎯 Testing different prompt strategies...")

    for strategy, prompt in prompts.items():
        print(f"\n--- {strategy} Prompting Strategy ---")

        try:
            response = ollama.chat(
                model="llama3.2-vision",
                messages=[{
                    'role': 'user',
                    'content': prompt,
                    'images': [complex_image]
                }],
                format="json",
                options={'temperature': 0}
            )

            result = response['message']['content']
            print(f"✅ {strategy} result:")

            # Pretty-print valid JSON; otherwise show a truncated raw reply
            try:
                parsed = json.loads(result)
                print(json.dumps(parsed, indent=2))
            except json.JSONDecodeError:
                print(result[:200] + "..." if len(result) > 200 else result)

        except Exception as e:
            print(f"❌ Error with {strategy}: {e}")

    return complex_image

def compare_models(test_image: str = "complex_scene.png", max_models: int = 2):
    """
    Compare different vision models if available.

    Lists the installed Ollama models, keeps those whose name contains
    "vision" or "llava", and runs object detection with the first
    `max_models` of them on the same image.

    Args:
        test_image: Path of the image every model is tested on. Defaults to
            the file written by create_complex_test_image().
        max_models: Maximum number of models to test.

    Returns:
        None. All findings and errors are printed.
    """

    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n🤖 EXPERIMENT: Model Comparison")
    print("=" * 50)

    # Name fragments that mark a model as vision-capable. "llava" is included
    # because it is the model this function suggests installing.
    vision_keywords = ("vision", "llava")

    # Check available models
    try:
        models = ollama.list()
        vision_models = [
            m['model'] for m in models['models']
            if any(keyword in m['model'].lower() for keyword in vision_keywords)
        ]
    except Exception as e:
        print(f"Error checking models: {e}")
        return

    if len(vision_models) <= 1:
        print(f"Only {len(vision_models)} vision model(s) available: {vision_models}")
        print("Install more models to compare: ollama pull llava:latest")
        return

    print(f"Found {len(vision_models)} vision models: {vision_models}")

    # Without the image every model would just report an error
    if not os.path.exists(test_image):
        print(f"Test image not found: {test_image}")
        print("Run create_complex_test_image() first to generate it.")
        return

    # Test each model on the same image
    for model in vision_models[:max_models]:
        print(f"\n--- Testing {model} ---")
        try:
            result = detect_objects(test_image, model)
            data = json.loads(result)

            if data.get("error"):
                # detect_objects reports failures in-band; do not present
                # them as "0 object types detected"
                print(f"Error: {data['error']}")
            elif "objects" in data:
                print(f"Detected {len(data['objects'])} object types:")
                for obj in data['objects']:
                    print(f"  - {obj.get('name', 'Unknown')}: {obj.get('count', 0)}")
            else:
                print("No objects detected")

        except Exception as e:
            print(f"Error: {e}")

def test_different_image_types():
    """
    Test our system with different types of content.

    Builds the test scenarios (currently one: five flat-colored geometric
    shapes saved as "geometric_shapes.png"), runs object detection on each,
    and prints what was found.

    Returns:
        None. All findings and errors are printed.
    """

    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n📸 EXPERIMENT: Different Image Types")
    print("=" * 50)

    # (scenario name, image path) pairs to run detection on
    test_scenarios = []

    # Scenario 1: Simple geometric shapes
    print("Creating geometric shapes test...")
    try:
        from PIL import Image, ImageDraw
        img = Image.new('RGB', (300, 300), 'white')
        draw = ImageDraw.Draw(img)

        # Various shapes
        draw.rectangle([50, 50, 100, 100], fill='red')
        draw.ellipse([150, 50, 200, 100], fill='blue')
        draw.polygon([(250, 50), (275, 100), (225, 100)], fill='green')
        draw.rectangle([50, 150, 100, 200], fill='yellow')
        draw.ellipse([150, 150, 200, 200], fill='purple')

        shapes_path = "geometric_shapes.png"
        img.save(shapes_path)
        test_scenarios.append(("Geometric Shapes", shapes_path))

    except Exception as e:
        # The scenario is simply skipped when the image cannot be created
        print(f"Error creating shapes: {e}")

    # Test each scenario
    for scenario_name, image_path in test_scenarios:
        print(f"\n🔍 Testing: {scenario_name}")
        try:
            result = detect_objects(image_path)
            data = json.loads(result)

            if data.get("error"):
                # detect_objects reports failures in-band; do not present
                # them as "no objects detected"
                print(f"❌ Error: {data['error']}")
            elif "objects" in data and data["objects"]:
                print(f"✅ Detected {len(data['objects'])} object types:")
                for obj in data["objects"]:
                    print(f"  - {obj.get('name', 'Unknown')}: {obj.get('count', 0)} ({', '.join(obj.get('color', []))})")
            else:
                print("❌ No objects detected")

        except Exception as e:
            print(f"❌ Error: {e}")

# Run our experiments
print("🚀 Starting Object Detection Experiments!")
# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\nNote: These experiments will test different aspects of our system:")
print("1. Complex scene analysis")
print("2. Different prompting strategies")
print("3. Model comparisons (if multiple models available)")
print("4. Various image types")

# Uncomment the experiments you want to run:

# Experiment 1: Complex scene with different prompts
# complex_img = experiment_with_prompts()

# Experiment 2: Compare different models
# compare_models()

# Experiment 3: Test different image types
# test_different_image_types()

print("\n💡 Uncomment the experiment functions above to run them!")
print("Each experiment will teach you something different about computer vision!")


In [None]:
# Overview of real-world uses and possible technical extensions.
print("🚀 ADVANCED OBJECT DETECTION APPLICATIONS")
print("=" * 60)

print("""
🎯 REAL-WORLD APPLICATIONS:

1. 🏪 RETAIL & INVENTORY
   - Automatic product counting
   - Shelf monitoring systems
   - Quality control inspection
   - Customer behavior analysis

2. 🚗 AUTONOMOUS VEHICLES
   - Traffic sign detection
   - Pedestrian identification
   - Vehicle tracking
   - Road condition assessment

3. 🏥 HEALTHCARE
   - Medical image analysis
   - Equipment monitoring
   - Patient safety systems
   - Diagnostic assistance

4. 🔒 SECURITY & SURVEILLANCE
   - Intrusion detection
   - Crowd monitoring
   - Suspicious activity alerts
   - Access control systems

5. 🌾 AGRICULTURE
   - Crop monitoring
   - Pest detection
   - Harvest optimization
   - Livestock tracking

6. 🏭 MANUFACTURING
   - Defect detection
   - Assembly line monitoring
   - Safety compliance
   - Quality assurance
""")

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n🛠️ TECHNICAL EXTENSIONS YOU CAN BUILD:")
print("=" * 60)

extensions = [
    "📱 Mobile App Integration",
    "🎥 Real-time Video Processing",
    "🗃️ Database Storage & Analytics",
    "🔔 Alert & Notification Systems",
    "📊 Advanced Data Visualization",
    "🤖 Multi-Model Ensemble Systems",
    "🌐 API Development & Integration",
    "📈 Performance Monitoring",
    "🎛️ Custom Training Pipelines",
    "🔄 Automated Retraining Systems"
]

# Numbered list, right-aligned to two digits so 1-9 line up with 10
for i, ext in enumerate(extensions, 1):
    print(f"{i:2d}. {ext}")

def create_advanced_detection_system():
    """
    Print an overview of an advanced detection system and return its template.

    The template is illustrative source code for an
    ``AdvancedObjectDetectionSystem`` class. It is held as a string and is
    never executed here. It refers to ``detect_objects``, ``json`` and the
    ``typing`` names ``List``, ``Dict`` and ``Any``, which must be in scope
    wherever the template is pasted.

    Returns:
        str: The dedented template source, ready to print or paste into a cell.
    """
    # Function-scope import keeps this cell self-contained.
    import textwrap

    # The leading "\n" is a real newline, so the heading starts after a blank line.
    print("\n🏗️ ADVANCED SYSTEM ARCHITECTURE")
    print("=" * 50)

    # dedent() + strip() remove the indentation this literal inherits from the
    # function body, so the returned text is valid top-level Python.
    advanced_system_code = textwrap.dedent('''
    from datetime import datetime


    class AdvancedObjectDetectionSystem:
        """
        An advanced object detection system with additional capabilities.
        """

        def __init__(self, models: List[str], confidence_threshold: float = 0.7):
            self.models = models
            self.confidence_threshold = confidence_threshold
            self.detection_history = []
            self.performance_metrics = {}

        def multi_model_detection(self, image_path: str) -> Dict[str, Any]:
            """Use multiple models and combine results for better accuracy."""
            results = {}

            for model in self.models:
                try:
                    result = detect_objects(image_path, model)
                    results[model] = json.loads(result)
                except Exception as e:
                    results[model] = {"error": str(e)}

            # Combine results using ensemble methods
            combined_result = self.ensemble_results(results)
            return combined_result

        def ensemble_results(self, results: Dict[str, Any]) -> Dict[str, Any]:
            """Combine results from multiple models."""
            # Implement voting, averaging, or other ensemble methods
            # This is a simplified version

            all_objects = {}

            for model, result in results.items():
                if "objects" in result:
                    for obj in result["objects"]:
                        name = obj.get("name", "unknown")
                        count = obj.get("count", 0)

                        if name in all_objects:
                            all_objects[name]["votes"] += 1
                            all_objects[name]["total_count"] += count
                        else:
                            all_objects[name] = {
                                "votes": 1,
                                "total_count": count,
                                "colors": obj.get("color", [])
                            }

            # Filter by confidence (number of models that detected the object)
            final_objects = []
            for name, data in all_objects.items():
                confidence = data["votes"] / len(self.models)
                if confidence >= self.confidence_threshold:
                    final_objects.append({
                        "name": name,
                        "count": data["total_count"] // data["votes"],  # Average count
                        "color": data["colors"],
                        "confidence": confidence
                    })

            return {"objects": final_objects}

        def track_performance(self, detection_time: float, accuracy: float):
            """Track system performance metrics."""
            self.performance_metrics["avg_detection_time"] = detection_time
            self.performance_metrics["accuracy"] = accuracy

        def save_detection_history(self, image_path: str, results: Dict[str, Any]):
            """Save detection results for analysis."""
            self.detection_history.append({
                "timestamp": datetime.now().isoformat(),
                "image_path": image_path,
                "results": results
            })
    ''').strip()

    print("This advanced system includes:")
    print("✅ Multi-model ensemble detection")
    print("✅ Confidence-based filtering")
    print("✅ Performance tracking")
    print("✅ Detection history logging")
    print("✅ Result combination algorithms")

    # Returned rather than discarded so callers can display or reuse the
    # template; callers that ignore the result behave as before.
    return advanced_system_code

def create_specialized_detectors():
    """
    Print a summary of domain-specific detectors and return their definitions.

    Only each detector's name and description are printed; the prompts are
    not shown.

    Returns:
        dict[str, dict[str, str]]: Detector name mapped to a dict with
        ``description`` and ``prompt`` keys, in display order.
    """
    # The leading "\n" is a real newline, so the heading starts after a blank line.
    print("\n🎯 SPECIALIZED DETECTION SYSTEMS")
    print("=" * 50)

    specialized_examples = {
        "Safety Detector": {
            "description": "Detects safety equipment and hazards",
            "prompt": """Analyze this image for safety-related items:
            - Personal protective equipment (helmets, gloves, safety vests)
            - Safety hazards (spills, obstacles, dangerous equipment)
            - Emergency equipment (fire extinguishers, first aid kits)
            - Safety compliance indicators
            
            For each item, specify if it represents good safety practices or potential hazards."""
        },

        "Food Analyzer": {
            "description": "Analyzes food items and nutritional content",
            "prompt": """Identify all food items in this image:
            - Specific food names (not just 'food')
            - Estimated quantities/portions
            - Food categories (fruits, vegetables, proteins, etc.)
            - Freshness indicators
            - Nutritional classifications (healthy, processed, etc.)"""
        },

        "Document Scanner": {
            "description": "Identifies and categorizes documents",
            "prompt": """Analyze this image for documents and text:
            - Document types (forms, letters, receipts, etc.)
            - Text elements (headers, paragraphs, tables)
            - Document condition (quality, completeness)
            - Important visual elements (logos, signatures, stamps)"""
        },

        "Room Analyzer": {
            "description": "Analyzes room layout and furniture",
            "prompt": """Analyze this room image:
            - Furniture items and their conditions
            - Room type identification
            - Layout and organization
            - Lighting conditions
            - Cleanliness and maintenance indicators
            - Accessibility features"""
        }
    }

    for name, details in specialized_examples.items():
        print(f"\n🔍 {name}:")
        print(f"   Purpose: {details['description']}")
        # Plain string: there is nothing to interpolate on this line.
        print("   Specialized prompt focuses on domain-specific detection")

    # Returned so the prompts can be reused; callers that ignore the result
    # behave as before.
    return specialized_examples

def integration_examples():
    """
    Print ways to integrate the detector with other systems and return them.

    Only each example's name and description are printed. The ``code`` values
    are illustrative snippets held as strings; nothing in them is executed
    here. The API snippet calls ``detect_objects``, which must be defined
    wherever that snippet is pasted.

    Returns:
        list[dict[str, str]]: One dict per example with ``name``,
        ``description`` and ``code`` keys, in display order. ``code`` is
        dedented so it can be pasted as top-level Python.
    """
    # Function-scope import keeps this cell self-contained.
    import textwrap

    # The leading "\n" is a real newline, so the heading starts after a blank line.
    print("\n🔗 INTEGRATION EXAMPLES")
    print("=" * 50)

    integrations = [
        {
            "name": "Database Integration",
            "description": "Store detection results in databases",
            "code": """
            import json
            import sqlite3
            from datetime import datetime

            def save_to_database(image_path, detection_results):
                conn = sqlite3.connect('detections.db')
                try:
                    cursor = conn.cursor()

                    # Store the timestamp as ISO-8601 text rather than a raw datetime.
                    cursor.execute('''
                        INSERT INTO detections (image_path, timestamp, results)
                        VALUES (?, ?, ?)
                    ''', (
                        image_path,
                        datetime.now().isoformat(),
                        json.dumps(detection_results),
                    ))

                    conn.commit()
                finally:
                    # Release the connection even if the insert fails.
                    conn.close()
            """
        },

        {
            "name": "API Development",
            "description": "Create REST API for detection service",
            "code": """
            import json
            import os
            import tempfile

            from flask import Flask, request, jsonify

            app = Flask(__name__)

            @app.route('/detect', methods=['POST'])
            def detect_api():
                if 'image' not in request.files:
                    return jsonify({'error': 'No image provided'}), 400

                image = request.files['image']
                # Save temporarily and process
                suffix = os.path.splitext(image.filename or '')[1]
                with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                    image.save(tmp)
                    image_path = tmp.name
                try:
                    result = detect_objects(image_path)
                finally:
                    os.remove(image_path)
                return jsonify(json.loads(result))
            """
        },

        {
            "name": "Alert System",
            "description": "Send notifications based on detections",
            "code": """
            import smtplib
            from email.mime.text import MIMEText

            def send_email_alert(message, sender='alerts@example.com',
                                 recipient='admin@example.com', smtp_host='localhost'):
                msg = MIMEText(message)
                msg['Subject'] = 'Object detection alert'
                msg['From'] = sender
                msg['To'] = recipient
                with smtplib.SMTP(smtp_host) as server:
                    server.send_message(msg)

            def send_alert(detection_results, alert_conditions):
                for obj in detection_results.get('objects', []):
                    if obj['name'] in alert_conditions:
                        send_email_alert(f"Detected: {obj['name']}")
            """
        }
    ]

    for integration in integrations:
        # Drop the indentation the literal inherits from this function body.
        integration["code"] = textwrap.dedent(integration["code"]).strip()
        print(f"\n📡 {integration['name']}:")
        print(f"   {integration['description']}")

    # Returned so the snippets can be displayed or reused; callers that
    # ignore the result behave as before.
    return integrations

# Show all the advanced concepts. Each call prints its own section; any
# return values are intentionally ignored here.
create_advanced_detection_system()
create_specialized_detectors()
integration_examples()

# The leading "\n" in the headings below is a real newline, so each heading
# starts after a blank line.
print("\n🎓 LEARNING OBJECTIVES ACHIEVED:")
print("✅ Understanding Vision Language Models")
print("✅ Structured data extraction from images")
print("✅ Data visualization and analysis")
print("✅ Web application development")
print("✅ System architecture and design")
print("✅ Advanced applications and extensions")

print("\n🚀 You're now ready to build sophisticated computer vision applications!")
print("Choose an extension idea and start building your own advanced system!")


In [None]:
# Install required packages
# %pip install ollama pydantic streamlit pillow pandas matplotlib

# Import all necessary libraries
import ollama
import json
import os
from typing import List, Dict, Any
from pydantic import BaseModel
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt

print("✅ All packages imported!")
print("🎯 Ready to build our object detection system!")

# Check if we have access to Ollama.
# Any exception raised in this block is reported below instead of propagating,
# so the cell always finishes.
try:
    models = ollama.list()
    # The leading "\n" is a real newline, so the count starts after a blank line.
    print(f"\n🤖 Available Ollama models: {len(models['models'])}")

    # Look for vision models: keep entries whose model name contains
    # "vision", compared case-insensitively.
    vision_models = [m for m in models['models'] if 'vision' in m['model'].lower()]
    if vision_models:
        print(f"👁️ Vision models found: {[m['model'] for m in vision_models]}")
    else:
        print("⚠️ No vision models found. You may need to install one:")
        print("   Run: ollama pull llama3.2-vision")

except Exception as e:
    print(f"❌ Error connecting to Ollama: {e}")
    print("Make sure Ollama is running!")


In [None]:
# Install required packages
%pip install ollama pydantic streamlit pillow pandas matplotlib

# Import all necessary libraries
import ollama
import json
import os
from typing import List, Dict, Any
from pydantic import BaseModel
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt

print("✅ All packages installed and imported!")
print("🎯 Ready to build our object detection system!")

# Check if we have access to Ollama.
# Any exception raised in this block is reported below instead of propagating,
# so the cell always finishes.
try:
    models = ollama.list()
    # The leading "\n" is a real newline, so the count starts after a blank line.
    print(f"\n🤖 Available Ollama models: {len(models['models'])}")

    # Look for vision models: keep entries whose model name contains
    # "vision", compared case-insensitively.
    vision_models = [m for m in models['models'] if 'vision' in m['model'].lower()]
    if vision_models:
        print(f"👁️ Vision models found: {[m['model'] for m in vision_models]}")
    else:
        print("⚠️ No vision models found. You may need to install one:")
        print("   Run: ollama pull llama3.2-vision")

except Exception as e:
    print(f"❌ Error connecting to Ollama: {e}")
    print("Make sure Ollama is running!")
