# Space Analytics Demo: DuckDB + Iceberg Integration

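This demonstration showcases the power of combining DuckDB with Iceberg for space analytics workloads. The notebook connects to the `space_analytics.db` DuckDB database, summarizes the `neo_approaches` table, and times an aggregate query over approach years 1950–1970.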


In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import duckdb
import time
from pathlib import Path
import sys

# Extend the module search path with the sibling scripts directory
scripts_dir = '../scripts'
sys.path.append(scripts_dir)

# Plot appearance: apply the matplotlib style sheet first, then the seaborn
# palette (same order as before, since both are global plotting settings)
plot_style, palette_name = 'seaborn-v0_8', 'viridis'
plt.style.use(plot_style)
sns.set_palette(palette_name)

print('Libraries imported successfully!')

## 1. Connect to Database

In [None]:
# Connect to the space analytics database.
#
# duckdb.connect() creates a brand-new, empty database file when the given
# path does not exist. With a relative path that would hide a wrong working
# directory until the first query fails on a missing table, and would leave a
# stray file behind. Check for the file up front and fail with a clear message.
db_file = '../space_analytics.db'
if not Path(db_file).is_file():
    raise FileNotFoundError(
        f'Database file not found: {Path(db_file).resolve()} '
        '(check the working directory of this notebook)'
    )

conn = duckdb.connect(db_file)
print('✓ Connected to space analytics database')

## 2. Quick Data Overview

In [None]:
# Summarise neo_approaches in a single row: total row count, number of
# distinct `des` values, and the smallest / largest approach_year.
overview_sql = """
    SELECT 
        COUNT(*) as total_records,
        COUNT(DISTINCT des) as unique_objects,
        MIN(approach_year) as earliest_year,
        MAX(approach_year) as latest_year
    FROM neo_approaches
"""
overview = conn.execute(overview_sql).df()

# The result has exactly one row; pull each aggregate out as a scalar.
total_records = overview["total_records"].iloc[0]
unique_objects = overview["unique_objects"].iloc[0]
earliest_year = overview["earliest_year"].iloc[0]
latest_year = overview["latest_year"].iloc[0]

print('📊 Dataset Overview:')
print(f'  Total Records: {total_records:,}')
print(f'  Unique Objects: {unique_objects:,}')
print(f'  Year Range: {earliest_year} - {latest_year}')

## 3. Performance Demo

In [None]:
# Demonstrate query performance: aggregate approaches per year for 1950-1970
# and report how long the query took.
#
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring short intervals, whereas the wall
# clock can jump if the system time is adjusted during the run.
start_time = time.perf_counter()

results = conn.execute("""
    SELECT
        approach_year,
        COUNT(*) as approaches,
        ROUND(AVG(dist), 6) as avg_distance,
        ROUND(MIN(dist), 6) as closest_approach
    FROM neo_approaches
    WHERE approach_year BETWEEN 1950 AND 1970
    GROUP BY approach_year
    ORDER BY approach_year
""").df()

# The measured span covers both query execution and the conversion of the
# result to a pandas DataFrame, since .df() runs inside the timed region.
duration = time.perf_counter() - start_time
print(f'✅ Query completed in {duration:.4f} seconds')
# One result row per approach_year, so len(results) is the number of years.
print(f'📈 Analyzed {len(results)} years of data')

## 4. Cleanup

In [None]:
# Release the DuckDB connection, then report that the demo has finished
conn.close()
for closing_message in (
    '🧹 Database connection closed',
    '\n🎯 Demo completed successfully!',
):
    print(closing_message)