#### Final Project: Create a Database Analytics Dashboard

Challenge: Create a script that can connect to all three database types (SQLite, PostgreSQL, and SQL Server) and generate unified reports.

#### Workflow:

- Explore: Run the Database Explorer to see which tables and columns exist in each database
- Customize: Modify queries in the Manual Query Builder to match the actual schema
- Execute: Run unified reports with custom queries

Database Explorer:

In [None]:
import pandas as pd
import sqlite3
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv

class DatabaseExplorer:
    """Register database connections and print their structure to stdout.

    Connections are stored by name in ``self.connections``; nothing is opened
    until one of the ``explore_*`` methods runs. Each explorer prints the
    tables it finds, their columns and up to three sample rows. Failures are
    reported as printed messages rather than raised, and the connection or
    engine is always released afterwards.
    """

    def __init__(self):
        # Maps a connection name to a dict holding a 'type' key plus the
        # details that type needs ('path', 'connection_string', ...).
        self.connections = {}

    def add_sqlite_connection(self, name, db_path):
        """Register the SQLite database file ``db_path`` under ``name``."""
        self.connections[name] = {
            'type': 'sqlite',
            'path': db_path
        }
        print(f"Added SQLite connection: {name}")

    def add_postgresql_connection(self, name, connection_string):
        """Register a PostgreSQL database (SQLAlchemy URL) under ``name``."""
        self.connections[name] = {
            'type': 'postgresql',
            'connection_string': connection_string
        }
        print(f"Added PostgreSQL connection: {name}")

    def add_sqlserver_connection(self, name, connection_url):
        """Register a SQL Server database (full SQLAlchemy URL) under ``name``."""
        self.connections[name] = {
            'type': 'sqlserver',
            'connection_url': connection_url,
            'connection': None
        }
        print(f"Added SQL Server connection: {name}")

    @staticmethod
    def _quote_identifier(name, opening='"', closing='"'):
        """Wrap ``name`` in identifier quotes, doubling any closing quote inside it.

        The defaults give standard double-quoted identifiers (SQLite,
        PostgreSQL); pass ``"["`` and ``"]"`` for SQL Server brackets.
        """
        return f"{opening}{str(name).replace(closing, closing * 2)}{closing}"

    @staticmethod
    def _nullable_label(flag):
        """Translate an ``is_nullable`` value ('YES' or anything else) for display."""
        return "NULL" if flag == 'YES' else "NOT NULL"

    @staticmethod
    def _print_sample(sample_df, indent, max_len=None):
        """Print the sample-data section for one table.

        ``indent`` prefixes the heading; column names and rows are indented
        two further spaces. When ``max_len`` is given, every value is shown as
        a string and values longer than that are cut to ``max_len - 3``
        characters plus "...".
        """
        print(f"{indent}SAMPLE DATA ({len(sample_df)} rows):")
        if not sample_df.empty:
            print(f"{indent}  {list(sample_df.columns)}")
            for _, row in sample_df.iterrows():
                if max_len is None:
                    values = list(row.values)
                else:
                    values = []
                    for val in row.values:
                        str_val = str(val)
                        if len(str_val) > max_len:
                            str_val = str_val[:max_len - 3] + "..."
                        values.append(str_val)
                print(f"{indent}  {values}")
        print()

    def explore_sqlite(self, db_path):
        """Print tables, columns and sample rows of the SQLite file ``db_path``.

        Errors are printed, not raised.
        """
        conn = None
        try:
            conn = sqlite3.connect(db_path)

            # Get all tables
            tables_query = "SELECT name FROM sqlite_master WHERE type='table'"
            tables_df = pd.read_sql_query(tables_query, conn)

            print("TABLES:")
            for table in tables_df['name']:
                print(f"  - {table}")

                # Quote the name so tables containing spaces, quotes or
                # reserved words do not break the statements below.
                quoted = self._quote_identifier(table)

                # Get columns for each table
                columns_df = pd.read_sql_query(f"PRAGMA table_info({quoted})", conn)

                print("    COLUMNS:")
                for _, row in columns_df.iterrows():
                    print(f"      {row['name']} ({row['type']})")

                # Show sample data; a failure here only skips this table.
                try:
                    sample_df = pd.read_sql_query(f"SELECT * FROM {quoted} LIMIT 3", conn)
                    self._print_sample(sample_df, "    ")
                except Exception as e:
                    print(f"      Error getting sample data: {e}\n")

        except Exception as e:
            print(f"Error exploring SQLite database: {e}")
        finally:
            # Close even when a query above raised.
            if conn is not None:
                conn.close()

    def explore_postgresql(self, connection_string):
        """Print tables, columns and sample rows of the ``public`` schema.

        ``connection_string`` is a SQLAlchemy URL. Errors are printed, not
        raised.
        """
        engine = None
        try:
            engine = create_engine(connection_string)

            # Get all tables
            tables_query = """
            SELECT table_name 
            FROM information_schema.tables 
            WHERE table_schema = 'public'
            ORDER BY table_name
            """
            tables_df = pd.read_sql_query(text(tables_query), engine)

            # The table name is a bound parameter, and the lookup is limited
            # to 'public' so same-named tables in other schemas are excluded.
            columns_stmt = text("""
                SELECT column_name, data_type, is_nullable
                FROM information_schema.columns
                WHERE table_schema = 'public' AND table_name = :table_name
                ORDER BY ordinal_position
            """)

            print("TABLES:")
            for table in tables_df['table_name']:
                print(f"  - {table}")

                # Get columns for each table
                columns_df = pd.read_sql_query(
                    columns_stmt.bindparams(table_name=table), engine
                )

                print("    COLUMNS:")
                for _, row in columns_df.iterrows():
                    nullable = self._nullable_label(row['is_nullable'])
                    print(f"      {row['column_name']} ({row['data_type']}, {nullable})")

                # Show sample data; quoting keeps mixed-case names intact.
                try:
                    sample_query = (
                        f"SELECT * FROM public.{self._quote_identifier(table)} LIMIT 3"
                    )
                    sample_df = pd.read_sql_query(text(sample_query), engine)
                    self._print_sample(sample_df, "    ")
                except Exception as e:
                    print(f"      Error getting sample data: {e}\n")

        except Exception as e:
            print(f"Error exploring PostgreSQL database: {e}")
        finally:
            # Release pooled connections even when a query above raised.
            if engine is not None:
                engine.dispose()

    def explore_sqlserver(self, conn_info):
        """Print base tables grouped by schema, with columns and sample rows.

        ``conn_info`` is either a dict with a ``'connection_url'`` key (full
        SQLAlchemy URL) or a legacy dict with ``'server'``, ``'database'`` and
        an optional ``'trusted'`` flag (default True). Errors are printed, not
        raised.
        """
        engine = None
        try:
            # Check if a full connection URL is provided
            if 'connection_url' in conn_info:
                connection_string = conn_info['connection_url']
            else:
                # Build from parts (legacy style)
                server = conn_info['server']
                database = conn_info['database']
                trusted_str = 'yes' if conn_info.get('trusted', True) else 'no'

                connection_string = (
                    f"mssql+pyodbc://@{server}/{database}"
                    f"?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection={trusted_str}"
                )

            engine = create_engine(connection_string)

            # Get all tables with schema information
            tables_query = """
                SELECT TABLE_SCHEMA, TABLE_NAME,
                       TABLE_SCHEMA + '.' + TABLE_NAME as FULL_TABLE_NAME
                FROM INFORMATION_SCHEMA.TABLES 
                WHERE TABLE_TYPE = 'BASE TABLE'
                ORDER BY TABLE_SCHEMA, TABLE_NAME
            """
            tables_df = pd.read_sql_query(text(tables_query), engine)

            # Schema and table names are bound parameters, not spliced text.
            columns_stmt = text("""
                SELECT COLUMN_NAME, DATA_TYPE, IS_NULLABLE
                FROM INFORMATION_SCHEMA.COLUMNS
                WHERE TABLE_SCHEMA = :schema_name AND TABLE_NAME = :table_name
                ORDER BY ORDINAL_POSITION
            """)

            print("TABLES:")
            current_schema = None

            for _, table_row in tables_df.iterrows():
                schema = table_row['TABLE_SCHEMA']
                table = table_row['TABLE_NAME']
                full_name = table_row['FULL_TABLE_NAME']

                # Print schema header when it changes
                if current_schema != schema:
                    if current_schema is not None:
                        print()  # Add spacing between schemas
                    print(f"  SCHEMA: {schema}")
                    current_schema = schema

                print(f"    - {full_name}")

                # Get columns for each table
                columns_df = pd.read_sql_query(
                    columns_stmt.bindparams(schema_name=schema, table_name=table),
                    engine,
                )

                print("      COLUMNS:")
                for _, row in columns_df.iterrows():
                    nullable = self._nullable_label(row['IS_NULLABLE'])
                    print(f"        {row['COLUMN_NAME']} ({row['DATA_TYPE']}, {nullable})")

                # Show sample data using the bracket-quoted schema.table name
                try:
                    quoted_schema = self._quote_identifier(schema, "[", "]")
                    quoted_table = self._quote_identifier(table, "[", "]")
                    sample_query = f"SELECT TOP 3 * FROM {quoted_schema}.{quoted_table}"
                    sample_df = pd.read_sql_query(text(sample_query), engine)
                    # Long values are truncated to keep each row readable.
                    self._print_sample(sample_df, "      ", max_len=50)
                except Exception as e:
                    print(f"      Error getting sample data: {str(e)[:100]}...\n")

        except Exception as e:
            print(f"Error exploring SQL Server database: {e}")
        finally:
            # Release pooled connections even when a query above raised.
            if engine is not None:
                engine.dispose()

    def explore_all_databases(self):
        """Explore every registered connection in registration order."""
        for name, conn_info in self.connections.items():
            print(f"\n{'='*50}")
            print(f"EXPLORING DATABASE: {name.upper()}")
            print(f"TYPE: {conn_info['type'].upper()}")
            print(f"{'='*50}")

            if conn_info['type'] == 'sqlite':
                self.explore_sqlite(conn_info['path'])
            elif conn_info['type'] == 'postgresql':
                self.explore_postgresql(conn_info['connection_string'])
            elif conn_info['type'] == 'sqlserver':
                self.explore_sqlserver(conn_info)

# Usage Example
# Load environment variables (connection URLs are read from them below)
load_dotenv()

explorer = DatabaseExplorer()

# Add all your database connections
# SQLite connection (uncomment if you have the file)
# explorer.add_sqlite_connection('local_db', 'business_data.db')

# PostgreSQL connection (uncomment if you have PostgreSQL)
# postgres_url = os.getenv('DATABASE_URL')
# if postgres_url:
#     explorer.add_postgresql_connection('pg_db', postgres_url)

# SQL Server connection, registered only when SQLSERVER_URL is set
sqlserver_url = os.getenv('SQLSERVER_URL')
if sqlserver_url:
    explorer.add_sqlserver_connection('AdventureWorks2022', sqlserver_url)

# Explore all databases
print("EXPLORING ALL CONFIGURED DATABASES:", "=" * 60, sep="\n")
explorer.explore_all_databases()

# You can also explore individual databases
print("", "=" * 60, "INDIVIDUAL DATABASE EXPLORATION", "=" * 60, sep="\n")

# Explore just the SQL Server database directly, using a hard-coded local
# instance URL; this replaces any 'AdventureWorks2022' entry added above.
print("\nExploring SQL Server AdventureWorks2022 database:")
sqlserver_url_direct = (
    "mssql+pyodbc://localhost\\SQLEXPRESS/AdventureWorks2022"
    "?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes"
)
explorer.add_sqlserver_connection('AdventureWorks2022', sqlserver_url_direct)
explorer.explore_sqlserver(explorer.connections['AdventureWorks2022'])

# Closing summary for the reader of the notebook output
print(
    "",
    "=" * 60,
    "INSTRUCTIONS:",
    "=" * 60,
    "✅ Database Explorer is working!",
    "📋 Tables are organized by schema (Person, Sales, Production, etc.)",
    "🔍 Sample data is shown for each table",
    "💡 Use schema.table format in queries (e.g., Person.Address)",
    "🚀 Ready for the next step: Manual Query Builder!",
    "=" * 60,
    sep="\n",
)


In [4]:
import os
import re
import sqlite3
import warnings
from datetime import datetime

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
warnings.filterwarnings('ignore')

class MultiDatabaseQueryBuilder:
    """Run ad-hoc SQL against named SQLite, PostgreSQL and SQL Server connections.

    Connections are registered with the ``add_*_connection`` methods, which
    probe the database first and return True/False. ``execute_query`` runs a
    statement, prints the result, records it in ``query_history`` and returns
    a DataFrame (``None`` on failure). A fresh connection or engine is opened
    for every query and always released afterwards.
    """

    # Row-limit keywords are matched as whole words so that identifiers such
    # as ``credit_limit`` or ``Laptop`` are not mistaken for an existing clause.
    _LIMIT_RE = re.compile(r"\bLIMIT\b", re.IGNORECASE)
    _TOP_RE = re.compile(r"\bTOP\b", re.IGNORECASE)
    # T-SQL places TOP after an optional DISTINCT/ALL (SELECT DISTINCT TOP n ...),
    # so the insertion point is the end of this leading match.
    _LEADING_SELECT_RE = re.compile(
        r"^\s*SELECT\b(?:\s+(?:DISTINCT|ALL)\b)?", re.IGNORECASE
    )

    def __init__(self):
        # name -> dict with 'type', 'status' and the type-specific details.
        self.connections = {}
        # One dict per successful query: timestamp, database, query, rows_returned.
        self.query_history = []
        load_dotenv()

    @staticmethod
    def _probe_engine(url):
        """Open and close one connection on ``url``; raise if that fails."""
        engine = create_engine(url)
        try:
            with engine.connect():
                pass
        finally:
            engine.dispose()

    def add_sqlite_connection(self, name, db_path):
        """Register a SQLite database; return True on success, False otherwise."""
        try:
            # Test connection. sqlite3.connect() is lazy, so read the catalog
            # to confirm the file really is a usable database.
            conn = sqlite3.connect(db_path)
            try:
                conn.execute("SELECT name FROM sqlite_master LIMIT 1")
            finally:
                conn.close()

            self.connections[name] = {
                'type': 'sqlite',
                'path': db_path,
                'status': 'connected'
            }
            print(f"✅ SQLite connection '{name}' added successfully")
            return True
        except Exception as e:
            print(f"❌ Failed to add SQLite connection '{name}': {e}")
            return False

    def add_postgresql_connection(self, name, connection_string):
        """Register a PostgreSQL database; return True on success, False otherwise."""
        try:
            # Test connection
            self._probe_engine(connection_string)

            self.connections[name] = {
                'type': 'postgresql',
                'connection_string': connection_string,
                'status': 'connected'
            }
            print(f"✅ PostgreSQL connection '{name}' added successfully")
            return True
        except Exception as e:
            print(f"❌ Failed to add PostgreSQL connection '{name}': {e}")
            return False

    def add_sqlserver_connection(self, name, connection_url):
        """Register a SQL Server database; return True on success, False otherwise."""
        try:
            # Test connection
            self._probe_engine(connection_url)

            self.connections[name] = {
                'type': 'sqlserver',
                'connection_url': connection_url,
                'status': 'connected'
            }
            print(f"✅ SQL Server connection '{name}' added successfully")
            return True
        except Exception as e:
            print(f"❌ Failed to add SQL Server connection '{name}': {e}")
            return False

    def list_connections(self):
        """Print every registered connection with its type and status icon."""
        if not self.connections:
            print("No database connections configured.")
            return

        print("AVAILABLE DATABASE CONNECTIONS:")
        print("=" * 50)
        for name, info in self.connections.items():
            status_icon = "🟢" if info['status'] == 'connected' else "🔴"
            print(f"{status_icon} {name} ({info['type'].upper()})")

    def execute_query(self, database_name, query, limit=None, export_to=None):
        """Run ``query`` on the named connection and print the result.

        Args:
            database_name: Name used when the connection was registered.
            query: SQL text to execute.
            limit: Optional row cap, added as LIMIT (SQLite/PostgreSQL) or TOP
                (SQL Server) when the query has no such clause already.
            export_to: Optional ``.csv``/``.xlsx``/``.xls`` path; written only
                when the query returned rows.

        Returns:
            The result DataFrame, an empty DataFrame when no rows came back,
            or ``None`` if the database is unknown or the query failed.
        """

        if database_name not in self.connections:
            print(f"❌ Database '{database_name}' not found. Available: {list(self.connections.keys())}")
            return None

        conn_info = self.connections[database_name]

        try:
            print(f"🔍 Executing query on {database_name} ({conn_info['type'].upper()})...")
            print(f"📝 Query: {query[:100]}..." if len(query) > 100 else f"📝 Query: {query}")
            print("-" * 60)

            # Execute based on database type
            if conn_info['type'] == 'sqlite':
                result_df = self._execute_sqlite_query(conn_info, query, limit)
            elif conn_info['type'] == 'postgresql':
                result_df = self._execute_postgresql_query(conn_info, query, limit)
            elif conn_info['type'] == 'sqlserver':
                result_df = self._execute_sqlserver_query(conn_info, query, limit)
            else:
                print(f"❌ Unsupported database type: {conn_info['type']}")
                return None

            # Log query to history (only reached when execution succeeded)
            self.query_history.append({
                'timestamp': datetime.now(),
                'database': database_name,
                'query': query,
                'rows_returned': len(result_df) if result_df is not None else 0
            })

            # Display results
            if result_df is not None and not result_df.empty:
                print(f"✅ Query executed successfully! Returned {len(result_df)} rows")
                print(f"📊 Columns: {list(result_df.columns)}")
                print("\n📋 RESULTS:")
                print("=" * 80)

                # Apply the wide-display options for this printout only, so
                # the caller's global pandas settings are left untouched.
                with pd.option_context(
                    'display.max_columns', None,
                    'display.width', None,
                    'display.max_colwidth', 50,
                ):
                    print(result_df.to_string(index=False))

                # Export if requested
                if export_to:
                    self._export_results(result_df, export_to)

                return result_df
            else:
                print("✅ Query executed successfully but returned no results")
                return pd.DataFrame()

        except Exception as e:
            print(f"❌ Query execution failed: {e}")
            return None

    @classmethod
    def _apply_limit(cls, query, limit):
        """Return ``query`` with ``LIMIT <limit>`` appended unless it has one.

        Trailing whitespace and semicolons are removed first so the clause
        lands inside the statement. ``limit`` must be convertible to int.
        """
        if not limit or cls._LIMIT_RE.search(query):
            return query
        statement = query.rstrip().rstrip(';').rstrip()
        return f"{statement} LIMIT {int(limit)}"

    @classmethod
    def _apply_top(cls, query, limit):
        """Return ``query`` with ``TOP <limit>`` inserted after its leading SELECT.

        Queries that already use TOP, or that do not start with SELECT (for
        example a CTE), are returned unchanged because there is no safe place
        to insert the clause. ``limit`` must be convertible to int.
        """
        if not limit or cls._TOP_RE.search(query):
            return query
        match = cls._LEADING_SELECT_RE.match(query)
        if match is None:
            return query
        cut = match.end()
        return f"{query[:cut]} TOP {int(limit)}{query[cut:]}"

    def _execute_sqlite_query(self, conn_info, query, limit):
        """Run ``query`` on the SQLite file in ``conn_info['path']``."""
        # Add LIMIT if specified and not already in query
        query = self._apply_limit(query, limit)

        conn = sqlite3.connect(conn_info['path'])
        try:
            return pd.read_sql_query(query, conn)
        finally:
            conn.close()

    def _execute_postgresql_query(self, conn_info, query, limit):
        """Run ``query`` on the PostgreSQL URL in ``conn_info['connection_string']``."""
        # Add LIMIT if specified and not already in query
        query = self._apply_limit(query, limit)

        engine = create_engine(conn_info['connection_string'])
        try:
            return pd.read_sql_query(text(query), engine)
        finally:
            engine.dispose()

    def _execute_sqlserver_query(self, conn_info, query, limit):
        """Run ``query`` on the SQL Server URL in ``conn_info['connection_url']``."""
        # Add TOP if specified and not already in query
        query = self._apply_top(query, limit)

        engine = create_engine(conn_info['connection_url'])
        try:
            return pd.read_sql_query(text(query), engine)
        finally:
            engine.dispose()

    def _export_results(self, df, export_path):
        """Write ``df`` to ``export_path`` as CSV or Excel, chosen by extension.

        The extension is matched case-insensitively. Failures are printed,
        not raised.
        """
        try:
            suffix_source = str(export_path).lower()
            if suffix_source.endswith('.csv'):
                df.to_csv(export_path, index=False)
                print(f"💾 Results exported to {export_path}")
            elif suffix_source.endswith(('.xlsx', '.xls')):
                df.to_excel(export_path, index=False)
                print(f"💾 Results exported to {export_path}")
            else:
                print(f"❌ Unsupported export format. Use .csv or .xlsx")
        except Exception as e:
            print(f"❌ Export failed: {e}")

    def quick_table_query(self, database_name, table_name, columns="*", where_clause="", limit=10):
        """Build ``SELECT columns FROM table_name [WHERE ...]`` and execute it.

        ``where_clause`` may be given with or without the leading ``WHERE``.
        The pieces are inserted into the SQL text as-is, so they must come
        from a trusted source. Returns whatever ``execute_query`` returns.
        """

        if where_clause and not where_clause.upper().strip().startswith('WHERE'):
            where_clause = f"WHERE {where_clause}"

        query = f"SELECT {columns} FROM {table_name} {where_clause}".strip()

        return self.execute_query(database_name, query, limit=limit)

    def show_query_history(self):
        """Print the ten most recent successful queries."""
        if not self.query_history:
            print("No query history available.")
            return

        print("QUERY HISTORY:")
        print("=" * 80)
        for i, entry in enumerate(self.query_history[-10:], 1):  # Show last 10
            timestamp = entry['timestamp'].strftime("%Y-%m-%d %H:%M:%S")
            query_preview = entry['query'][:50] + "..." if len(entry['query']) > 50 else entry['query']
            print(f"{i}. [{timestamp}] {entry['database']} - {entry['rows_returned']} rows")
            print(f"   Query: {query_preview}")
            print()

    def get_database_info(self, database_name):
        """List the tables of a registered database and print how many there are."""
        if database_name not in self.connections:
            print(f"❌ Database '{database_name}' not found")
            return

        conn_info = self.connections[database_name]

        try:
            if conn_info['type'] == 'sqlite':
                query = "SELECT name FROM sqlite_master WHERE type='table'"
            elif conn_info['type'] == 'postgresql':
                query = "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
            elif conn_info['type'] == 'sqlserver':
                query = """SELECT TABLE_SCHEMA + '.' + TABLE_NAME as full_name 
                          FROM INFORMATION_SCHEMA.TABLES 
                          WHERE TABLE_TYPE = 'BASE TABLE' 
                          ORDER BY TABLE_SCHEMA, TABLE_NAME"""
            else:
                print(f"❌ Unsupported database type: {conn_info['type']}")
                return

            # No row cap: the count printed below must cover every table.
            tables_df = self.execute_query(database_name, query)

            if tables_df is not None:
                print(f"\n📊 Database '{database_name}' has {len(tables_df)} tables")

        except Exception as e:
            print(f"❌ Error getting database info: {e}")

# Initialize Query Builder
print("🚀 MULTI-DATABASE QUERY BUILDER", "=" * 60, sep="\n")

query_builder = MultiDatabaseQueryBuilder()

# Auto-setup connections from environment variables
print("Setting up database connections...")

# SQL Server connection (primary): use SQLSERVER_URL when it is set,
# otherwise fall back to a direct connection to the local instance.
sqlserver_url = os.getenv('SQLSERVER_URL') or (
    "mssql+pyodbc://localhost\\SQLEXPRESS/AdventureWorks2022"
    "?driver=ODBC+Driver+17+for+SQL+Server&trusted_connection=yes"
)
query_builder.add_sqlserver_connection('adventureworks', sqlserver_url)

# PostgreSQL connection (if available)
postgres_url = os.getenv('DATABASE_URL')
if postgres_url:
    query_builder.add_postgresql_connection('postgres_db', postgres_url)

# SQLite connection: register the first candidate file that exists
sqlite_files = ['business_data.db', 'sample.db', 'data.db']
for db_file in sqlite_files:
    if not os.path.exists(db_file):
        continue
    query_builder.add_sqlite_connection('sqlite_' + db_file.replace(".", "_"), db_file)
    break

print("", "=" * 60, sep="\n")
query_builder.list_connections()

# Heading for the usage examples
print("", "=" * 60, "EXAMPLE USAGE:", "=" * 60, sep="\n")

# The examples are printed as text for the reader to copy; none is executed here.
print("""
# List all connections
query_builder.list_connections()

# Get database info
query_builder.get_database_info('adventureworks')

# Quick table query
query_builder.quick_table_query('adventureworks', 'dbo.AWBuildVersion')

# Custom SQL query
query = '''
SELECT TABLE_SCHEMA, COUNT(*) as table_count 
FROM INFORMATION_SCHEMA.TABLES 
WHERE TABLE_TYPE = 'BASE TABLE'
GROUP BY TABLE_SCHEMA
'''
query_builder.execute_query('adventureworks', query)

# Query with export
query_builder.execute_query('adventureworks', 
    'SELECT TOP 100 * FROM dbo.DatabaseLog ORDER BY PostTime DESC',
    export_to='database_log.csv')

# Complex query example for AdventureWorks
complex_query = '''
SELECT 
    p.Name as ProductName,
    pc.Name as CategoryName,
    p.ListPrice,
    p.Color
FROM Production.Product p
JOIN Production.ProductSubcategory ps ON p.ProductSubcategoryID = ps.ProductSubcategoryID  
JOIN Production.ProductCategory pc ON ps.ProductCategoryID = pc.ProductCategoryID
WHERE p.ListPrice > 0
ORDER BY p.ListPrice DESC
'''
query_builder.execute_query('adventureworks', complex_query, limit=20)

# Show query history
query_builder.show_query_history()
""")

# Closing tips, emitted as one block between separator rules
print(
    "",
    "=" * 60,
    "💡 TIPS:",
    "=" * 60,
    "• Use schema.table format for SQL Server (e.g., 'Person.Address')",
    "• Queries automatically get LIMIT/TOP clauses for safety",
    "• Export results to CSV or Excel with export_to parameter",
    "• Use quick_table_query() for simple SELECT statements",
    "• Check query_history to see past executions",
    "• SQL Server tables: Person, Sales, Production, HumanResources schemas",
    "=" * 60,
    sep="\n",
)

🚀 MULTI-DATABASE QUERY BUILDER
Setting up database connections...
✅ SQL Server connection 'adventureworks' added successfully
✅ PostgreSQL connection 'postgres_db' added successfully
✅ SQLite connection 'sqlite_business_data_db' added successfully

AVAILABLE DATABASE CONNECTIONS:
🟢 adventureworks (SQLSERVER)
🟢 postgres_db (POSTGRESQL)
🟢 sqlite_business_data_db (SQLITE)

EXAMPLE USAGE:

# List all connections
query_builder.list_connections()

# Get database info
query_builder.get_database_info('adventureworks')

# Quick table query
query_builder.quick_table_query('adventureworks', 'dbo.AWBuildVersion')

# Custom SQL query
query = '''
SELECT TABLE_SCHEMA, COUNT(*) as table_count 
FROM INFORMATION_SCHEMA.TABLES 
WHERE TABLE_TYPE = 'BASE TABLE'
GROUP BY TABLE_SCHEMA
'''
query_builder.execute_query('adventureworks', query)

# Query with export
query_builder.execute_query('adventureworks', 
    'SELECT TOP 100 * FROM dbo.DatabaseLog ORDER BY PostTime DESC',
    export_to='database_log.csv')
