# Database Performance Optimization: Practical Examples

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
from sqlalchemy import create_engine
import time

In [5]:
# Database Performance Optimization: Practical Examples

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
from sqlalchemy import create_engine
import time

# Create a sample database
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()

# Create a large table for demonstration
cursor.execute('''
CREATE TABLE orders (
    id INTEGER PRIMARY KEY,
    customer_id INTEGER,
    order_date TEXT,
    total_amount FLOAT
)
''')

# Generate sample data
np.random.seed(42)
num_records = 1000000
customer_ids = np.random.randint(1, 10001, num_records)
order_dates = pd.date_range(start='2020-01-01', end='2023-12-31', periods=num_records).strftime('%Y-%m-%d')
total_amounts = np.random.uniform(10, 1000, num_records)

# Insert data into the table
cursor.executemany(
    'INSERT INTO orders (customer_id, order_date, total_amount) VALUES (?, ?, ?)',
    zip(customer_ids, order_dates, total_amounts)
)
conn.commit()

print(f"Created a table with {num_records} records.")


Created a table with 1000000 records.


## Example 1: Demonstrating the impact of indexing


In [6]:
# Function to measure query execution time
def measure_query_time(query):
    start_time = time.time()
    cursor.execute(query)
    end_time = time.time()
    return end_time - start_time

print("\nExample 1: Impact of Indexing")

# Query without index
query_no_index = "SELECT * FROM orders WHERE customer_id = 5000"
time_no_index = measure_query_time(query_no_index)
print(f"Query time without index: {time_no_index:.4f} seconds")



Example 1: Impact of Indexing
Query time without index: 0.2332 seconds


In [7]:
# Create an index on customer_id
cursor.execute("CREATE INDEX idx_customer_id ON orders (customer_id)")
conn.commit()

# Query with index
time_with_index = measure_query_time(query_no_index)
print(f"Query time with index: {time_with_index:.4f} seconds")
print(f"Performance improvement: {(time_no_index - time_with_index) / time_no_index * 100:.2f}%")

Query time with index: 0.0000 seconds
Performance improvement: 99.99%


## Example 2: Query Optimization


In [8]:
print("\nExample 2: Query Optimization")

# Inefficient query
inefficient_query = """
SELECT customer_id, COUNT(*) as order_count
FROM orders
WHERE total_amount > 500
GROUP BY customer_id
HAVING COUNT(*) > 10
ORDER BY order_count DESC
"""

# Efficient query
efficient_query = """
SELECT customer_id, COUNT(*) as order_count
FROM orders
WHERE total_amount > 500
GROUP BY customer_id
HAVING order_count > 10
ORDER BY order_count DESC
"""

time_inefficient = measure_query_time(inefficient_query)
time_efficient = measure_query_time(efficient_query)

print(f"Inefficient query time: {time_inefficient:.4f} seconds")
print(f"Efficient query time: {time_efficient:.4f} seconds")
print(f"Performance improvement: {(time_inefficient - time_efficient) / time_inefficient * 100:.2f}%")



Example 2: Query Optimization
Inefficient query time: 0.7784 seconds
Efficient query time: 0.5202 seconds
Performance improvement: 33.17%


## Example 3: Caching Strategy


In [9]:
print("\nExample 3: Caching Strategy")

import functools

# Simple cache implementation
def simple_cache(func):
    cache = {}
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        key = str(args) + str(kwargs)
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]
    return wrapper

# Function to get total orders for a customer
@simple_cache
def get_total_orders(customer_id):
    cursor.execute("SELECT COUNT(*) FROM orders WHERE customer_id = ?", (customer_id,))
    return cursor.fetchone()[0]

# Measure time without cache
start_time = time.time()
for _ in range(1000):
    get_total_orders(5000)
end_time = time.time()
print(f"Time without cache (1000 calls): {end_time - start_time:.4f} seconds")

# Measure time with cache
start_time = time.time()
for _ in range(1000):
    get_total_orders(5000)
end_time = time.time()
print(f"Time with cache (1000 calls): {end_time - start_time:.4f} seconds")



Example 3: Caching Strategy
Time without cache (1000 calls): 0.0021 seconds
Time with cache (1000 calls): 0.0003 seconds


## Example 4: Batch Processing


In [10]:
print("\nExample 4: Batch Processing")

# Function to insert records one by one
def insert_individual(records):
    start_time = time.time()
    for record in records:
        cursor.execute("INSERT INTO orders (customer_id, order_date, total_amount) VALUES (?, ?, ?)", record)
    conn.commit()
    end_time = time.time()
    return end_time - start_time

# Function to insert records in batch
def insert_batch(records):
    start_time = time.time()
    cursor.executemany("INSERT INTO orders (customer_id, order_date, total_amount) VALUES (?, ?, ?)", records)
    conn.commit()
    end_time = time.time()
    return end_time - start_time

# Generate sample records
sample_records = [
    (np.random.randint(1, 10001), 
     (pd.Timestamp('2023-01-01') + pd.Timedelta(days=i)).strftime('%Y-%m-%d'), 
     np.random.uniform(10, 1000))
    for i in range(10000)
]

individual_time = insert_individual(sample_records[:100])  # Using only 100 records for individual insert to save time
batch_time = insert_batch(sample_records)

print(f"Time for individual inserts (100 records): {individual_time:.4f} seconds")
print(f"Time for batch insert (10000 records): {batch_time:.4f} seconds")
print(f"Batch insert is approximately {individual_time / (batch_time / 100):.2f} times faster per record")



Example 4: Batch Processing
Time for individual inserts (100 records): 0.0055 seconds
Time for batch insert (10000 records): 0.2049 seconds
Batch insert is approximately 2.68 times faster per record
