# Sample Data Analysis with SQL Server

This notebook demonstrates how to connect to SQL Server and perform data analysis using pandas.

In [None]:
# Import required libraries
import sys
# Extend the module search path before the db_connection import below.
# NOTE(review): '/app' is a hard-coded absolute path — presumably where
# db_connection.py lives in the target environment; confirm before running
# this notebook anywhere else.
sys.path.append('/app')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from db_connection import get_db_connection

# Set up plotting: apply the matplotlib 'default' style first, then the
# seaborn "husl" palette.
# NOTE(review): keep this order — applying the style after set_palette would
# presumably reset the color cycle; verify against the matplotlib/seaborn docs.
plt.style.use('default')
sns.set_palette("husl")
# IPython magic that selects the inline matplotlib backend for this kernel.
%matplotlib inline

In [None]:
# Connect to database
# get_db_connection comes from the project-local db_connection module imported
# in the setup cell. The returned handle `db` is the object the later cells
# call query_to_dataframe() and close() on.
# NOTE(review): the success message is printed unconditionally once the call
# returns — presumably get_db_connection raises on failure; confirm.
db = get_db_connection()
print("✅ Connected to SQL Server!")

In [None]:
# Load data into DataFrames
# Each table is fetched in full with its own query, in a fixed order;
# `customers` and `products` are previewed by the next cell.
customers = db.query_to_dataframe("SELECT * FROM Customers")
products = db.query_to_dataframe("SELECT * FROM Products")
orders = db.query_to_dataframe("SELECT * FROM Orders")
order_details = db.query_to_dataframe("SELECT * FROM OrderDetails")

# Status banner, then one row-count line per table emitted in a single call.
print("Data loaded successfully!")
print(
    f"Customers: {len(customers)} rows",
    f"Products: {len(products)} rows",
    f"Orders: {len(orders)} rows",
    f"Order Details: {len(order_details)} rows",
    sep="\n",
)

In [None]:
# Display sample data
# Walk the (heading, frame) pairs in order: print the heading, then show the
# first rows of the frame with the notebook's rich display.
sample_sections = (
    ("Sample Customers:", customers),
    ("\nSample Products:", products),
)
for sample_heading, sample_frame in sample_sections:
    print(sample_heading)
    display(sample_frame.head())

In [None]:
# Perform analysis: Sales by product
#
# The query joins Products to OrderDetails on ProductID and, per product,
# sums the ordered quantity (TotalQuantity) and Quantity * Price
# (TotalRevenue), sorted by TotalRevenue descending.
# The join is an inner JOIN, so products that never appear in OrderDetails
# are absent from the result.
# NOTE(review): revenue is computed from Products.Price, i.e. the price stored
# on the product row — confirm whether OrderDetails carries a per-order unit
# price that should be used instead.
sales_query = """
SELECT 
    p.ProductName,
    p.Category,
    p.Price,
    SUM(od.Quantity) as TotalQuantity,
    SUM(od.Quantity * p.Price) as TotalRevenue
FROM Products p
JOIN OrderDetails od ON p.ProductID = od.ProductID
GROUP BY p.ProductID, p.ProductName, p.Category, p.Price
ORDER BY TotalRevenue DESC
"""

# `sales_data` is reused by the visualization cell that follows.
sales_data = db.query_to_dataframe(sales_query)
display(sales_data)

In [None]:
# Visualization: Revenue by product
#
# Two side-by-side panels built from `sales_data`:
#   left  — horizontal bar chart of TotalRevenue per ProductName
#   right — pie chart of TotalRevenue summed per Category
# The explicit Figure/Axes interface is used instead of the implicit pyplot
# "current axes" state, so each call names the panel it draws on.
fig, (ax_product, ax_category) = plt.subplots(1, 2, figsize=(12, 6))

sns.barplot(data=sales_data, x='TotalRevenue', y='ProductName', ax=ax_product)
ax_product.set_title('Revenue by Product')
ax_product.set_xlabel('Total Revenue ($)')

# Collapse the per-product rows to one revenue total per category.
category_revenue = sales_data.groupby('Category')['TotalRevenue'].sum()
ax_category.pie(
    category_revenue.values,
    labels=category_revenue.index,
    autopct='%1.1f%%',
)
ax_category.set_title('Revenue by Category')

fig.tight_layout()
plt.show()

In [None]:
# Customer analysis: number of orders and total spend per customer.
#
# The LEFT JOINs keep customers that have no orders; for those rows
# OrderCount is 0 and TotalSpent is NULL (SUM over no non-NULL values).
# Joining OrderDetails produces one row per order line, so orders are counted
# with COUNT(DISTINCT o.OrderID): a plain COUNT(o.OrderID) would count an
# order once per line item and overstate OrderCount for multi-item orders.
# NOTE(review): TotalSpent multiplies by Products.Price, i.e. the price stored
# on the product row — confirm whether a per-order unit price should be used.
customer_orders_query = """
SELECT 
    c.CustomerName,
    c.City,
    COUNT(DISTINCT o.OrderID) as OrderCount,
    SUM(od.Quantity * p.Price) as TotalSpent
FROM Customers c
LEFT JOIN Orders o ON c.CustomerID = o.CustomerID
LEFT JOIN OrderDetails od ON o.OrderID = od.OrderID
LEFT JOIN Products p ON od.ProductID = p.ProductID
GROUP BY c.CustomerID, c.CustomerName, c.City
ORDER BY TotalSpent DESC
"""

customer_analysis = db.query_to_dataframe(customer_orders_query)
display(customer_analysis)

In [None]:
# Close database connection
# This is the last cell: every query cell above goes through `db`.
# NOTE(review): presumably the handle is unusable after close(), so re-running
# a query cell afterwards would need the connect cell re-run first — confirm
# against db_connection.
db.close()
print("🔌 Database connection closed")