In [None]:
import pandas as pd
import subprocess
from sqlalchemy import create_engine, text
import os

# Connection settings for the project database
DB_NAME = 'dealership_final_db'
DB_USER = 'student'
DB_HOST = 'localhost'
DB_PORT = '5432'

# Folder that holds the cleaned CSV files loaded later in the notebook
DATA_PATH = '/workspaces/Fall2025-MS3083-Base_Template/data/'

# SQLAlchemy URL assembled from the settings above (no password component)
connection_string = 'postgresql://{}@{}:{}/{}'.format(DB_USER, DB_HOST, DB_PORT, DB_NAME)

# Echo the configuration so it is visible in the cell output
print(
    f"✓ Data path: {DATA_PATH}",
    f"✓ Database will be created: {DB_NAME}",
    "✓ Connection parameters set",
    sep="\n",
)

# Report whether the data folder can be found; this only warns, it never raises
print(
    "✓ Data folder verified and accessible"
    if os.path.exists(DATA_PATH)
    else f"⚠ Warning: Data folder not found at {DATA_PATH}"
)

**Instructions:** Drop any existing database and create a fresh database for the project.
- Use subprocess to run the `dropdb` command with `--if-exists` flag to remove any existing database
- Use subprocess to run the `createdb` command to create a new database
- Check the return code to verify success
- If successful, create a SQLAlchemy engine and print confirmation messages
- If there's an error, print the error message

In [None]:
# Drop and create database
#
# When this cell is re-run, the engine from the previous run may still hold
# pooled connections to the database; release them first, otherwise dropdb
# can fail because the database is still in use.
try:
    engine.dispose()
except NameError:
    pass  # first run: no engine has been created yet

# Remove any previous copy of the database and report a failed drop instead
# of discarding it silently.
drop_result = subprocess.run(
    ['dropdb', '--if-exists', DB_NAME, '-U', DB_USER],
    capture_output=True,
    text=True
)
if drop_result.returncode != 0:
    print(f"Error: {drop_result.stderr}")

# Create the fresh database; the return code decides which branch runs below.
result = subprocess.run(
    ['createdb', DB_NAME, '-U', DB_USER],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    print(f"✓ Database '{DB_NAME}' created")
    # create_engine() is lazy: the first real connection is opened by the
    # first statement executed in a later cell.
    engine = create_engine(connection_string)
    print("✓ Connection established")
else:
    print(f"Error: {result.stderr}")

## Part 2: Create Tables

**Instructions:** Create the customers table with appropriate columns and data types.
- Write a CREATE TABLE statement for the customers table
- Include columns: customer_id (SERIAL PRIMARY KEY), first_name, last_name, full_name, email, phone, state, zip_code, registration_date
- Use appropriate data types: VARCHAR for text fields, DATE for dates
- Set NOT NULL constraints where appropriate
- Execute the SQL statement using the engine connection
- Commit the transaction and print a confirmation message

In [None]:
# Create customers table
# Text columns use generous VARCHAR widths so the cleaned CSV values fit;
# first_name and last_name are required, registration_date is a DATE.
create_customers = """
CREATE TABLE customers (
    customer_id SERIAL PRIMARY KEY,
    first_name VARCHAR(100) NOT NULL,
    last_name VARCHAR(100) NOT NULL,
    full_name VARCHAR(200),
    email VARCHAR(255),
    phone VARCHAR(30),
    state VARCHAR(50),
    zip_code VARCHAR(10),
    registration_date DATE
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_customers))
    conn.commit()

print("✓ customers table created")

**Instructions:** Create the salespeople table to store employee information.
- Write a CREATE TABLE statement for the salespeople table
- Include columns: salesperson_id (SERIAL PRIMARY KEY), salesperson_name, hire_date, email, phone, commission_rate, department, status
- Use DECIMAL(4,3) for commission_rate to store percentages with precision
- Execute the SQL and commit the transaction
- Print a confirmation message

In [None]:
# Create salespeople table
# commission_rate is DECIMAL(4,3): one integer digit and three decimals,
# e.g. 0.045 for a 4.5 percent commission.
create_salespeople = """
CREATE TABLE salespeople (
    salesperson_id SERIAL PRIMARY KEY,
    salesperson_name VARCHAR(100) NOT NULL,
    hire_date DATE,
    email VARCHAR(255),
    phone VARCHAR(30),
    commission_rate DECIMAL(4,3),
    department VARCHAR(50),
    status VARCHAR(20)
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_salespeople))
    conn.commit()

print("✓ salespeople table created")

**Instructions:** Create the vehicles table to store vehicle inventory.
- Write a CREATE TABLE statement for the vehicles table
- Include columns: vehicle_id (SERIAL PRIMARY KEY), vin (UNIQUE), make, model, year, color, mileage, condition, purchase_price, lot_date
- Set vin as UNIQUE and NOT NULL
- Use INTEGER for year and mileage, DECIMAL(10,2) for purchase_price
- Execute the SQL and commit
- Print a confirmation message

In [None]:
# Create vehicles table
# vin is the natural key used to join service records, so it is UNIQUE and
# NOT NULL; year and mileage are integers, purchase_price is DECIMAL(10,2).
create_vehicles = """
CREATE TABLE vehicles (
    vehicle_id SERIAL PRIMARY KEY,
    vin VARCHAR(50) UNIQUE NOT NULL,
    make VARCHAR(50),
    model VARCHAR(50),
    year INTEGER,
    color VARCHAR(30),
    mileage INTEGER,
    condition VARCHAR(30),
    purchase_price DECIMAL(10,2),
    lot_date DATE
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_vehicles))
    conn.commit()

print("✓ vehicles table created")

**Instructions:** Create the sales table to store sales transactions.
- Write a CREATE TABLE statement for the sales table
- Include columns: sale_id (SERIAL PRIMARY KEY), customer_name, vehicle_make, vehicle_model, sale_date, sale_price, salesperson, payment_method, trade_in_value, year
- Use appropriate data types: VARCHAR for text fields, DATE for sale_date, DECIMAL(10,2) for prices, INTEGER for year
- Set NOT NULL for key fields like sale_date and sale_price
- Execute the SQL and commit
- Print a confirmation message

In [None]:
# Create sales table
# sale_date and sale_price are the key facts of a transaction and are
# required; the remaining descriptive columns may be empty.
create_sales = """
CREATE TABLE sales (
    sale_id SERIAL PRIMARY KEY,
    customer_name VARCHAR(200),
    vehicle_make VARCHAR(50),
    vehicle_model VARCHAR(50),
    sale_date DATE NOT NULL,
    sale_price DECIMAL(10,2) NOT NULL,
    salesperson VARCHAR(100),
    payment_method VARCHAR(50),
    trade_in_value DECIMAL(10,2),
    year INTEGER
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_sales))
    conn.commit()

print("✓ sales table created")

**Instructions:** Create the service_records table to store vehicle service history.
- Write a CREATE TABLE statement for the service_records table
- Include columns: service_id (SERIAL PRIMARY KEY), vin, service_date, service_type, mechanic_name, labor_cost, parts_cost, notes
- Use appropriate data types: VARCHAR for text, DATE for service_date, DECIMAL(8,2) for costs, TEXT for notes
- Set service_date as NOT NULL
- Execute the SQL and commit
- Print a confirmation message

In [None]:
# Create service_records table
# vin links a service record to a vehicle; costs are DECIMAL(8,2) and the
# free-form notes column is TEXT. service_date is required.
create_service = """
CREATE TABLE service_records (
    service_id SERIAL PRIMARY KEY,
    vin VARCHAR(50),
    service_date DATE NOT NULL,
    service_type VARCHAR(100),
    mechanic_name VARCHAR(100),
    labor_cost DECIMAL(8,2),
    parts_cost DECIMAL(8,2),
    notes TEXT
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_service))
    conn.commit()

print("✓ service_records table created")

**Instructions:** Create the financing table to store loan and financing information.
- Write a CREATE TABLE statement for the financing table
- Include columns: financing_id (SERIAL PRIMARY KEY), sale_id, lender_name, loan_amount, interest_rate, term_months, monthly_payment, approval_date, down_payment
- Use DECIMAL(10,2) for money amounts, DECIMAL(4,2) for interest_rate, INTEGER for term_months, DATE for approval_date
- Execute the SQL and commit
- Print a confirmation message

In [None]:
# Create financing table
# Money amounts are DECIMAL(10,2), interest_rate is DECIMAL(4,2) and
# term_months is an INTEGER. sale_id refers to sales.sale_id; no foreign key
# is declared so the bulk load does not depend on referential integrity.
create_financing = """
CREATE TABLE financing (
    financing_id SERIAL PRIMARY KEY,
    sale_id INTEGER,
    lender_name VARCHAR(100),
    loan_amount DECIMAL(10,2),
    interest_rate DECIMAL(4,2),
    term_months INTEGER,
    monthly_payment DECIMAL(10,2),
    approval_date DATE,
    down_payment DECIMAL(10,2)
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_financing))
    conn.commit()

print("✓ financing table created")

**Instructions:** Create the warranties table to store warranty coverage details.
- Write a CREATE TABLE statement for the warranties table
- Include columns: warranty_id (SERIAL PRIMARY KEY), vehicle_id, warranty_type, provider, start_date, end_date, coverage_amount, deductible, status
- Use appropriate data types: VARCHAR for text fields, DATE for dates, DECIMAL for monetary values
- Execute the SQL and commit
- Print confirmation message indicating all 7 tables are created

In [None]:
# Create warranties table
# vehicle_id refers to vehicles.vehicle_id; dates are DATE and the monetary
# columns are DECIMAL.
create_warranties = """
CREATE TABLE warranties (
    warranty_id SERIAL PRIMARY KEY,
    vehicle_id INTEGER,
    warranty_type VARCHAR(50),
    provider VARCHAR(100),
    start_date DATE,
    end_date DATE,
    coverage_amount DECIMAL(10,2),
    deductible DECIMAL(10,2),
    status VARCHAR(20)
);
"""

# Run the DDL and commit so the table is visible to the psql-based load cells
with engine.connect() as conn:
    conn.execute(text(create_warranties))
    conn.commit()

print("✓ warranties table created")
print("\n✓ All 7 tables created successfully!")

## Part 3: Load Data with PostgreSQL COPY

We'll use PostgreSQL's native COPY command for efficient bulk loading of CSV data.

**Instructions:** Load customer data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_customer_data.csv' using os.path.join()
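- Create a COPY command string using the `\copy` syntax for client-side loading (psql reads the arguments of `\copy` only up to the end of the line, so the whole command must be on a single line)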
- Specify all column names in order: customer_id, first_name, last_name, full_name, email, phone, state, zip_code, registration_date
- Use FORMAT CSV and HEADER TRUE options
- Execute the COPY command using subprocess.run() with psql
- If successful, query the count of loaded records
- Reset the sequence using setval() with the MAX(customer_id) value
- Print the number of customers loaded
- If there's an error, print the error message

In [None]:
# Load customers using PostgreSQL COPY command
customers_file = os.path.join(DATA_PATH, 'clean_customer_data.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line.
copy_command = (
    "\\copy customers(customer_id, first_name, last_name, full_name, email, "
    "phone, state, zip_code, registration_date) "
    f"FROM '{customers_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM customers")).scalar()
        max_id = conn.execute(text("SELECT MAX(customer_id) FROM customers")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('customers_customer_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} customers using COPY")
else:
    print(f"Error: {result.stderr}")

**Instructions:** Load salesperson data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_salesperson_info.csv'
- Create a COPY command with columns: salesperson_id, salesperson_name, hire_date, email, phone, commission_rate, department, status
- Execute using subprocess.run() with psql
- Query the count and reset the sequence
- Print the number of salespeople loaded or error message

In [None]:
# Load salespeople using PostgreSQL COPY command
salespeople_file = os.path.join(DATA_PATH, 'clean_salesperson_info.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line.
copy_command = (
    "\\copy salespeople(salesperson_id, salesperson_name, hire_date, email, "
    "phone, commission_rate, department, status) "
    f"FROM '{salespeople_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM salespeople")).scalar()
        max_id = conn.execute(text("SELECT MAX(salesperson_id) FROM salespeople")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('salespeople_salesperson_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} salespeople using COPY")
else:
    print(f"Error: {result.stderr}")

**Instructions:** Load vehicle data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_vehicle_inventory.csv'
- Create a COPY command with columns: vehicle_id, vin, make, model, year, color, mileage, condition, purchase_price, lot_date
- Execute using subprocess.run() with psql
- Query the count and reset the sequence using setval()
- Print the number of vehicles loaded or error message

In [None]:
# Load vehicles using PostgreSQL COPY command
vehicles_file = os.path.join(DATA_PATH, 'clean_vehicle_inventory.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line.
copy_command = (
    "\\copy vehicles(vehicle_id, vin, make, model, year, color, mileage, "
    "condition, purchase_price, lot_date) "
    f"FROM '{vehicles_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM vehicles")).scalar()
        max_id = conn.execute(text("SELECT MAX(vehicle_id) FROM vehicles")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('vehicles_vehicle_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} vehicles using COPY")
else:
    print(f"Error: {result.stderr}")

**Instructions:** Load sales data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_dealership_sales.csv'
- Create a COPY command with columns: sale_id, customer_name, vehicle_make, vehicle_model, year, sale_date, sale_price, salesperson, payment_method, trade_in_value
- Execute using subprocess.run() with psql
- Query the count and reset the sequence
- Print the number of sales loaded or error message

In [None]:
# Load sales using PostgreSQL COPY command
sales_file = os.path.join(DATA_PATH, 'clean_dealership_sales.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line. The column list follows the
# CSV column order, which the COPY column list maps onto the table.
copy_command = (
    "\\copy sales(sale_id, customer_name, vehicle_make, vehicle_model, year, "
    "sale_date, sale_price, salesperson, payment_method, trade_in_value) "
    f"FROM '{sales_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM sales")).scalar()
        max_id = conn.execute(text("SELECT MAX(sale_id) FROM sales")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('sales_sale_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} sales using COPY")
else:
    print(f"Error: {result.stderr}")

**Instructions:** Load service records data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_service_records.csv'
- Create a COPY command with columns: service_id, vin, service_date, service_type, mechanic_name, labor_cost, parts_cost, notes
- Execute using subprocess.run() with psql
- Query the count and reset the sequence
- Print the number of service records loaded or error message

In [None]:
# Load service_records using PostgreSQL COPY command
service_file = os.path.join(DATA_PATH, 'clean_service_records.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line.
copy_command = (
    "\\copy service_records(service_id, vin, service_date, service_type, "
    "mechanic_name, labor_cost, parts_cost, notes) "
    f"FROM '{service_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM service_records")).scalar()
        max_id = conn.execute(text("SELECT MAX(service_id) FROM service_records")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('service_records_service_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} service_records using COPY")
else:
    print(f"Error: {result.stderr}")

**Instructions:** Load financing data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_financing_details.csv'
- Create a COPY command with columns: financing_id, sale_id, lender_name, loan_amount, interest_rate, term_months, monthly_payment, approval_date, down_payment
- Execute using subprocess.run() with psql
- Query the count and reset the sequence
- Print the number of financing records loaded or error message

In [None]:
# Load financing using PostgreSQL COPY command
financing_file = os.path.join(DATA_PATH, 'clean_financing_details.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line.
copy_command = (
    "\\copy financing(financing_id, sale_id, lender_name, loan_amount, "
    "interest_rate, term_months, monthly_payment, approval_date, down_payment) "
    f"FROM '{financing_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM financing")).scalar()
        max_id = conn.execute(text("SELECT MAX(financing_id) FROM financing")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('financing_financing_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} financing records using COPY")
else:
    print(f"Error: {result.stderr}")

**Instructions:** Load warranty data using PostgreSQL's COPY command.
- Construct the full file path to 'clean_warranty_info.csv'
- Create a COPY command with columns: warranty_id, vehicle_id, warranty_type, provider, start_date, end_date, coverage_amount, deductible, status
- Execute using subprocess.run() with psql
- Query the count and reset the sequence
- Print the number of warranties loaded or error message
- Print a final success message indicating all data has been loaded

In [None]:
# Load warranties using PostgreSQL COPY command
warranties_file = os.path.join(DATA_PATH, 'clean_warranty_info.csv')

# psql reads the arguments of \copy only up to the end of the line, so the
# whole meta-command is built as a single line.
copy_command = (
    "\\copy warranties(warranty_id, vehicle_id, warranty_type, provider, "
    "start_date, end_date, coverage_amount, deductible, status) "
    f"FROM '{warranties_file}' "
    "WITH (FORMAT CSV, HEADER TRUE)"
)

# ON_ERROR_STOP makes psql exit non-zero when the command fails; without it
# a failed load would still look successful to the return-code check below.
result = subprocess.run(
    [
        'psql', '-d', DB_NAME, '-U', DB_USER,
        '-v', 'ON_ERROR_STOP=1',
        '-c', copy_command,
    ],
    capture_output=True,
    text=True
)

if result.returncode == 0:
    with engine.connect() as conn:
        count = conn.execute(text("SELECT COUNT(*) FROM warranties")).scalar()
        max_id = conn.execute(text("SELECT MAX(warranty_id) FROM warranties")).scalar()
        # The CSV supplies explicit ids, so move the SERIAL sequence past the
        # highest one; later INSERTs then get fresh ids.
        if max_id is not None:
            conn.execute(text(f"SELECT setval('warranties_warranty_id_seq', {max_id});"))
        conn.commit()
    print(f"✓ Loaded {count} warranties using COPY")
    print("\n✓ All data loaded successfully!")
else:
    print(f"Error: {result.stderr}")

## Part 4: Basic SELECT Queries

**Instructions:** Write a SELECT query to list all active salespeople.
- Select columns: salesperson_id, salesperson_name, hire_date, email, commission_rate
- Filter WHERE status = 'active'
- ORDER BY hire_date
- Use pd.read_sql() to execute the query and display results

In [None]:
# Exercise 4.1: List active salespeople
# Only rows whose status is 'active' are returned, earliest hire first.
query = """
SELECT salesperson_id,
       salesperson_name,
       hire_date,
       email,
       commission_rate
FROM salespeople
WHERE status = 'active'
ORDER BY hire_date;
"""

result = pd.read_sql(query, engine)
print("Active Salespeople:")
print(result)

**Instructions:** Write a SELECT query with JOIN to list vehicles with active warranties.
- Join vehicles and warranties tables on vehicle_id
- Select: make, model, year, warranty_type, end_date
- Filter WHERE the warranty's status = 'active' (the status column belongs to the warranties table)
- ORDER BY end_date
- Execute and display results

In [None]:
# Exercise 4.2: List vehicles with active warranties
# Inner join: only vehicles that have a warranty row appear, and only
# warranties whose status is 'active' are kept. Soonest expiry comes first.
query = """
SELECT v.make,
       v.model,
       v.year,
       w.warranty_type,
       w.end_date
FROM vehicles v
JOIN warranties w ON v.vehicle_id = w.vehicle_id
WHERE w.status = 'active'
ORDER BY w.end_date;
"""

result = pd.read_sql(query, engine)
print("Vehicles with Active Warranties:")
print(result)

## Part 5: Multi-Table JOINs

**Instructions:** Write a complex query joining sales and financing tables to show complete sales information.
- Join sales and financing tables on sale_id
- Select: sale_id, sale_date, customer_name, vehicle_make || ' ' || vehicle_model AS vehicle, salesperson, sale_price, lender_name, loan_amount, interest_rate
- Use LEFT JOIN for financing (some sales may not have financing)
- ORDER BY sale_date DESC
- Display the first 10 results

In [None]:
# Exercise 5.1: Complete sales information with financing
# LEFT JOIN keeps sales that have no financing row; their lender columns
# come back as NULL. Newest sales come first; head(10) trims the display.
query = """
SELECT s.sale_id,
       s.sale_date,
       s.customer_name,
       s.vehicle_make || ' ' || s.vehicle_model AS vehicle,
       s.salesperson,
       s.sale_price,
       f.lender_name,
       f.loan_amount,
       f.interest_rate
FROM sales s
LEFT JOIN financing f ON s.sale_id = f.sale_id
ORDER BY s.sale_date DESC;
"""

result = pd.read_sql(query, engine)
print("Complete Sales with Financing:")
print(result.head(10))

**Instructions:** Write a query to show complete vehicle service history.
- Join vehicles and service_records tables on vin
- Select: v.vin, make, model, year, service_date, service_type, mechanic_name
- Calculate total_cost as labor_cost + parts_cost
- ORDER BY vin, then service_date
- Display first 15 results

In [None]:
# Exercise 5.2: Vehicle service history
# Vehicles are matched to their service records through the VIN; total_cost
# is labor plus parts for each individual service visit.
query = """
SELECT v.vin,
       v.make,
       v.model,
       v.year,
       sr.service_date,
       sr.service_type,
       sr.mechanic_name,
       sr.labor_cost + sr.parts_cost AS total_cost
FROM vehicles v
JOIN service_records sr ON v.vin = sr.vin
ORDER BY v.vin, sr.service_date;
"""

result = pd.read_sql(query, engine)
print("Vehicle Service History:")
print(result.head(15))

## Part 6: Aggregate Functions and GROUP BY

**Instructions:** Write a query analyzing salesperson performance with commissions.
- Join salespeople and sales tables on salesperson name
- GROUP BY salesperson_name and commission_rate
- Calculate: COUNT(sale_id) as total_sales, SUM(sale_price) as total_revenue, AVG(sale_price) as avg_sale
- Calculate total_commission as SUM(sale_price * commission_rate)
- Filter WHERE status = 'active'
- ORDER BY total_revenue DESC
- Execute and display all results

In [None]:
# Exercise 6.1: Salesperson performance with commission
# The sales table stores the salesperson by name, so the join is on the name.
# Only active salespeople are included; commission is price times rate,
# summed over each person's sales.
query = """
SELECT sp.salesperson_name,
       sp.commission_rate,
       COUNT(s.sale_id) AS total_sales,
       SUM(s.sale_price) AS total_revenue,
       ROUND(AVG(s.sale_price), 2) AS avg_sale,
       ROUND(SUM(s.sale_price * sp.commission_rate), 2) AS total_commission
FROM salespeople sp
JOIN sales s ON sp.salesperson_name = s.salesperson
WHERE sp.status = 'active'
GROUP BY sp.salesperson_name, sp.commission_rate
ORDER BY total_revenue DESC;
"""

result = pd.read_sql(query, engine)
print("Salesperson Performance with Commission:")
print(result)

**Instructions:** Write a query analyzing service revenue by mechanic.
- Use service_records table
- GROUP BY mechanic_name
- Calculate: COUNT(service_id), SUM(labor_cost), SUM(parts_cost), SUM(labor_cost + parts_cost) as total_revenue, AVG(labor_cost + parts_cost) as avg_service_cost
- ORDER BY total_revenue DESC
- Execute and display results

In [None]:
# Exercise 6.2: Service costs by mechanic
# One row per mechanic with service count, labor and parts totals, combined
# revenue and the average cost of a service.
query = """
SELECT mechanic_name,
       COUNT(service_id) AS service_count,
       SUM(labor_cost) AS total_labor,
       SUM(parts_cost) AS total_parts,
       SUM(labor_cost + parts_cost) AS total_revenue,
       ROUND(AVG(labor_cost + parts_cost), 2) AS avg_service_cost
FROM service_records
GROUP BY mechanic_name
ORDER BY total_revenue DESC;
"""

result = pd.read_sql(query, engine)
print("Service Revenue by Mechanic:")
print(result)

**Instructions:** Write a query analyzing financing by lender.
- Use financing table
- GROUP BY lender_name
- Calculate: COUNT(financing_id) as loan_count, SUM(loan_amount) as total_financed, AVG(interest_rate), AVG(term_months)
- ORDER BY total_financed DESC
- Execute and display results

In [None]:
# Exercise 6.3: Financing by lender
# One row per lender with the number of loans, the total amount financed and
# the average interest rate and term.
query = """
SELECT lender_name,
       COUNT(financing_id) AS loan_count,
       SUM(loan_amount) AS total_financed,
       ROUND(AVG(interest_rate), 2) AS avg_interest_rate,
       ROUND(AVG(term_months), 1) AS avg_term_months
FROM financing
GROUP BY lender_name
ORDER BY total_financed DESC;
"""

result = pd.read_sql(query, engine)
print("Financing by Lender:")
print(result)

## Part 7: Advanced Queries with CTEs and Subqueries

**Instructions:** Write a query using CTEs to find top-performing salespeople above average.
- Create CTE 'sales_stats' to calculate sale_count and total_revenue per salesperson from sales table
- Create CTE 'avg_performance' to calculate the average revenue across all salespeople
- Join salespeople with sales_stats CTE on salesperson_name
- Cross join with avg_performance
- Calculate how much above average each top performer is
- Filter to show only those with revenue > average
- ORDER BY total_revenue DESC
- Execute and display results

In [None]:
# Exercise 7.1: Top performing salespeople (above average)
# sales_stats aggregates revenue per salesperson; avg_performance is the
# single-row average of those totals. The CROSS JOIN attaches that average
# to every row so each salesperson can be compared against it.
query = """
WITH sales_stats AS (
    SELECT salesperson,
           COUNT(sale_id) AS sale_count,
           SUM(sale_price) AS total_revenue
    FROM sales
    GROUP BY salesperson
),
avg_performance AS (
    SELECT AVG(total_revenue) AS avg_revenue
    FROM sales_stats
)
SELECT sp.salesperson_name,
       ss.sale_count,
       ss.total_revenue,
       ROUND(ap.avg_revenue, 2) AS avg_revenue,
       ROUND(ss.total_revenue - ap.avg_revenue, 2) AS above_average
FROM salespeople sp
JOIN sales_stats ss ON sp.salesperson_name = ss.salesperson
CROSS JOIN avg_performance ap
WHERE ss.total_revenue > ap.avg_revenue
ORDER BY ss.total_revenue DESC;
"""

result = pd.read_sql(query, engine)
print("Top Performers (Above Average):")
print(result)

**Instructions:** Write a query using CTE to find vehicles with high service costs.
- Create CTE 'vehicle_service_costs' to calculate total_service_cost (labor + parts) and service_count per vin from service_records
- Join vehicles with the CTE on vin
- Calculate cost_percentage as (total_service_cost / purchase_price * 100)
- Filter WHERE total_service_cost > 200
- ORDER BY total_service_cost DESC
- Execute and display results

In [None]:
# Exercise 7.2: Vehicles with high service costs
# The CTE totals labor plus parts per VIN. cost_percentage relates that total
# to the purchase price; NULLIF turns a zero price into NULL so the division
# cannot fail.
query = """
WITH vehicle_service_costs AS (
    SELECT vin,
           SUM(labor_cost + parts_cost) AS total_service_cost,
           COUNT(service_id) AS service_count
    FROM service_records
    GROUP BY vin
)
SELECT v.vin,
       v.make,
       v.model,
       v.year,
       v.purchase_price,
       vsc.service_count,
       vsc.total_service_cost,
       ROUND(vsc.total_service_cost / NULLIF(v.purchase_price, 0) * 100, 2) AS cost_percentage
FROM vehicles v
JOIN vehicle_service_costs vsc ON v.vin = vsc.vin
WHERE vsc.total_service_cost > 200
ORDER BY vsc.total_service_cost DESC;
"""

result = pd.read_sql(query, engine)
print("Vehicles with High Service Costs:")
print(result)

## Part 8: Window Functions

**Instructions:** Write a query using window functions to rank salespeople by monthly performance.
- Use DATE_TRUNC('month', sale_date) to group by month
- Use sales table
- GROUP BY month and salesperson
- Calculate sales_count and monthly_revenue
- Use RANK() OVER (PARTITION BY month ORDER BY revenue DESC) to rank within each month
- ORDER BY month, then rank
- Display first 20 results

In [None]:
# Exercise 8.1: Ranking salespeople by monthly sales
# Sales are grouped per month and salesperson; RANK() restarts for every
# month and orders salespeople by that month's revenue, highest first.
query = """
SELECT DATE_TRUNC('month', sale_date) AS month,
       salesperson,
       COUNT(sale_id) AS sales_count,
       SUM(sale_price) AS monthly_revenue,
       RANK() OVER (
           PARTITION BY DATE_TRUNC('month', sale_date)
           ORDER BY SUM(sale_price) DESC
       ) AS sales_rank
FROM sales
GROUP BY DATE_TRUNC('month', sale_date), salesperson
ORDER BY month, sales_rank;
"""

result = pd.read_sql(query, engine)
print("Monthly Sales Rankings:")
print(result.head(20))

## Part 9: UPDATE Operations

**Instructions:** Write an UPDATE statement to update warranty status for expired warranties.
- UPDATE the warranties table
- SET status = 'expired'
- WHERE end_date < CURRENT_DATE AND status = 'active'
- Execute using engine.connect() and text()
- Commit the transaction
- Print the number of rows updated (rowcount)

In [None]:
# Exercise 9.1: Expire old warranties
# Only warranties that are still marked active and whose end date has passed
# are changed, so re-running the cell updates zero rows.
update_query = """
UPDATE warranties
SET status = 'expired'
WHERE end_date < CURRENT_DATE
  AND status = 'active';
"""

with engine.connect() as conn:
    result = conn.execute(text(update_query))
    conn.commit()
    # rowcount is the number of rows the UPDATE matched
    print(f"✓ Updated {result.rowcount} warranties to expired status")

**Instructions:** Write an UPDATE statement to increase commission rates for top performers.
- UPDATE the salespeople table
- SET commission_rate = commission_rate * 1.1 (10% increase)
- WHERE salesperson_name IN (subquery selecting top 2 salespeople by total sales revenue)
- Use a subquery that joins sales table, groups by salesperson, and orders by total revenue DESC with LIMIT 2
- Execute, commit, and print the rowcount

In [None]:
# Exercise 9.2: Increase commission for top performers
# The subquery ranks salespeople by total sales revenue and keeps the top two;
# their commission rate is raised by 10 percent. Running the cell again
# applies the raise again.
update_query = """
UPDATE salespeople
SET commission_rate = commission_rate * 1.1
WHERE salesperson_name IN (
    SELECT sp.salesperson_name
    FROM salespeople sp
    JOIN sales s ON sp.salesperson_name = s.salesperson
    GROUP BY sp.salesperson_name
    ORDER BY SUM(s.sale_price) DESC
    LIMIT 2
);
"""

with engine.connect() as conn:
    result = conn.execute(text(update_query))
    conn.commit()
    # rowcount is the number of rows the UPDATE matched
    print(f"✓ Updated {result.rowcount} salespeople commission rates")

## Part 10: INSERT Operations

**Instructions:** Write an INSERT statement to add a new salesperson.
- INSERT INTO salespeople table
- Add values for: salesperson_name ('Jennifer Adams'), hire_date (CURRENT_DATE), email ('jadams@dealership.com'), phone ('555-0199'), commission_rate (0.045), department ('Sales'), status ('active')
- Use RETURNING clause to get back the salesperson_id and salesperson_name
- Execute, fetch the result, and commit
- Print the new salesperson information

In [None]:
# Exercise 10.1: Add new salesperson
# salesperson_id is omitted so the SERIAL sequence assigns it; RETURNING
# hands back the generated id and the name in the same round trip.
insert_query = """
INSERT INTO salespeople (
    salesperson_name, hire_date, email, phone,
    commission_rate, department, status
)
VALUES (
    'Jennifer Adams', CURRENT_DATE, 'jadams@dealership.com', '555-0199',
    0.045, 'Sales', 'active'
)
RETURNING salesperson_id, salesperson_name;
"""

with engine.connect() as conn:
    result = conn.execute(text(insert_query))
    # Read the RETURNING row before committing
    new_salesperson = result.fetchone()
    conn.commit()
    print(f"✓ Added salesperson: {new_salesperson[1]} (ID: {new_salesperson[0]})")

## Part 11: Comprehensive Business Analytics

**Instructions:** Write a comprehensive query using UNION ALL to create a dealership performance report.
- Create multiple SELECT statements combined with UNION ALL
- Each SELECT should return: category, metric_value (as TEXT), metric_name
- Include metrics for:
  - Sales: total transactions, total revenue
  - Service: total services, service revenue
  - Financing: total loans, amount financed
  - Inventory: total vehicles
- Use TO_CHAR() for currency formatting
- Execute and display with formatted headers

In [None]:
# Exercise 11.1: Complete dealership performance report
# Every SELECT returns the same three text columns so the rows can be stacked
# with UNION ALL. Counts are cast to TEXT; money totals are formatted with
# TO_CHAR and prefixed with a dollar sign. COALESCE shows 0.00 for an empty
# table instead of NULL.
query = """
SELECT 'Sales' AS category,
       CAST(COUNT(*) AS TEXT) AS metric_value,
       'Total Transactions' AS metric_name
FROM sales
UNION ALL
SELECT 'Sales',
       '$' || TO_CHAR(COALESCE(SUM(sale_price), 0), 'FM999,999,999,990.00'),
       'Total Revenue'
FROM sales
UNION ALL
SELECT 'Service',
       CAST(COUNT(*) AS TEXT),
       'Total Services'
FROM service_records
UNION ALL
SELECT 'Service',
       '$' || TO_CHAR(COALESCE(SUM(labor_cost + parts_cost), 0), 'FM999,999,999,990.00'),
       'Service Revenue'
FROM service_records
UNION ALL
SELECT 'Financing',
       CAST(COUNT(*) AS TEXT),
       'Total Loans'
FROM financing
UNION ALL
SELECT 'Financing',
       '$' || TO_CHAR(COALESCE(SUM(loan_amount), 0), 'FM999,999,999,990.00'),
       'Amount Financed'
FROM financing
UNION ALL
SELECT 'Inventory',
       CAST(COUNT(*) AS TEXT),
       'Total Vehicles'
FROM vehicles;
"""

result = pd.read_sql(query, engine)
print("="*80)
print("DEALERSHIP PERFORMANCE REPORT")
print("="*80)
print(result)
print("="*80)

**Instructions:** Write a query to analyze customer purchase patterns with tier classification.
- Create CTE 'customer_purchases' to calculate purchase_count and total_spent per customer from sales table (GROUP BY customer_name)
- Use CASE statement to classify customers into tiers:
  - 'Premium' if total_spent > 50000
  - 'High Value' if total_spent > 30000
  - 'Standard' if total_spent > 0
  - 'No Purchases' otherwise
- Filter to show only customers with purchases (purchase_count > 0)
- ORDER BY total_spent DESC
- Execute and display results

In [None]:
# Exercise 11.2: Customer purchase analysis
# The CTE totals purchases per customer name; the CASE branches are checked
# top-down, so each customer lands in the highest tier they qualify for.
query = """
WITH customer_purchases AS (
    SELECT customer_name,
           COUNT(sale_id) AS purchase_count,
           SUM(sale_price) AS total_spent
    FROM sales
    GROUP BY customer_name
)
SELECT customer_name,
       purchase_count,
       total_spent,
       CASE
           WHEN total_spent > 50000 THEN 'Premium'
           WHEN total_spent > 30000 THEN 'High Value'
           WHEN total_spent > 0 THEN 'Standard'
           ELSE 'No Purchases'
       END AS customer_tier
FROM customer_purchases
WHERE purchase_count > 0
ORDER BY total_spent DESC;
"""

result = pd.read_sql(query, engine)
print("Customer Purchase Analysis:")
print(result)

## Summary

**Congratulations! You've completed the comprehensive final project.**

### ✓ Part 1 (R) - Data Cleaning:
- Cleaned 7 messy CSV files with 100+ total records
- Applied text standardization, date parsing, missing value handling
- Calculated missing values using formulas
- Exported clean CSV files ready for database loading

### ✓ Part 2 (PostgreSQL) - Database Operations:
- Created 7-table database with proper data types and constraints
- Loaded all cleaned data using PostgreSQL COPY commands
- Performed 15+ advanced SQL exercises:
  - Basic SELECT queries with filtering
  - Multi-table JOINs (2-3 tables)
  - Aggregate functions with GROUP BY
  - CTEs and subqueries
  - Window functions and rankings
  - UPDATE and INSERT operations
  - Comprehensive business analytics with UNION ALL

### Skills Mastered:
- Data quality assessment and cleaning (R)
- Database design with appropriate table structures
- PostgreSQL COPY command for bulk data loading
- Advanced SQL query techniques
- Business intelligence and reporting
- End-to-end data workflow from messy CSVs to actionable insights

**Database Structure:**
- customers (30 records)
- salespeople (10 records)
- vehicles (25 records)
- sales (25 transactions)
- service_records (20 records)
- financing (15 records)
- warranties (20 records)

**This project demonstrates professional-level data management and SQL analysis skills!**