In [1]:
############### Customer Report
# Importing Necessary libraries
import pyodbc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import urllib
from sqlalchemy import create_engine

server = 'LAPTOP-LFBT0G3K'
database = 'DataWarehouse'
driver = '{ODBC Driver 18 for SQL Server}'

In [None]:
try:
    quoted_driver = urllib.parse.quote_plus(driver)
    connection_uri = (
        f'mssql+pyodbc:///?odbc_connect='
        f'DRIVER={quoted_driver};'
        f'SERVER={server};'
        f'DATABASE={database};'
        f'Trusted_Connection=yes;'
        f'Encrypt=yes;'
        f'TrustServerCertificate=yes;'
    )
    sql_analysis_query="""
                            WITH base_query AS(
                            -- 1) Base Query: Retrieves core columns from tables
                                SELECT
                                    f.order_number,
                                    f.product_key,
                                    f.order_date,
                                    f.sales_amount,
                                    f.quantity,
                                    c.customer_key,
                                    CONCAT(c.first_name, ' ', c.last_name) AS customer_name,
                                    DATEDIFF(year, c.birthdate, GETDATE()) age
                                
                                FROM gold.fact_sales f
                                LEFT JOIN gold.dim_customer c
                                
                                ON c.customer_key = f.customer_key
                                WHERE order_date IS NOT NULL),

                            customer_aggregation AS (
                            -- 2) Customer Aggregations: Summarizes key metrics at the customer level
                                SELECT
                                    customer_key,
                                    customer_name,
                                    age,
                                    COUNT(DISTINCT order_number) AS total_orders,
                                    SUM(sales_amount) AS total_sales,
                                    SUM(quantity) AS total_quantity,
                                    COUNT(DISTINCT product_key) AS total_products,
                                    MAX(order_date) AS last_order_date,
                                    DATEDIFF(month, MIN(order_date), MAX(order_date)) AS lifespan

                                FROM base_query
                                GROUP BY
                                    customer_key,
                                    customer_name,
                                    age
                                )

                            SELECT
                                customer_key,
                                customer_name,
                                age,
                                
                                CASE
                                    WHEN age < 20 THEN 'Under 20'
                                    WHEN age between 20 and 29 THEN '20-29'
                                    WHEN age between 30 and 39 THEN '30-39'
                                    WHEN age between 40 and 49 THEN '40-49'
                                    ELSE '50 and above'
                                END AS age_group,
                                
                                CASE
                                    WHEN lifespan >= 12 AND total_sales > 5000 THEN 'VIP'
                                    WHEN lifespan >= 12 AND total_sales <= 5000 THEN 'Regular'
                                    ELSE 'New'
                                END AS customer_segment,
                                
                                last_order_date,
                                DATEDIFF(month, last_order_date, GETDATE()) AS recency,
                                total_orders,
                                total_sales,
                                total_quantity,
                                total_products
                                lifespan,

                            -- Compute average order value (AVO)
                            CASE WHEN total_sales = 0 THEN 0
                                ELSE total_sales / total_orders
                            END AS avg_order_value,

                            -- Compute average monthly spend
                            CASE WHEN lifespan = 0 THEN total_sales
                                ELSE total_sales / lifespan
                            END AS avg_monthly_spend
                            FROM customer_aggregation;
    """
    print(f"Attempting to connect to {server}/{database} using driver: {driver}")
    engine = create_engine(connection_uri)
    print("Successfully connected to SQL Server!")
    
    df_customer_report=pd.read_sql(sql_analysis_query,engine)


except Exception as e:
    print(f"An unexpected error occurred: {e}")

Attempting to connect to LAPTOP-LFBT0G3K/DataWarehouse using driver: {ODBC Driver 18 for SQL Server}
Successfully connected to SQL Server!


In [3]:
# Test for Data
print('first 5 rows fo sales analysis Data : ')
print(df_customer_report.head())
print('\nSales Analysis Data info : ')
print(df_customer_report.info())

first 5 rows fo sales analysis Data : 


NameError: name 'df_customer_report' is not defined