In [0]:
%sql
SELECT * FROM silver.sales_2k
WHERE region = 'South'


OrderID,OrderDate,CustomerID,CustomerName,ProductID,ProductName,Category,SubCategory,Quantity,UnitPrice,TotalAmount,Region
12,2024-11-28,C7377,Matthew Parker,P7700,Office Chair,Furniture,Tables,8,890.58,7124.64,South
15,2024-10-30,C4738,David Horton,P4655,Printer,Electronics,Computers,2,755.93,1511.86,South
20,2025-04-17,C7273,Lisa Goodman,P7777,Monitor,Electronics,Peripherals,2,317.39,634.78,South
22,2024-10-14,C8356,Steven Mcbride,P9631,Webcam,Electronics,Computers,3,577.57,1732.71,South
27,2025-02-09,C9664,Kevin Daugherty,P4686,Headphones,Accessories,Audio,8,594.79,4758.32,South
28,2024-11-02,C5097,Leslie Johnston,P6673,Keyboard,Accessories,Input Devices,8,108.44,867.52,South
34,2024-08-23,C2871,Omar Rogers,P4044,Monitor,Electronics,Peripherals,4,481.49,1925.96,South
37,2024-12-15,C6988,Miss Sonya Brown DDS,P5301,Mouse,Accessories,Audio,9,378.77,3408.93,South
40,2024-12-02,C2135,Derrick Weiss,P1763,USB Drive,Accessories,Storage Devices,6,178.52,1071.12,South
43,2024-11-15,C7216,Kelly Santiago,P8141,Bookshelf,Furniture,Tables,1,436.53,436.53,South


Databricks visualization. Run in Databricks to view.

## Sales over the period of year where we can see a decline in sales which tells the need of investigation in customer behaviour and trends

In [0]:
%sql
SELECT 
  DATE_TRUNC('month', OrderDate) AS month,
  SUM(TotalAmount) AS total_sales
FROM silver.sales_2k
GROUP BY 1
ORDER BY 1


month,total_sales
2024-06-01T00:00:00.000Z,2975035.3899999973
2024-07-01T00:00:00.000Z,4719052.829999999
2024-08-01T00:00:00.000Z,4622362.570000004
2024-09-01T00:00:00.000Z,4682603.240000001
2024-10-01T00:00:00.000Z,4801087.530000003
2024-11-01T00:00:00.000Z,4621143.399999996
2024-12-01T00:00:00.000Z,4885781.960000011
2025-01-01T00:00:00.000Z,4593673.190000002
2025-02-01T00:00:00.000Z,4193491.570000003
2025-03-01T00:00:00.000Z,4739078.989999994


Databricks visualization. Run in Databricks to view.

## Knowing the top selling products 

In [0]:
%sql
select ProductName , 
SUM(TotalAmount) as total_sales
from silver.sales_2k
group by ProductName
order by total_sales desc
limit 10

ProductName,total_sales
Headphones,4879957.299999997
Bookshelf,4723274.889999993
Desk,4596539.670000001
Office Chair,4591802.759999999
Table Lamp,4561252.570000003
Keyboard,4538122.489999999
USB Drive,4536790.12
Mouse,4454574.329999993
Monitor,3844119.120000001
Webcam,3787194.0100000016


Databricks visualization. Run in Databricks to view.

## Sales by category  and sub-category

In [0]:
%sql
SELECT 
  Category,
  SubCategory,
  SUM(TotalAmount) AS total_sales
FROM silver.sales_2k
WHERE SubCategory != 'Unknown' AND TotalAmount > 2000
GROUP BY Category, SubCategory
ORDER BY SUM(TotalAmount) DESC

Category,SubCategory,total_sales
Accessories,Input Devices,5419969.879999995
Electronics,Peripherals,5385899.920000007
Furniture,Storage,5245206.790000008
Furniture,Seating,5229691.420000002
Electronics,Computers,5214331.880000006
Accessories,Storage Devices,5094123.619999996
Furniture,Tables,5078722.559999994
Electronics,Mobile Devices,4957146.92
Accessories,Audio,4888878.420000003


Databricks visualization. Run in Databricks to view.

## Regional Sales Distribution

In [0]:
%sql
SELECT 
  region,
  SUM(TotalAmount) AS total_sales
FROM silver.sales_2k
GROUP BY region
ORDER BY total_sales DESC


region,total_sales
East,14313926.94999999
South,14031896.57000001
West,13953700.170000017
North,13695282.900000053


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
describe gold.sales_monthly 

col_name,data_type,comment
month_key,string,
Region,string,
Category,string,
order_count,bigint,
total_sales,double,
avg_order_value,double,
# Partition Information,,
# col_name,data_type,comment
month_key,string,


## Customers purchase frequency 

In [0]:
%sql
SELECT 
  order_count,
  COUNT(*) AS customer_count
FROM (
  SELECT 
    CustomerID,
    COUNT(DISTINCT orderID) AS order_count
  FROM silver.sales_2k
  GROUP BY CustomerID
) AS customer_orders
GROUP BY order_count
ORDER BY order_count


order_count,customer_count
1,2200
2,2366
3,1754
4,983
5,450
6,203
7,46
8,7
9,2
10,1


In [0]:
%sql
SELECT 
  CASE 
    WHEN order_count BETWEEN 1 AND 3 THEN '1–3 orders'
    WHEN order_count BETWEEN 4 AND 6 THEN '4–6 orders'
    ELSE '7+ orders'
  END AS order_range,
  COUNT(*) AS customer_count
FROM (
  SELECT 
    customerID,
    COUNT(DISTINCT orderID) AS order_count
  FROM silver.sales_2k
  GROUP BY customerID
) AS customer_orders
GROUP BY order_range
ORDER BY 
  CASE 
    WHEN order_range = '1–3 orders' THEN 1
    WHEN order_range = '4–6 orders' THEN 2
    ELSE 3
  END


order_range,customer_count
1–3 orders,6320
4–6 orders,1636
7+ orders,56


Databricks visualization. Run in Databricks to view.

## Top customer spendings for early access discounts or products


In [0]:
%sql
SELECT 
  customerID,
  COUNT(DISTINCT orderID) AS total_orders,
  SUM(TotalAmount) AS total_spent
FROM silver.sales_2k
GROUP BY customerID
ORDER BY total_spent DESC
LIMIT 10


customerID,total_orders,total_spent
C5150,8,38617.100000000006
C6955,6,37114.11
C9957,6,35930.35
C2068,9,34395.88
C6393,5,33379.03
C2357,9,32719.02
C5126,8,32685.320000000003
C4945,6,31711.17
C1759,6,30758.72
C2054,7,30756.950000000004


Databricks visualization. Run in Databricks to view.

## Average Order Value

In [0]:
%sql
SELECT 
  DATE_FORMAT(OrderDate, 'yyyy-MM') AS month,
  SUM(TotalAmount) / COUNT(DISTINCT OrderID) AS avg_order_value
FROM silver.sales_2k
GROUP BY DATE_FORMAT(OrderDate, 'yyyy-MM')
ORDER BY month


month,avg_order_value
2024-06,2844.2020936902454
2024-07,2819.027974910397
2024-08,2777.8621213942347
2024-09,2855.2458780487777
2024-10,2765.603415898617
2024-11,2792.231661631417
2024-12,2809.535342150663
2025-01,2787.423052184468
2025-02,2707.225029051003
2025-03,2805.849017169924


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
SELECT 
  DATE_FORMAT(OrderDate, 'yyyy-MM') AS month,
  ROUND(SUM(TotalAmount) / COUNT(DISTINCT OrderID), 2) AS latest_aov
FROM silver.sales_2k
GROUP BY month
ORDER BY month DESC
LIMIT 1


month,latest_aov
2025-06,2668.05


Databricks visualization. Run in Databricks to view.

## Quantity ~ TotalSales ~ Day of the week

In [0]:
%sql
SELECT 
  date_format(OrderDate, 'EEEE') AS day_of_week,  
  COUNT(OrderID) AS order_count,
  SUM(Quantity) AS total_quantity,
  SUM(TotalAmount) AS total_sales
FROM silver.sales_2k
GROUP BY 
  date_format(OrderDate, 'EEEE')


day_of_week,order_count,total_quantity,total_sales
Tuesday,2762,15023,7796889.860000007
Saturday,2841,15840,8050475.380000001
Thursday,2944,16221,8377894.650000011
Friday,2907,16225,8221742.260000006
Sunday,2817,15622,7793449.250000002
Wednesday,2814,15213,7811416.4499999955
Monday,2915,15736,7942938.739999993


Databricks visualization. Run in Databricks to view.

## Product peformance by the region

In [0]:
%sql
SELECT 
  ProductName,
  region,
  SUM(TotalAmount) AS total_sales
FROM silver.sales_2k
GROUP BY ProductName, region


ProductName,region,total_sales
USB Drive,South,1139366.87
Office Chair,South,1179038.35
Table Lamp,East,1145568.8300000012
Printer,North,948892.32
Desk,East,1145457.2700000007
Smartphone,North,857351.5900000007
Bookshelf,North,1076271.16
Laptop,North,882445.8799999998
Desk,South,1289125.0000000002
Bookshelf,West,1186954.0600000003


Databricks visualization. Run in Databricks to view.

## Discount Impact Analysis
### the discount is applied to the products at the same rate so we will check if the lowered price items sell more
### depending on this data there is very less impact of dicounting 

In [0]:
%sql
SELECT 
CORR(unitprice, quantity) AS price_quantity_corr,
  FLOOR(unitprice * 2) / 2 AS price_bucket, 
  SUM(quantity) AS total_quantity_sold
FROM silver.sales_2k
WHERE quantity > 0
GROUP BY price_bucket
ORDER BY price_bucket

limit 50


price_quantity_corr,price_bucket,total_quantity_sold
0.1280719864416188,20.0,59
0.4798709064331247,20.5,40
0.2271422686476346,21.0,39
-0.2406831457891013,21.5,56
0.5254910318244435,22.0,67
-0.0474036867601324,22.5,59
-0.2347123722235741,23.0,92
-0.5710235890668331,23.5,40
0.0268550715528947,24.0,61
0.3295104686782301,24.5,67


Databricks visualization. Run in Databricks to view.

In [0]:
%sql
WITH first_orders AS (
  SELECT customerID, MIN(OrderDate) AS first_order_date
  FROM silver.sales_2k
  GROUP BY customerID
),
tagged_orders AS (
  SELECT 
    s.customerID,
    s.orderID,
    s.OrderDate,
    s.TotalAmount,
    CASE 
      WHEN s.OrderDate = f.first_order_date THEN 'New'
      ELSE 'Returning'
    END AS customer_type
  FROM silver.sales_2k s
  JOIN first_orders f ON s.customerID = f.customerID
)

SELECT 
  customer_type,
  COUNT(DISTINCT customerID) AS customer_count,
  COUNT(orderID) AS total_orders,
  SUM(TotalAmount) AS total_revenue
FROM tagged_orders
GROUP BY customer_type


customer_type,customer_count,total_orders,total_revenue
Returning,5810,11972,33702505.27999996
New,8012,8028,22292301.309999976


Databricks visualization. Run in Databricks to view.