** Project Title** Retail Sales Analysis
Level: Beginner
Database: p1_retail_db
This project is made to practice SQL skills and techniques that are used by data analysts to explore, clean, and analyze retail sales data. This project involves setting up a retail sales database, performing exploratory data analysis(EDA), and answering specific business questions using SQL queries.
- Setup a retail sales database: Create and populate a retail sales database with provided sales data
- Data Cleaning Identify and remove any records with missing or null values
- Exploratory Data Analysis (EDA): Perform basic exploratory data analysis to understand the dataset
- Business Analysis: Use SQL to answer specific business questions and derive insights from sales data
- Database Creation: The Project starts by creating a databse named
p1_retail_db. - Table Creation: A table named
retail_salesis created to store the sales data. The table structure includes columns for transaction ID, sale date, sale time, customer ID, gender, age, product catefory, quantity sold, price per unit, cost of goods sold (COGS), and total sale amount.
CREATE TABLE retail_sales
(
transactions_id INT,
sale_date DATE,
sale_time TIME,
customer_id INT,
gender VARCHAR(25),
age INT,
category VARCHAR(30),
quantity INT,
price_per_unit FLOAT,
cogs FLOAT,
total_sale FLOAT
);
SELECT * FROM retail_sales
LIMIT 10;
-- checking how many records are in database
SELECT COUNT(*) FROM retail_sales
-- Checking for null values
SELECT * FROM retail_sales
WHERE
transactions_id IS NULL
OR
sale_date IS NULL
OR
sale_time IS NULL
OR
customer_id IS NULL
OR
gender IS NULL
OR
age IS NULL
OR
category IS NULL
OR
quantity IS NULL
OR
price_per_unit IS NULL
OR
cogs IS NULL
OR
total_sale IS NULL
-- Data Cleaning
DELETE FROM retail_sales
WHERE
transactions_id IS NULL
OR
sale_date IS NULL
OR
sale_time IS NULL
OR
customer_id IS NULL
OR
gender IS NULL
OR
age IS NULL
OR
category IS NULL
OR
quantity IS NULL
OR
price_per_unit IS NULL
OR
cogs IS NULL
OR
total_sale IS NULL
-- Data exploration:
-- How many sales have been made?
SELECT COUNT(*) AS total_sale FROM retail_sales
-- How many customers are present in data?
SELECT COUNT(DISTINCT customer_id) AS total_sale FROM retail_sales
-- we use distinct to only count unique ids, as one customer may have visted
-- multiple times
-- what categories are customers purchasing?
SELECT DISTINCT category FROM retail_sales
-- Data Analysis & Business Problems and Answers:
-- Q.1 Write a sql query to retrieve all columns and sales made on '2022-11-05':
SELECT * FROM retail_sales
WHERE sale_date = '2022-11-05';
-- Q.2 write a SQL query where to retireve all transactions where the category is clothing and the
-- quantity sold is more than 4 in the month of Nov-2022
SELECT
*
FROM retail_sales
WHERE category = 'Clothing'
AND
TO_CHAR(sale_date, 'YYYY-MM') = '2022-11'
AND
quantity >= 4
-- Q.3 Write a sql query to calculate total sales for each caegory
SELECT
category,
SUM(total_sale) as net_sale,
COUNT(*) as total_orders
FROM retail_sales
GROUP BY 1
-- Q.4 Write a sql query to find average age of customers who purchased from the beauty category
SELECT
ROUND(AVG(age), 2) as avg_age
FROM retail_sales
WHERE category = 'Beauty'
-- Q.5 write a sql query to find all transactions where total_sale is greater than 1000
SELECT * FROM retail_sales
WHERE total_sale > 1000
-- Q.6 write a sql query to find total num of transactions made by each gender in each category
SELECT
category,
gender,
COUNT(*) as total_trans
FROM retail_sales
GROUP
BY
category,
gender
ORDER BY 1
-- Q.7 write a sql query to calculate the average sale for each month. Find the best selling month of the year
SELECT
year,
month,
avg_sale
FROM
(
SELECT
EXTRACT(YEAR FROM sale_date) as year,
EXTRACT(MONTH FROM sale_date) as month,
AVG(total_sale) as avg_sale,
RANK() OVER(PARTITION BY EXTRACT (YEAR FROM sale_date) ORDER BY AVG(total_sale)DESC) as rank
FROM retail_sales
GROUP BY 1,2
) as t1
WHERE rank = 1
-- Q.8 write a sql query to find top 5 customers based on highest total sales
SELECT
customer_id,
SUM(total_sale) as total_sales
FROM retail_sales
GROUP BY 1
ORDER BY 2 DESC
LIMIT 5
-- Q.9 write a sql query to find the number of unique customers who purchased items from each category
SELECT
category,
COUNT(DISTINCT customer_id) AS unique_cust
FROM retail_sales
GROUP BY category
-- Q.10 write a sql query to create each shift and number of order (EX: morning <= 12, Afternoon Between 12 and 17, Evening >17)
WITH hourly_sale
AS
(
SELECT *,
CASE
WHEN EXTRACT(HOUR FROM sale_time) < 12 THEN 'Morning'
WHEN EXTRACT(HOUR FROM sale_time) BETWEEN 12 AND 17 THEN 'Afternoon'
ELSE 'Evening'
END as shift
FROM retail_sales
)
SELECT
shift,
COUNT(*) as total_orders
FROM hourly_sale
GROUP BY shift