# Retail Sales Mini Project

# In [None]:
# Retail Sales Mini Project - Module 8

## 📦 Load Required Libraries and Data
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt

# Read the sales transactions and the stock catalogue from CSV files,
# resolved relative to the current working directory.
sales_df, stock_df = (
    pd.read_csv(csv_name) for csv_name in ("sales.csv", "StockDetails.csv")
)

## 🗄️ Create In-Memory SQLite Database
# Copy both DataFrames into a throwaway in-memory SQLite database so they
# can be queried with SQL. An existing table of the same name is replaced,
# and the DataFrame index is not written out as a column.
conn = sqlite3.connect(":memory:")
sales_df.to_sql(name="sales", con=conn, if_exists="replace", index=False)
stock_df.to_sql(name="stockDetails", con=conn, if_exists="replace", index=False)
cursor = conn.cursor()

## 🧮 A. Basic SQL Queries

### 1. Stock items containing "T-LIGHT"
# Print every stockDetails row whose Description contains "T-LIGHT".
print(cursor.execute("""
SELECT * FROM stockDetails
WHERE Description LIKE '%T-LIGHT%'
""").fetchall())

### 2. Total quantity sold per StockCode
# Sum Quantity over all sales rows of each StockCode and print the
# resulting (StockCode, TotalQuantitySold) rows.
cursor.execute("""
SELECT StockCode, SUM(Quantity) AS TotalQuantitySold
FROM sales
GROUP BY StockCode
""")
quantity_rows = cursor.fetchall()
print(quantity_rows)

### 3. Total revenue per CustomerID
# Revenue of a sales row is Quantity * UnitPrice; add it up per CustomerID.
print(cursor.execute("""
SELECT CustomerID, SUM(Quantity * UnitPrice) AS TotalRevenue
FROM sales
GROUP BY CustomerID
""").fetchall())

### 4. Count of distinct items per invoice
# For each InvoiceNo, count how many different StockCodes appear on it.
cursor.execute("""
SELECT InvoiceNo, COUNT(DISTINCT StockCode) AS DistinctItems
FROM sales
GROUP BY InvoiceNo
""")
distinct_item_rows = cursor.fetchall()
print(distinct_item_rows)

### 5. Join sales and stock details + revenue per item
# Attach each item's Description from stockDetails to its sales rows and
# total Quantity * UnitPrice per (StockCode, Description). The INNER JOIN
# drops sales rows whose StockCode has no stockDetails entry.
# NOTE(review): if stockDetails holds several rows for one StockCode, the
# join repeats each matching sales row and inflates TotalRevenue; confirm
# that StockCode is unique in the stock file.
print(cursor.execute("""
SELECT s.StockCode, sd.Description, SUM(s.Quantity * s.UnitPrice) AS TotalRevenue
FROM sales s
INNER JOIN stockDetails sd ON s.StockCode = sd.StockCode
GROUP BY s.StockCode, sd.Description
""").fetchall())

## 📊 B. EDA Using Pandas

### 1. Data info, missing values, duplicates
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called on its own; wrapping it in print() would append a stray
# "None" line after each report.
sales_df.info()
print(sales_df.isnull().sum())      # missing values per column
print(sales_df.duplicated().sum())  # number of fully duplicated rows

stock_df.info()
print(stock_df.isnull().sum())      # missing values per column
print(stock_df.duplicated().sum())  # number of fully duplicated rows

### 2. Convert and extract datetime features
# Parse InvoiceDate into datetimes in place, then derive three helper
# columns from it: the calendar date, the month number and the hour.
sales_df['InvoiceDate'] = pd.to_datetime(sales_df['InvoiceDate'])
invoice_dt = sales_df['InvoiceDate'].dt
sales_df['InvoiceDay'] = invoice_dt.date
sales_df['InvoiceMonth'] = invoice_dt.month
sales_df['InvoiceHour'] = invoice_dt.hour

### 3. Add TotalPrice column
# Line revenue: units on the row multiplied by the price per unit.
quantity = sales_df['Quantity']
unit_price = sales_df['UnitPrice']
sales_df['TotalPrice'] = quantity * unit_price

### 4. Top 3 bestselling items
# Total units sold per StockCode, keeping the three largest totals.
quantity_by_item = sales_df.groupby('StockCode')['Quantity'].sum()
top_items = quantity_by_item.nlargest(3)
print(top_items)

### 5. Unique customers and average quantity per invoice
# Units are first summed within each invoice, then averaged across invoices.
quantity_by_invoice = sales_df.groupby('InvoiceNo')['Quantity'].sum()
avg_qty_per_invoice = quantity_by_invoice.mean()
# Number of distinct CustomerID values.
unique_customers = sales_df['CustomerID'].nunique()
print("Unique Customers:", unique_customers)
print("Average Quantity per Invoice:", avg_qty_per_invoice)

## 📈 C. Visualisations

### 1. Bar chart of top 10 items by quantity sold
# Ten StockCodes with the highest summed Quantity, drawn as vertical bars.
units_per_item = sales_df.groupby('StockCode')['Quantity'].sum()
top10 = units_per_item.nlargest(10)
top10.plot.bar(figsize=(10,6), title='Top 10 Items by Quantity Sold')
plt.ylabel("Quantity")
plt.show()

### 2. Total sales per hour
# Sum TotalPrice for each InvoiceHour value and show one bar per hour.
hourly_sales = sales_df.groupby('InvoiceHour')['TotalPrice'].sum()
hourly_plot_options = dict(kind='bar', figsize=(10,6), title='Total Sales by Hour')
hourly_sales.plot(**hourly_plot_options)
plt.ylabel("Revenue")
plt.show()

### 3. Pie chart for top 5 customers by revenue
# Shares are relative to the combined revenue of these five customers only.
revenue_by_customer = sales_df.groupby('CustomerID')['TotalPrice'].sum()
top5_customers = revenue_by_customer.nlargest(5)
top5_customers.plot.pie(autopct='%1.1f%%', figsize=(8,8), title='Revenue Share of Top 5 Customers')
plt.ylabel("")  # blank y-axis label instead of the default one
plt.show()

### 4. Monthly revenue trend line
# Group by calendar month *including the year* (a monthly period such as
# 2011-03) instead of by month number alone. Grouping on the month number
# merges the same month of different years into one point and orders the
# line by month number rather than chronologically.
# pd.to_datetime leaves an already-converted InvoiceDate column unchanged.
invoice_period = (
    pd.to_datetime(sales_df['InvoiceDate']).dt.to_period('M').rename('InvoiceMonth')
)
monthly_revenue = sales_df.groupby(invoice_period)['TotalPrice'].sum()
monthly_revenue.plot(kind='line', marker='o', figsize=(10,6), title='Monthly Revenue Trend')
plt.ylabel("Revenue")
plt.show()

### 5. Stacked bar chart of top 5 invoices by revenue
# Pick the five invoices with the highest summed TotalPrice, then build an
# invoice x StockCode revenue table so each bar is split by item.
revenue_by_invoice = sales_df.groupby('InvoiceNo')['TotalPrice'].sum()
invoice_total = revenue_by_invoice.nlargest(5).index
is_top_invoice = sales_df['InvoiceNo'].isin(invoice_total)
invoice_items = sales_df[is_top_invoice]
pivot_table = invoice_items.pivot_table(
    values='TotalPrice',
    index='InvoiceNo',
    columns='StockCode',
    aggfunc='sum',
    fill_value=0,  # items absent from an invoice contribute zero
)
pivot_table.plot.bar(stacked=True, figsize=(12,6), title='Top 5 Invoices by Revenue')
plt.ylabel("Revenue")
plt.show()

## 💡 D. Business Insights

### 1. Highest revenue product
# StockCode whose summed TotalPrice is the largest.
revenue_by_product = sales_df.groupby('StockCode')['TotalPrice'].sum()
top_product = revenue_by_product.idxmax()
print("Top Revenue Product:", top_product)

### 2. Average order value
# An order is one invoice: total each invoice first, then average the totals.
order_totals = sales_df.groupby('InvoiceNo')['TotalPrice'].sum()
avg_order_value = order_totals.mean()
print("Average Order Value:", avg_order_value)

### 3. Customer with most transactions
# A transaction is counted as one invoice, matching the per-InvoiceNo
# grouping used for the order-level figures elsewhere in this script.
# Counting raw rows per customer would count invoice *lines*, so a customer
# with a single very long invoice could outrank one with many orders.
invoices_per_customer = sales_df.groupby('CustomerID')['InvoiceNo'].nunique()
top_customer = invoices_per_customer.idxmax()
print("Customer with Most Transactions:", top_customer)

### 4. % of stock items sold
# Denominator: distinct StockCodes listed in the stock file.
total_stock = stock_df['StockCode'].nunique()
# Numerator: only those catalogue codes that actually occur in sales.
# Counting every distinct code in sales would also include codes missing
# from the stock file and could push the result above 100%.
catalogue_codes = stock_df['StockCode'].dropna().drop_duplicates()
sold_items = int(catalogue_codes.isin(sales_df['StockCode']).sum())
# An empty stock file yields 0% instead of a ZeroDivisionError.
percentage_sold = (sold_items / total_stock) * 100 if total_stock else 0.0
print(f"Percentage of Stock Items Sold: {percentage_sold:.2f}%")

### 5. Sales products not in stock file
# Count the distinct StockCodes that occur in sales but have no entry in
# the stock file.
in_stock_file = sales_df['StockCode'].isin(stock_df['StockCode'])
unmatched_codes = sales_df.loc[~in_stock_file, 'StockCode']
missing_items = unmatched_codes.nunique()
print("Unmatched StockCodes in Sales:", missing_items)
