<a href="https://colab.research.google.com/github/Dee-Nwanjah/SQL-Database-Fundamental-Projects/blob/main/4.)Revenue_Analysis_with_Date_Functions_%26_CTEs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# =============================================================================
# SETUP
# =============================================================================

# Setup and Data Creation

!pip install ipython-sql pandas numpy

import pandas as pd
import numpy as np
import sqlite3
from IPython import get_ipython
from datetime import datetime, timedelta

# Create database connection
conn = sqlite3.connect('revenue_analysis.db')
print("✅ Database connection created!")

# Load SQL magic
get_ipython().run_line_magic('load_ext', 'sql')
get_ipython().run_line_magic('sql', 'sqlite:///revenue_analysis.db')
print("✅ SQL magic loaded!")

# =============================================================================
# CREATING AN ENHANCED DATA FOR REVENUE ANALYSIS
# =============================================================================

print("📊 Creating revenue analysis data...")
np.random.seed(42)

# Create date range for better analysis (1 year of data)
date_range = pd.date_range('2023-01-01', '2024-01-31', freq='D')

# Products data
products_data = {
    'product_id': range(1, 101),
    'product_name': [f'Product_{i}' for i in range(1, 101)],
    'category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Home', 'Sports'], 100),
    'price': np.round(np.random.uniform(10, 500, 100), 2)
}

# Customers with registration dates
customers_data = {
    'customer_id': range(1, 501),
    'first_name': [f'Customer_{i}' for i in range(1, 501)],
    'last_name': [f'LastName_{i}' for i in range(1, 501)],
    'city': np.random.choice(['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'], 500),
    'registration_date': np.random.choice(pd.date_range('2023-01-01', '2024-01-31'), 500)
}

# Orders with realistic date distribution (more recent orders)
order_dates = []
for _ in range(5000):
    if np.random.random() < 0.3:  # 30% recent orders
        order_dates.append(np.random.choice(pd.date_range('2024-01-01', '2024-01-31')))
    else:  # 70% distributed across the year
        order_dates.append(np.random.choice(date_range))

orders_data = {
    'order_id': range(1, 5001),
    'customer_id': np.random.randint(1, 501, 5000),
    'product_id': np.random.randint(1, 101, 5000),
    'order_date': order_dates,
    'quantity': np.random.randint(1, 5, 5000),
    'total_amount': np.round(np.random.uniform(20, 800, 5000), 2)
}

# Create DataFrames and save to database
products_df = pd.DataFrame(products_data)
customers_df = pd.DataFrame(customers_data)
orders_df = pd.DataFrame(orders_data)

# Convert dates to string format for SQLite
customers_df['registration_date'] = pd.to_datetime(customers_df['registration_date']).dt.strftime('%Y-%m-%d')
orders_df['order_date'] = pd.to_datetime(orders_df['order_date']).dt.strftime('%Y-%m-%d')

# Save to database
products_df.to_sql('products', conn, if_exists='replace', index=False)
customers_df.to_sql('customers', conn, if_exists='replace', index=False)
orders_df.to_sql('orders', conn, if_exists='replace', index=False)

print("✅ Revenue analysis data created!")
print(f"   - Products: {len(products_df)} records")
print(f"   - Customers: {len(customers_df)} records")
print(f"   - Orders: {len(orders_df)} records")
print(f"   - Date range: {orders_df['order_date'].min()} to {orders_df['order_date'].max()}")

# Test the setup
test_result = pd.read_sql("SELECT COUNT(*) as total_orders FROM orders", conn)
print(f"✅ Setup verified: {test_result.iloc[0]['total_orders']} orders ready for analysis")

✅ Database connection created!
The sql extension is already loaded. To reload it, use:
  %reload_ext sql
✅ SQL magic loaded!
📊 Creating revenue analysis data...
✅ Revenue analysis data created!
   - Products: 100 records
   - Customers: 500 records
   - Orders: 5000 records
   - Date range: 2023-01-01 to 2024-01-31
✅ Setup verified: 5000 orders ready for analysis


In [12]:
import pandas as pd

# Basic Date Functions
# Breaks order_date into year / month / day / weekday parts with strftime()
# (all returned as text) and maps the '%w' weekday number (0 = Sunday) to a
# name, for the 10 most recent orders dated 2024-01-01 or later.
# Many orders share the same order_date, so order_id is used as a tie-breaker:
# without it, which 10 rows survive the LIMIT is unspecified.
sql_query_basic_date_functions = """
SELECT
    order_date,
    strftime('%Y', order_date) as year,
    strftime('%m', order_date) as month,
    strftime('%d', order_date) as day,
    strftime('%w', order_date) as day_of_week,
    CASE strftime('%w', order_date)
        WHEN '0' THEN 'Sunday'
        WHEN '1' THEN 'Monday'
        WHEN '2' THEN 'Tuesday'
        WHEN '3' THEN 'Wednesday'
        WHEN '4' THEN 'Thursday'
        WHEN '5' THEN 'Friday'
        WHEN '6' THEN 'Saturday'
    END as day_name,
    total_amount
FROM orders
WHERE order_date >= '2024-01-01'
ORDER BY order_date DESC, order_id DESC
LIMIT 10;
"""

# Execute the query with pandas.read_sql; failures are reported inline
# instead of being raised.
try:
    basic_date_functions_df = pd.read_sql(sql_query_basic_date_functions, conn)
    print("✅ Query executed successfully!")
    display(basic_date_functions_df)
except Exception as e:
    print(f"❌ An error occurred during query execution: {e}")

✅ Query executed successfully!


Unnamed: 0,order_date,year,month,day,day_of_week,day_name,total_amount
0,2024-01-31,2024,1,31,3,Wednesday,126.43
1,2024-01-31,2024,1,31,3,Wednesday,422.27
2,2024-01-31,2024,1,31,3,Wednesday,495.16
3,2024-01-31,2024,1,31,3,Wednesday,596.68
4,2024-01-31,2024,1,31,3,Wednesday,206.22
5,2024-01-31,2024,1,31,3,Wednesday,766.84
6,2024-01-31,2024,1,31,3,Wednesday,351.86
7,2024-01-31,2024,1,31,3,Wednesday,537.7
8,2024-01-31,2024,1,31,3,Wednesday,699.48
9,2024-01-31,2024,1,31,3,Wednesday,683.47


In [14]:
import pandas as pd

# Monthly revenue roll-up.
# Buckets orders by calendar month ('YYYY-MM') and reports, per bucket, the
# number of orders, the summed and mean total_amount, and the number of
# distinct customers. Newest month comes first.
sql_query_monthly_revenue = """
SELECT
    strftime('%Y-%m', order_date) as year_month,
    COUNT(order_id) as total_orders,
    SUM(total_amount) as monthly_revenue,
    AVG(total_amount) as avg_order_value,
    COUNT(DISTINCT customer_id) as unique_customers
FROM orders
GROUP BY strftime('%Y-%m', order_date)
ORDER BY year_month DESC;
"""

# Run the query through pandas on the shared connection. Any failure is
# printed rather than propagated, so the rest of the notebook keeps running.
try:
    monthly_revenue_df = pd.read_sql_query(sql_query_monthly_revenue, conn)
    print("✅ Query executed successfully!")
    display(monthly_revenue_df)
except Exception as query_error:
    print(f"❌ An error occurred during query execution: {query_error}")

✅ Query executed successfully!


Unnamed: 0,year_month,total_orders,monthly_revenue,avg_order_value,unique_customers
0,2024-01,1822,767016.2,420.974863,488
1,2023-12,275,112675.75,409.73,213
2,2023-11,272,111242.04,408.978088,215
3,2023-10,253,100740.52,398.183874,194
4,2023-09,260,108174.96,416.057538,200
5,2023-08,295,129344.15,438.454746,231
6,2023-07,286,114986.72,402.051469,223
7,2023-06,273,121052.08,443.414212,205
8,2023-05,246,104586.05,425.146545,190
9,2023-04,275,113950.48,414.365382,212


In [16]:
import pandas as pd

# Daily revenue with a cumulative total.
# Restricted to orders dated 2024-01-01 or later. The inner SUM aggregates each
# day's revenue; the outer SUM(...) OVER (ORDER BY order_date) is a window
# function that accumulates those daily sums in date order.
sql_query_daily_revenue_running_total = """
SELECT
    order_date,
    SUM(total_amount) as daily_revenue,
    COUNT(order_id) as daily_orders,
    SUM(SUM(total_amount)) OVER (ORDER BY order_date) as running_total
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY order_date
ORDER BY order_date;
"""

# Run the query through pandas and preview only the first rows. Any failure is
# printed rather than propagated.
try:
    daily_revenue_running_total_df = pd.read_sql_query(
        sql_query_daily_revenue_running_total, conn
    )
    print("✅ Query executed successfully!")
    display(daily_revenue_running_total_df.head())
except Exception as query_error:
    print(f"❌ An error occurred during query execution: {query_error}")

✅ Query executed successfully!


Unnamed: 0,order_date,daily_revenue,daily_orders,running_total
0,2024-01-01,23323.72,55,23323.72
1,2024-01-02,22786.54,59,46110.26
2,2024-01-03,25119.47,60,71229.73
3,2024-01-04,22637.65,62,93867.38
4,2024-01-05,19325.91,48,113193.29


In [18]:
import pandas as pd

# Revenue Growth Analysis (Month-over-Month) SQL query
#   monthly_revenue  - total_amount summed per calendar month ('YYYY-MM')
#   growth_analysis  - pairs each month with the total of the preceding row via
#                      LAG(); the first month has no predecessor and is dropped
#                      by the final WHERE clause
# growth_percentage is (current - previous) / previous * 100. When the previous
# total is not positive the ratio is undefined, so NULL is returned instead of
# a misleading 0% "no growth" figure.
# NOTE(review): LAG() pairs a month with the previous row present in the data;
# if a month had no orders at all, the comparison would span the gap.
sql_query_revenue_growth = """
WITH monthly_revenue AS (
    SELECT
        strftime('%Y-%m', order_date) as year_month,
        SUM(total_amount) as monthly_total
    FROM orders
    GROUP BY strftime('%Y-%m', order_date)
),
growth_analysis AS (
    SELECT
        year_month,
        monthly_total,
        LAG(monthly_total, 1) OVER (ORDER BY year_month) as prev_month_total
    FROM monthly_revenue
)
SELECT
    year_month,
    ROUND(monthly_total, 2) as current_month,
    ROUND(prev_month_total, 2) as previous_month,
    ROUND(
        CASE
            WHEN prev_month_total > 0
            THEN ((monthly_total - prev_month_total) / prev_month_total * 100)
            ELSE NULL
        END, 2
    ) as growth_percentage
FROM growth_analysis
WHERE prev_month_total IS NOT NULL
ORDER BY year_month DESC;
"""

# Execute the query with pandas.read_sql; failures are reported inline
# instead of being raised.
try:
    revenue_growth_df = pd.read_sql(sql_query_revenue_growth, conn)
    print("✅ Query executed successfully!")
    display(revenue_growth_df)
except Exception as e:
    print(f"❌ An error occurred during query execution: {e}")

✅ Query executed successfully!


Unnamed: 0,year_month,current_month,previous_month,growth_percentage
0,2024-01,767016.2,112675.75,580.73
1,2023-12,112675.75,111242.04,1.29
2,2023-11,111242.04,100740.52,10.42
3,2023-10,100740.52,108174.96,-6.87
4,2023-09,108174.96,129344.15,-16.37
5,2023-08,129344.15,114986.72,12.49
6,2023-07,114986.72,121052.08,-5.01
7,2023-06,121052.08,104586.05,15.74
8,2023-05,104586.05,113950.48,-8.22
9,2023-04,113950.48,102487.54,11.18


In [20]:
import pandas as pd

# Weekend vs weekday sales.
# strftime('%w') yields the weekday number as text ('0' = Sunday, '6' =
# Saturday); those two are labelled 'Weekend', everything else 'Weekday'.
# Per label: order count, total revenue and mean order value (both rounded to
# 2 decimals), with the higher-revenue group listed first.
sql_query_weekend_weekday_sales = """
SELECT
    CASE
        WHEN strftime('%w', order_date) IN ('0', '6') THEN 'Weekend'
        ELSE 'Weekday'
    END as day_type,
    COUNT(order_id) as total_orders,
    ROUND(SUM(total_amount), 2) as total_revenue,
    ROUND(AVG(total_amount), 2) as avg_order_value
FROM orders
GROUP BY day_type
ORDER BY total_revenue DESC;
"""

# Run the query through pandas on the shared connection. Any failure is
# printed rather than propagated.
try:
    weekend_weekday_sales_df = pd.read_sql_query(sql_query_weekend_weekday_sales, conn)
    print("✅ Query executed successfully!")
    display(weekend_weekday_sales_df)
except Exception as query_error:
    print(f"❌ An error occurred during query execution: {query_error}")

✅ Query executed successfully!


Unnamed: 0,day_type,total_orders,total_revenue,avg_order_value
0,Weekday,3608,1498673.9,415.38
1,Weekend,1392,588443.59,422.73


In [22]:
import pandas as pd

# Customer cohort analysis (simple version).
#   customer_first_order - each customer's earliest order date and the
#                          'YYYY-MM' month it falls in (the cohort)
#   customer_orders      - every order tagged with its customer's cohort and
#                          the whole-month offset from the first order,
#                          computed as year difference * 12 + month difference
# The final SELECT counts distinct customers per (cohort, offset) for offsets
# 0 through 12 and returns only the first 20 rows of that ordered result.
sql_query_customer_cohort = """
WITH customer_first_order AS (
    SELECT
        customer_id,
        MIN(order_date) as first_order_date,
        strftime('%Y-%m', MIN(order_date)) as cohort_month
    FROM orders
    GROUP BY customer_id
),
customer_orders AS (
    SELECT
        o.customer_id,
        o.order_date,
        cfo.cohort_month,
        (strftime('%Y', o.order_date) - strftime('%Y', cfo.first_order_date)) * 12 +
        (strftime('%m', o.order_date) - strftime('%m', cfo.first_order_date)) as months_since_first_order
    FROM orders o
    JOIN customer_first_order cfo ON o.customer_id = cfo.customer_id
)
SELECT
    cohort_month,
    months_since_first_order,
    COUNT(DISTINCT customer_id) as active_customers
FROM customer_orders
WHERE months_since_first_order >= 0 AND months_since_first_order <= 12
GROUP BY cohort_month, months_since_first_order
ORDER BY cohort_month, months_since_first_order
LIMIT 20;
"""

# Run the query through pandas on the shared connection. Any failure is
# printed rather than propagated.
try:
    customer_cohort_df = pd.read_sql_query(sql_query_customer_cohort, conn)
    print("✅ Query executed successfully!")
    display(customer_cohort_df)
except Exception as query_error:
    print(f"❌ An error occurred during query execution: {query_error}")

✅ Query executed successfully!


Unnamed: 0,cohort_month,months_since_first_order,active_customers
0,2023-01,0,206
1,2023-01,1,74
2,2023-01,2,87
3,2023-01,3,86
4,2023-01,4,71
5,2023-01,5,79
6,2023-01,6,86
7,2023-01,7,96
8,2023-01,8,87
9,2023-01,9,75


In [24]:
import pandas as pd

# Seasonal analysis by month of year.
# Groups on the two-digit month number only (the year is ignored), maps it to
# the month name, and reports order count, total revenue and mean order value
# (both rounded to 2 decimals), sorted January to December.
# NOTE(review): because the year is ignored, a month that occurs in more than
# one calendar year of the data is summed across those years.
sql_query_seasonal_analysis = """
SELECT
    strftime('%m', order_date) as month_number,
    CASE strftime('%m', order_date)
        WHEN '01' THEN 'January'
        WHEN '02' THEN 'February'
        WHEN '03' THEN 'March'
        WHEN '04' THEN 'April'
        WHEN '05' THEN 'May'
        WHEN '06' THEN 'June'
        WHEN '07' THEN 'July'
        WHEN '08' THEN 'August'
        WHEN '09' THEN 'September'
        WHEN '10' THEN 'October'
        WHEN '11' THEN 'November'
        WHEN '12' THEN 'December'
    END as month_name,
    COUNT(order_id) as total_orders,
    ROUND(SUM(total_amount), 2) as total_revenue,
    ROUND(AVG(total_amount), 2) as avg_order_value
FROM orders
GROUP BY strftime('%m', order_date), month_name
ORDER BY month_number;
"""

# Run the query through pandas on the shared connection. Any failure is
# printed rather than propagated.
try:
    seasonal_analysis_df = pd.read_sql_query(sql_query_seasonal_analysis, conn)
    print("✅ Query executed successfully!")
    display(seasonal_analysis_df)
except Exception as query_error:
    print(f"❌ An error occurred during query execution: {query_error}")

✅ Query executed successfully!


Unnamed: 0,month_number,month_name,total_orders,total_revenue,avg_order_value
0,1,January,2085,874661.04,419.5
1,2,February,220,93216.16,423.71
2,3,March,260,102487.54,394.18
3,4,April,275,113950.48,414.37
4,5,May,246,104586.05,425.15
5,6,June,273,121052.08,443.41
6,7,July,286,114986.72,402.05
7,8,August,295,129344.15,438.45
8,9,September,260,108174.96,416.06
9,10,October,253,100740.52,398.18


In [26]:
import pandas as pd

# Top revenue days.
# Aggregates orders per calendar date, attaches the weekday number from
# strftime('%w') ('0' = Sunday) and its name, and keeps the 15 dates with the
# highest rounded revenue.
sql_query_top_revenue_days = """
SELECT
    order_date,
    strftime('%w', order_date) as day_of_week,
    CASE strftime('%w', order_date)
        WHEN '0' THEN 'Sunday'
        WHEN '1' THEN 'Monday'
        WHEN '2' THEN 'Tuesday'
        WHEN '3' THEN 'Wednesday'
        WHEN '4' THEN 'Thursday'
        WHEN '5' THEN 'Friday'
        WHEN '6' THEN 'Saturday'
    END as day_name,
    COUNT(order_id) as daily_orders,
    ROUND(SUM(total_amount), 2) as daily_revenue
FROM orders
GROUP BY order_date
ORDER BY daily_revenue DESC
LIMIT 15;
"""

# Run the query through pandas on the shared connection. Any failure is
# printed rather than propagated.
try:
    top_revenue_days_df = pd.read_sql_query(sql_query_top_revenue_days, conn)
    print("✅ Query executed successfully!")
    display(top_revenue_days_df)
except Exception as query_error:
    print(f"❌ An error occurred during query execution: {query_error}")

✅ Query executed successfully!


Unnamed: 0,order_date,day_of_week,day_name,daily_orders,daily_revenue
0,2024-01-10,3,Wednesday,78,34515.86
1,2024-01-16,2,Tuesday,75,33260.27
2,2024-01-17,3,Wednesday,73,30152.65
3,2024-01-28,0,Sunday,70,30006.32
4,2024-01-14,0,Sunday,63,29159.94
5,2024-01-25,4,Thursday,64,28194.8
6,2024-01-21,0,Sunday,64,27885.93
7,2024-01-30,2,Tuesday,59,27272.24
8,2024-01-29,1,Monday,63,26830.74
9,2024-01-06,6,Saturday,63,25959.67
