<a href="https://colab.research.google.com/github/Sankytanky100/Data_Science-Projects/blob/main/Funnel_Metrics_with_SQL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install ipython-sql extension
!pip install ipython-sql

# Load the SQL extension
%load_ext sql

!pip install --upgrade prettytable # Update prettytable to the latest version

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [None]:
# Connect ipython-sql to an in-memory SQLite database (the URL carries no file path).
# The %sql / %%sql cells that follow run against this connection.
%sql sqlite://


In [None]:
%%sql

-- Drop any earlier copies of the three tables so this cell can be re-run
DROP TABLE IF EXISTS browse;
DROP TABLE IF EXISTS checkout;
DROP TABLE IF EXISTS purchase;

-- browse stores a date together with the user_id seen on that date
CREATE TABLE browse (browse_date DATE, user_id INT);

-- checkout stores one user_id per row
CREATE TABLE checkout (user_id INT);

-- purchase stores one user_id per row
CREATE TABLE purchase (user_id INT);


 * sqlite://
Done.
Done.
Done.
Done.
Done.
Done.


[]

In [None]:
%%sql

-- Seed browse with five users spread over three dates
INSERT INTO browse (browse_date, user_id)
VALUES ('2023-01-01', 1), ('2023-01-01', 2),
       ('2023-01-02', 3), ('2023-01-02', 4),
       ('2023-01-03', 5);

-- Users 1, 2 and 4 are recorded in checkout
INSERT INTO checkout (user_id)
VALUES (1), (2), (4);

-- Users 1 and 4 are recorded in purchase
INSERT INTO purchase (user_id)
VALUES (1), (4);


 * sqlite://
5 rows affected.
3 rows affected.
2 rows affected.


[]

In [None]:
%%sql
-- Show every row that was loaded into browse
SELECT browse_date, user_id
FROM browse;


 * sqlite://
Done.


KeyError: 'DEFAULT'

In [None]:
%%sql
-- Show every row that was loaded into checkout
SELECT user_id
FROM checkout;


In [None]:
%%sql
-- Show every row that was loaded into purchase
SELECT user_id
FROM purchase;


In [None]:
%%sql

-- For each browse row, flag whether the same user_id also appears in checkout and in purchase
WITH funnels AS (
    SELECT
        viewed.browse_date,
        viewed.user_id,
        CASE WHEN reached.user_id IS NULL THEN 0 ELSE 1 END AS is_checkout,
        CASE WHEN bought.user_id IS NULL THEN 0 ELSE 1 END AS is_purchase
    FROM browse AS viewed
    LEFT JOIN checkout AS reached ON reached.user_id = viewed.user_id
    LEFT JOIN purchase AS bought ON bought.user_id = viewed.user_id
)
SELECT browse_date, user_id, is_checkout, is_purchase
FROM funnels;


In [None]:
%%sql

-- Daily funnel metrics with one output row per browse_date
WITH funnels AS (
    -- LEFT JOINs keep every browse row and flag whether the same user_id
    -- also appears in checkout and in purchase
    SELECT
        b.browse_date,
        b.user_id,
        CASE WHEN c.user_id IS NOT NULL THEN 1 ELSE 0 END AS is_checkout,
        CASE WHEN p.user_id IS NOT NULL THEN 1 ELSE 0 END AS is_purchase
    FROM browse b
    LEFT JOIN checkout c ON b.user_id = c.user_id
    LEFT JOIN purchase p ON b.user_id = p.user_id
),
results AS (
    SELECT
        browse_date,
        COUNT(DISTINCT user_id) AS num_browse,
        -- Count distinct users at every stage so the three counts are comparable.
        -- Summing the flags would count joined rows instead, and a user with
        -- repeated browse, checkout or purchase rows could push a rate above 1.
        COUNT(DISTINCT CASE WHEN is_checkout = 1 THEN user_id END) AS num_checkout,
        COUNT(DISTINCT CASE WHEN is_purchase = 1 THEN user_id END) AS num_purchase
    FROM funnels
    GROUP BY browse_date
)
SELECT
    browse_date,
    num_browse,
    num_checkout,
    num_purchase,
    -- 1.0 forces floating point division
    ROUND(1.0 * num_checkout / num_browse, 2) AS browse_to_checkout,
    -- The rate is left NULL on dates where nobody reached checkout
    CASE
        WHEN num_checkout > 0 THEN ROUND(1.0 * num_purchase / num_checkout, 2)
        ELSE NULL
    END AS checkout_to_purchase
FROM results
ORDER BY browse_date;


In [None]:
import pandas as pd

# Run the query and store the result in a DataFrame
results = %sql WITH funnels AS (
    SELECT
        b.browse_date,
        b.user_id,
        CASE WHEN c.user_id IS NOT NULL THEN 1 ELSE 0 END AS is_checkout,
        CASE WHEN p.user_id IS NOT NULL THEN 1 ELSE 0 END AS is_purchase
    FROM browse b
    LEFT JOIN checkout c ON b.user_id = c.user_id
    LEFT JOIN purchase p ON b.user_id = p.user_id
),
results AS (
    SELECT
        browse_date,
        COUNT(DISTINCT user_id) AS num_browse,
        SUM(is_checkout) AS num_checkout,
        SUM(is_purchase) AS num_purchase
    FROM funnels
    GROUP BY browse_date
)
SELECT
    browse_date,
    num_browse,
    num_checkout,
    num_purchase,
    ROUND(1.0 * num_checkout / num_browse, 2) AS browse_to_checkout,
    CASE
        WHEN num_checkout > 0 THEN ROUND(1.0 * num_purchase / num_checkout, 2)
        ELSE NULL
    END AS checkout_to_purchase
FROM results
ORDER BY browse_date;

# Convert to pandas DataFrame
results_df = results.DataFrame()

# Display the DataFrame
results_df


In [None]:
import matplotlib.pyplot as plt


def plot_conversion_rate(df, rate_column, title, color):
    """Draw one bar chart of a conversion-rate column against browse_date.

    Args:
        df: DataFrame with a 'browse_date' column and the column named by
            `rate_column`.
        rate_column: Name of the column whose values become the bar heights.
        title: Title shown above the chart.
        color: Bar colour, passed straight to matplotlib.
    """
    _fig, ax = plt.subplots(figsize=(8, 6))
    ax.bar(df['browse_date'], df[rate_column], color=color)
    ax.set(title=title, xlabel='Browse Date', ylabel='Conversion Rate')
    # Fixed 0..1 axis so both charts share the same scale
    ax.set_ylim(0, 1)
    plt.show()


# Plot Browse to Checkout Conversion Rate
plot_conversion_rate(
    results_df, 'browse_to_checkout', 'Browse to Checkout Conversion Rate', 'skyblue'
)

# Plot Checkout to Purchase Conversion Rate
plot_conversion_rate(
    results_df, 'checkout_to_purchase', 'Checkout to Purchase Conversion Rate', 'salmon'
)
