In [1]:
from pyhive import presto
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
import time

from datetime import datetime, timedelta

import seaborn as sns

# Open the Presto DB-API connection; the pd.read_sql cells below pass `conn`
# as their connection object.
conn = presto.connect(**{
    'host': 'presto.processing.yoda.run',
    'port': 80,
    'username': 'mail_id@rapido.bike',
})

In [2]:
# Reporting window as ISO 'YYYY-MM-DD' strings. Both values are interpolated
# into the SQL f-strings below, where they are cast to DATE and truncated to
# their month.
start_date, end_date = '2022-10-01', '2022-10-31'

In [3]:
# Presto SQL for the monthly taxi funnel report.
#
# What the query does (one output row per month x ltr_taxi x recency_taxi,
# plus one 'TOTAL' row per month):
#   * base_month_customer : month-start snapshot rows (run_date = first day of
#     the month) from datasets.iallocator_customer_segments.
#   * base_month_segment  : buckets each snapshot customer by lifetime rides
#     (ltr_taxi) and by how long before the month start the last ride was
#     (recency_taxi).
#   * rr_net              : per customer-month sums of RR sessions, net rides,
#     discount and subtotal for service_name IN ('Link', 'Auto').
#   * monthly_ao          : per customer-month AO / FE sessions (daily max
#     across rows, then summed over the month; no service_name filter).
#   * base_current        : FULL JOIN of the three; customers without a
#     snapshot row are labelled 'NEW'.
#   * a / b / final       : per-segment conversion ratios and the TOTAL row.
#
# Changes versus the previous revision of this cell:
#   1. The activity window end used
#        DATE_TRUNC('month', end_date + 28 days) - 1 day
#      which only equals the last day of end_date's month when end_date is
#      close to month end (e.g. end_date = 2022-10-02 gave 2022-09-30). It is
#      now DATE_TRUNC('month', end_date) + 1 month - 1 day, which is the last
#      day of that month for any end_date.
#   2. The second FULL JOIN matched rr_net on b.customer_id / c.month. For
#      customers with no snapshot row (b is NULL) that predicate is never
#      true, so their AO row and their ride row stayed on separate output rows
#      and the ao/fe fallback CASE expressions could not see r.net_rides or
#      r.rr. The join now keys on COALESCE(b, c) for both columns.
#   3. UNION -> UNION ALL: segment rows and the 'TOTAL' row are disjoint, so
#      de-duplication was pure overhead.
#   4. Dropped the ORDER BY inside CTE `a`; only the final ORDER BY defines
#      the result order.
taxi = f"""

WITH base_month_customer AS 
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', date(run_date)), '%Y-%m-%d') AS month,
    customer_id,
    --MAX(taxi_lifetime_last_ride_date) 
    taxi_lifetime_last_ride_date,
    taxi_lifetime_first_ride_date,
    --MAX(taxi_lifetime_rides) 
    taxi_lifetime_rides

FROM datasets.iallocator_customer_segments
WHERE 
    run_date BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d')
    AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) , '%Y-%m-%d') 
    AND run_date = DATE_FORMAT(DATE_TRUNC('month', date(run_date)), '%Y-%m-%d')
    AND (taxi_lifetime_rides > 0 or taxi_lifetime_rides is not null or taxi_lifetime_last_ride_date is not null)
--GROUP BY 1,2
),

base_month_segment AS
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', DATE(month)) , '%Y-%m-%d') AS month,
    customer_id,
    CASE
    WHEN taxi_lifetime_rides IS NULL OR DATE(month) = DATE(taxi_lifetime_first_ride_date) THEN 'LTR =0'
    WHEN taxi_lifetime_rides BETWEEN 1 AND 4 THEN 'LTR 1-4'
    WHEN (taxi_lifetime_rides BETWEEN 5 AND 20) AND (taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY)) THEN 'LTR 5-20'
    WHEN (taxi_lifetime_rides BETWEEN 21 AND 50) AND (taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY)) THEN 'LTR 21-50'
    WHEN (taxi_lifetime_rides > 50) AND (taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY)) THEN 'LTR 50+'
    WHEN taxi_lifetime_rides >= 5 THEN 'LTR >=5'
    ELSE 'LTR =0'
    END ltr_taxi,
    CASE 
    WHEN taxi_lifetime_last_ride_date IS NULL OR DATE(month) = DATE(taxi_lifetime_first_ride_date) THEN 'LTR=0'

    WHEN taxi_lifetime_rides BETWEEN 1 AND 4 and taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '7' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '0' DAY) THEN '1. 1_7_DAYS'
    WHEN taxi_lifetime_rides BETWEEN 1 AND 4 and taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '14' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '8' DAY) THEN '2. 8_14_DAYS'
    WHEN taxi_lifetime_rides BETWEEN 1 AND 4 and taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '21' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '15' DAY) THEN '3. 15_21_DAYS'
    WHEN taxi_lifetime_rides BETWEEN 1 AND 4 and taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '22' DAY) THEN '4. 22_30_DAYS'
    

    WHEN taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY) THEN '5. LAST_30_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '45' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '31' DAY) THEN '6. 31_45_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '60' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '46' DAY) THEN '7. 46_60_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '90' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '61' DAY) THEN '8. 61_90_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '180' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '91' DAY) THEN '9. 91_180_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '365' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '181' DAY) THEN '99. 181_365_DAYS'
    WHEN taxi_lifetime_last_ride_date < (DATE_TRUNC('month', DATE(month)) - INTERVAL '365' DAY) THEN '999. 365_ABOVE'
    ELSE 'LTR=0'
    END recency_taxi
FROM base_month_customer
),

base_month_total AS
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', DATE(month)) , '%Y-%m-%d') AS month,
    COUNT(DISTINCT customer_id) existing_custr_total 
FROM base_month_customer
GROUP BY 1
),

customer_rf_daily_kpi AS
(
SELECT 
    day,
    customerid,
    rr_sessions_unique_daily,
    net_rides_daily,
    ao_sessions_unique_daily,
    fe_sessions_unique_daily,
    discount_daily,
    subtotal_daily
        
FROM datasets.customer_rf_daily_kpi
WHERE 
    day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d')
    AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) + INTERVAL '1' MONTH - INTERVAL '1' DAY, '%Y-%m-%d')
    AND service_name IN ('Link', 'Auto')
    AND customerid IS NOT NULL
),

rr_net AS
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', DATE(day)), '%Y-%m-%d') AS month,
    customerid as customer_id,
    SUM(rr_sessions_unique_daily) rr,
    SUM(net_rides_daily) net_rides,
    SUM(discount_daily) burn,
    SUM(subtotal_daily) subtotal
FROM customer_rf_daily_kpi
WHERE 
    day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d') 
    AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) + INTERVAL '1' MONTH - INTERVAL '1' DAY, '%Y-%m-%d')
GROUP BY 1,2
),

max_daily_ao AS
(
SELECT 
        day,
        customer_id,
        SUM(ao) AS ao,
        SUM(fe) AS fe
FROM
    (
    SELECT 
        customerid as customer_id,
        day,
        max(ao_sessions_unique_daily) AS ao,
        max(fe_sessions_unique_daily) AS fe
    FROM datasets.customer_rf_daily_kpi
    WHERE 
        day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d') 
        AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) + INTERVAL '1' MONTH - INTERVAL '1' DAY, '%Y-%m-%d')
        AND customerid IS NOT NULL
    GROUP BY 1,2
    )
GROUP BY 1,2
),

monthly_ao AS
(
SELECT  
    DATE_FORMAT(DATE_TRUNC('month', DATE(day)), '%Y-%m-%d') AS month,
    customer_id,
    SUM(ao) AS ao,
    SUM(fe) AS fe
FROM max_daily_ao
GROUP BY 1,2
),

base_current AS (
SELECT 
        COALESCE(b.month,c.month,r.month) month,
        -- b.month,
        COALESCE(b.customer_id,c.customer_id,r.customer_id) customers,
        b.customer_id bcustomer_id,
        COALESCE(b.ltr_taxi,'NEW') ltr_taxi,
        COALESCE(b.recency_taxi, 'NEW') recency_taxi,
        c.customer_id acustomer_id,
        r.customer_id rcustomer_id,
        CASE 
        WHEN c.ao = 0 AND r.net_rides > 0 THEN r.net_rides
        WHEN c.ao = 0 AND c.fe > 0 THEN c.fe
        WHEN c.ao = 0 AND r.rr > 0 THEN r.rr
        WHEN c.ao IS NULL THEN 0
        ELSE c.ao END ao,
        
        CASE
        WHEN c.fe = 0 AND r.net_rides > 0 THEN r.net_rides 
        WHEN c.fe = 0 AND r.rr > 0 THEN r.rr
        WHEN c.fe IS NULL THEN 0
        ELSE c.fe END fe,
        
        CASE
        WHEN r.rr = 0 AND r.net_rides > 0 THEN r.net_rides
        WHEN r.rr IS NULL THEN 0
        ELSE r.rr END rr,
        
        COALESCE(r.net_rides, 0) net_rides,
        COALESCE(r.burn, 0) burn,
        COALESCE(r.subtotal, 0) subtotal

FROM base_month_segment AS b
FULL JOIN monthly_ao AS c ON b.customer_id = c.customer_id AND b.month = c.month 
-- key on whichever of b / c is present so rows without a snapshot entry still pair with their rides
FULL JOIN rr_net AS r ON COALESCE(b.customer_id, c.customer_id) = r.customer_id AND COALESCE(b.month, c.month) = r.month
),

detailed_view AS 
(
SELECT 
        c.month,
        ltr_taxi,
        case 
        when ltr_taxi = 'LTR 1-4' then 1
        when ltr_taxi = 'LTR 5-20' then 2
        when ltr_taxi = 'LTR 21-50' then 3
        when ltr_taxi = 'LTR 50+' then 4
        when ltr_taxi = 'LTR >=5' then 5
        when ltr_taxi = 'LTR =0' then 6
        when ltr_taxi = 'NEW' then 7
        end row_order,
        recency_taxi,
        COUNT(DISTINCT bcustomer_id) existing_custr_count,
        COUNT(DISTINCT CASE WHEN ao > 0 THEN acustomer_id END) ao_users,
        COUNT(DISTINCT CASE WHEN fe > 0 THEN acustomer_id END) fe_users,
        COUNT(DISTINCT CASE WHEN rr > 0 THEN rcustomer_id END) rr_users,
        COUNT(DISTINCT CASE WHEN net_rides > 0 THEN rcustomer_id END) net_users,
        SUM(net_rides) net_rides,
        SUM(burn) discount,
        SUM(subtotal) subtotal
        
FROM base_current c
GROUP BY 1,2,3,4
),

summary_view AS (
SELECT 
        c.month,
        COUNT(DISTINCT CASE WHEN net_rides > 0 THEN c.customers END) net_users,
        SUM(net_rides) net_rides,
        SUM(burn) discount,
        SUM(subtotal) subtotal
FROM base_current c
GROUP BY 1
),

a as 
(
SELECT  
        a.month,
        a.row_order,
        a.ltr_taxi,
        a.recency_taxi,
        a.existing_custr_count,
        COALESCE(try(a.existing_custr_count*100.00/b.existing_custr_total), 0) existing_custr_distr,
        a.ao_users,
        COALESCE(try(a.ao_users*100.00/a.existing_custr_count), 0) ao_conversion, 
        a.fe_users,
        COALESCE(try(a.fe_users*100.00/a.existing_custr_count), 0) fe_conversion, 
        a.rr_users,
        COALESCE(try(a.rr_users*100.00/a.existing_custr_count), 0) rr_conversion,
        COALESCE(try(a.rr_users*100.00/a.fe_users), 0) fe_rr,
        a.net_users,
        COALESCE(try(a.net_users*100.00/a.existing_custr_count), 0) net_conversion,
        COALESCE(try(a.net_users*100.00/a.rr_users), 0) rr_net,
        COALESCE(try(a.net_users*100.00/c.net_users), 0) net_users_distr,
        a.net_rides,
        COALESCE(try(a.net_rides*100.00/c.net_rides), 0) net_rides_distr,
        COALESCE(try(a.net_rides*1.00/a.net_users), 0) rpc,
        a.discount,
        a.subtotal
        
FROM detailed_view a
LEFT JOIN base_month_total b ON a.month = b.month
LEFT JOIN summary_view c ON a.month = c.month
),

b as 
(
SELECT
        month,
        8 row_order,
        'TOTAL' ltr_taxi,
        '' recency_taxi,
        SUM(existing_custr_count) existing_custr_count,
        100.00 existing_custr_distr,
        SUM(ao_users) ao_users,
        COALESCE(try(SUM(ao_users)*100.00/SUM(existing_custr_count)), 0) ao_conversion,
        SUM(fe_users) fe_users,
        COALESCE(try(SUM(fe_users)*100.00/SUM(existing_custr_count)), 0) fe_conversion,
        SUM(rr_users) rr_users,
        COALESCE(try(SUM(rr_users)*100.00/SUM(existing_custr_count)), 0) rr_conversion,
        COALESCE(try(SUM(rr_users)*100.00/SUM(fe_users)), 0) fe_rr,
        SUM(net_users) net_users,
        COALESCE(try(SUM(net_users)*100.00/SUM(existing_custr_count)), 0) net_conversion,
        COALESCE(try(SUM(net_users)*100.00/SUM(rr_users)), 0) rr_net,
        100.00 net_users_distr,
        SUM(net_rides) net_rides,
        100.00 net_rides_distr,
        COALESCE(try(SUM(net_rides)*1.0/SUM(net_users)), 0) rpc,
        SUM(discount) discount,
        SUM(subtotal) subtotal
FROM a
GROUP BY 1
),

final as 
(
select * from a
union all
select * from b
)
select 
        month,
        ltr_taxi,recency_taxi,
        existing_custr_count,existing_custr_distr,
        ao_users,ao_conversion,
        fe_users,fe_conversion,
        rr_users,rr_conversion,fe_rr "fe_rr(users)",
        net_users,net_conversion, rr_net "rr_net(users)", net_users_distr,
        net_rides,net_rides_distr,
        rpc,
        discount,
        subtotal,
        COALESCE(TRY(discount*100.0/subtotal),0) discount_perc,
        COALESCE(TRY(discount*1.0/net_rides),0) dpr
from final
order by 1,row_order,3

"""

In [4]:
# Execute the `taxi` query over the Presto connection and load the result
# set into a DataFrame.
df_taxi = pd.read_sql(sql=taxi, con=conn)

In [5]:
# Bare last expression: the notebook renders the query result as a table.
df_taxi

Unnamed: 0,month,ltr_taxi,recency_taxi,existing_custr_count,existing_custr_distr,ao_users,ao_conversion,fe_users,fe_conversion,rr_users,...,net_conversion,rr_net(users),net_users_distr,net_rides,net_rides_distr,rpc,discount,subtotal,discount_perc,dpr
0,2022-10-01,LTR 1-4,1. 1_7_DAYS,660601,2.08,495417,74.99,451692,68.38,375815,...,46.02,80.89,4.7,856210,3.91,2.82,3320691.0,77313600.0,4.295093,3.87836
1,2022-10-01,LTR 1-4,2. 8_14_DAYS,465448,1.46,285186,61.27,249690,53.65,190267,...,29.54,72.26,2.13,314990,1.44,2.29,1334105.0,29517870.0,4.519651,4.235388
2,2022-10-01,LTR 1-4,3. 15_21_DAYS,415322,1.31,228049,54.91,196315,47.27,145493,...,24.54,70.05,1.58,213040,0.97,2.09,924453.0,20210490.0,4.574126,4.33934
3,2022-10-01,LTR 1-4,4. 22_30_DAYS,513512,1.61,251601,49.0,214336,41.74,154157,...,20.48,68.21,1.63,208691,0.95,1.98,917614.0,19936240.0,4.602743,4.396998
4,2022-10-01,LTR 1-4,6. 31_45_DAYS,726310,2.28,311764,42.92,260105,35.81,182124,...,16.74,66.76,1.88,228544,1.04,1.88,1045152.0,21926340.0,4.766652,4.573089
5,2022-10-01,LTR 1-4,7. 46_60_DAYS,680304,2.14,263386,38.72,216138,31.77,148409,...,14.24,65.26,1.5,175114,0.8,1.81,818598.0,17167170.0,4.768392,4.674658
6,2022-10-01,LTR 1-4,8. 61_90_DAYS,1130747,3.56,358423,31.7,288068,25.48,190429,...,10.75,63.86,1.88,214389,0.98,1.76,1041444.0,20452190.0,5.09209,4.857731
7,2022-10-01,LTR 1-4,9. 91_180_DAYS,2606147,8.2,569033,21.83,450703,17.29,286531,...,6.89,62.62,2.77,308287,1.41,1.72,1589677.0,28843570.0,5.511374,5.156484
8,2022-10-01,LTR 1-4,99. 181_365_DAYS,2369857,7.45,292957,12.36,237634,10.03,146832,...,3.9,62.98,1.43,161766,0.74,1.75,894236.0,14359910.0,6.22731,5.52796
9,2022-10-01,LTR 1-4,999. 365_ABOVE,5404842,17.0,258101,4.78,214435,3.97,135998,...,1.65,65.67,1.38,168370,0.77,1.89,968963.0,14325150.0,6.76407,5.754962


## LTR = 0

In [7]:
# Presto SQL for the "LTR = 0" deep-dive: the same monthly funnel as the taxi
# report, restricted on the snapshot side to customers whose ltr_taxi and
# recency_taxi both resolve to 'LTR=0', and additionally split by how
# recently the customer had an app-open day (ao_recency).
#
# Pipeline:
#   * base_month_customer : month-start snapshot rows from
#     datasets.iallocator_customer_segments.
#   * ao_recent_ltr0      : per customer, the latest `day` with
#     ao_sessions_unique_daily > 0 in the 365 days up to and including the
#     first day of start_date's month.
#   * base_month_segment  : ltr_taxi / recency_taxi / ao_recency buckets, then
#     filtered to ltr_taxi = 'LTR=0' AND recency_taxi = 'LTR=0'.
#   * rr_net / monthly_ao : monthly ride and AO/FE activity for snapshot
#     customers (rides limited to service_name in ('Auto','Link')).
#   * base_current        : FULL JOIN of the three; rows with no surviving
#     snapshot entry are labelled 'NEW'.
#   * detailed_view / a   : per-bucket counts and conversion ratios.
#
# Changes versus the previous revision of this cell:
#   1. Activity window end: DATE_TRUNC('month', end_date + 28 days) - 1 day
#      is only the last day of end_date's month when end_date is near month
#      end; replaced by DATE_TRUNC('month', end_date) + 1 month - 1 day.
#   2. The second FULL JOIN matched rr_net on b.customer_id / c.month, which
#      never matches when b is NULL; it now keys on COALESCE(b, c) so AO and
#      ride rows of the same customer-month land on one row and the ao/fe
#      fallback CASE expressions can see r.net_rides / r.rr.
#   3. Dropped the ORDER BY inside CTE `a`; only the final ORDER BY defines
#      the result order.
#
# NOTE(review): ao_recent_ltr0 is anchored on start_date only. For the first
# month of the range the '8.1_2_YEARS' bucket therefore cannot be produced
# (the scan goes back 365 days only), and for multi-month ranges later months
# reuse a recency computed as of the first month. Its upper bound also
# includes the month-start day itself, which overlaps the first day of the
# activity window -- confirm whether that is intended.
ltr_0 = f"""

WITH base_month_customer AS 
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', date(run_date)), '%Y-%m-%d') AS month,
    customer_id,
    --MAX(taxi_lifetime_last_ride_date) 
    taxi_lifetime_last_ride_date,
    taxi_lifetime_first_ride_date,
    --MAX(taxi_lifetime_rides) 
    taxi_lifetime_rides

FROM datasets.iallocator_customer_segments
WHERE 
    run_date BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d')
    AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) , '%Y-%m-%d') 
    AND run_date = DATE_FORMAT(DATE_TRUNC('month', date(run_date)), '%Y-%m-%d')
    AND (taxi_lifetime_rides > 0 or taxi_lifetime_rides is not null or taxi_lifetime_last_ride_date is not null)
--GROUP BY 1,2
),

ao_recent_ltr0 as
(
-- SELECT
--     customer_id,
--     max(day) ao_recency
-- FROM
    (
    SELECT 
        customerid customer_id,
        max(day) ao_recency
    FROM datasets.customer_rf_daily_kpi
    WHERE 
        day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)) - INTERVAL '365' DAY, '%Y-%m-%d') 
        AND DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d') 
        -- AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date) + INTERVAL '28' DAY) - INTERVAL '1' DAY, '%Y-%m-%d')
        AND customerid IN (SELECT DISTINCT customer_id FROM base_month_customer)-- where taxi_lifetime_rides not between 1 and 10000)
        AND ao_sessions_unique_daily > 0
    GROUP BY 1
    )
-- GROUP BY 1
),

base_month_segment AS
(
SELECT * FROM 
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', DATE(month)) , '%Y-%m-%d') AS month,
    base_month_customer.customer_id,
    CASE
    WHEN taxi_lifetime_rides IS NULL OR DATE(month) = DATE(taxi_lifetime_first_ride_date) THEN 'LTR=0'
    WHEN taxi_lifetime_rides BETWEEN 1 AND 4 THEN '1-4'
    WHEN (taxi_lifetime_rides BETWEEN 5 AND 20) AND (taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY)) THEN '5-20'
    WHEN (taxi_lifetime_rides BETWEEN 21 AND 50) AND (taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY)) THEN '21-50'
    WHEN (taxi_lifetime_rides > 50) AND (taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY)) THEN '50+'
    WHEN taxi_lifetime_rides >= 5 THEN '>=5'
    ELSE 'LTR=0'
    END ltr_taxi,
    
    CASE 
    WHEN taxi_lifetime_last_ride_date IS NULL OR DATE(month) = DATE(taxi_lifetime_first_ride_date) THEN 'LTR=0'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '7' DAY) THEN '1. LAST_7_DAYS'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '14' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '8' DAY) THEN '2. 8_14_DAYS'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '21' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '15' DAY) THEN '3. 15_21_DAYS'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '22' DAY) THEN '4. 22_30_DAYS'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '45' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '31' DAY) THEN '5. 31_45_DAYS'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '60' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '46' DAY) THEN '6. 46_60_DAYS'
    WHEN (taxi_lifetime_rides BETWEEN 1 AND 4) AND taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '90' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '61' DAY) THEN '7. 61_90_DAYS'    
    
    WHEN taxi_lifetime_last_ride_date >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY) THEN '11. LAST_30_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '90' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '31' DAY) THEN '12. 31_90_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '180' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '91' DAY) THEN '8. 91_180_DAYS'
    WHEN taxi_lifetime_last_ride_date BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '365' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '181' DAY) THEN '9. 181_365_DAYS'
    WHEN taxi_lifetime_last_ride_date < (DATE_TRUNC('month', DATE(month)) - INTERVAL '365' DAY) THEN '10. 365_ABOVE'
    ELSE 'LTR=0'
    END recency_taxi,
    
    CASE
    WHEN DATE(ao_recency) >= (DATE_TRUNC('month', DATE(month)) - INTERVAL '7' DAY) THEN '1. LAST_7_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '14' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '8' DAY) THEN '2. 8_14_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '30' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '15' DAY) THEN '3. 15_30_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '60' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '31' DAY) THEN '4. 31_60_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '90' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '61' DAY) THEN '5. 61_90_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '180' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '91' DAY) THEN '6. 91_180_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '365' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '181' DAY) THEN '7. 181_365_DAYS'
    WHEN DATE(ao_recency) BETWEEN (DATE_TRUNC('month', DATE(month)) - INTERVAL '730' DAY) AND (DATE_TRUNC('month', DATE(month)) - INTERVAL '366' DAY) THEN '8.1_2_YEARS'
    ELSE 'NO_AO'
    END ao_recency
FROM base_month_customer 
LEFT JOIN ao_recent_ltr0 b on base_month_customer.customer_id = b.customer_id
-- where taxi_lifetime_rides not between 1 and 10000
)
WHERE ltr_taxi = 'LTR=0' AND recency_taxi = 'LTR=0'
),

base_month_total AS
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', DATE(month)) , '%Y-%m-%d') AS month,
    COUNT(DISTINCT customer_id) existing_custr_total 
FROM base_month_customer
GROUP BY 1
),

customer_rf_daily_kpi AS
(
SELECT 
    day,
    customerid,
    rr_sessions_unique_daily,
    net_rides_daily,
    ao_sessions_unique_daily,
    fe_sessions_unique_daily,
    discount_daily,
    subtotal_daily
        
FROM datasets.customer_rf_daily_kpi
WHERE 
    day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d')
    AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) + INTERVAL '1' MONTH - INTERVAL '1' DAY, '%Y-%m-%d')
    AND service_name in ('Auto','Link')
    AND customerid IS NOT NULL
    AND customerid IN (SELECT DISTINCT customer_id FROM base_month_customer)-- where taxi_lifetime_rides not between 1 and 10000)
),

rr_net AS
(
SELECT 
    DATE_FORMAT(DATE_TRUNC('month', DATE(day)), '%Y-%m-%d') AS month,
    customerid as customer_id,
    SUM(rr_sessions_unique_daily) rr,
    SUM(net_rides_daily) net_rides,
    SUM(discount_daily) burn,
    SUM(subtotal_daily) subtotal
FROM customer_rf_daily_kpi
WHERE 
    day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d') 
    AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) + INTERVAL '1' MONTH - INTERVAL '1' DAY, '%Y-%m-%d')
GROUP BY 1,2
),

max_daily_ao AS
(
SELECT 
        day,
        customer_id,
        SUM(ao) AS ao,
        SUM(fe) AS fe
FROM
    (
    SELECT 
        customerid as customer_id,
        day,
        max(ao_sessions_unique_daily) AS ao,
        max(fe_sessions_unique_daily) AS fe
    FROM datasets.customer_rf_daily_kpi
    WHERE 
        day BETWEEN DATE_FORMAT(DATE_TRUNC('month', cast('{start_date}' as date)), '%Y-%m-%d') 
        AND DATE_FORMAT(DATE_TRUNC('month', cast('{end_date}' as date)) + INTERVAL '1' MONTH - INTERVAL '1' DAY, '%Y-%m-%d')
        AND customerid IS NOT NULL
        AND customerid IN (SELECT DISTINCT customer_id FROM base_month_customer)-- where taxi_lifetime_rides not between 1 and 10000)
    GROUP BY 1,2
    )
GROUP BY 1,2
),

monthly_ao AS
(
SELECT  
    DATE_FORMAT(DATE_TRUNC('month', DATE(day)), '%Y-%m-%d') AS month,
    customer_id,
    SUM(ao) AS ao,
    SUM(fe) AS fe
FROM max_daily_ao
GROUP BY 1,2
),

base_current AS (
SELECT 
        COALESCE(b.month,c.month,r.month) month,
        -- b.month,
        COALESCE(b.customer_id,c.customer_id,r.customer_id) customers,
        b.customer_id bcustomer_id,
        COALESCE(b.ltr_taxi,'NEW') ltr_taxi,
        COALESCE(b.recency_taxi, 'NEW') recency_taxi,
        ao_recency,
        c.customer_id acustomer_id,
        r.customer_id rcustomer_id,
        CASE 
        WHEN c.ao = 0 AND r.net_rides > 0 THEN r.net_rides
        WHEN c.ao = 0 AND c.fe > 0 THEN c.fe
        WHEN c.ao = 0 AND r.rr > 0 THEN r.rr
        WHEN c.ao IS NULL THEN 0
        ELSE c.ao END ao,
        
        CASE
        WHEN c.fe = 0 AND r.net_rides > 0 THEN r.net_rides 
        WHEN c.fe = 0 AND r.rr > 0 THEN r.rr
        WHEN c.fe IS NULL THEN 0
        ELSE c.fe END fe,
        
        CASE
        WHEN r.rr = 0 AND r.net_rides > 0 THEN r.net_rides
        WHEN r.rr IS NULL THEN 0
        ELSE r.rr END rr,
        
        COALESCE(r.net_rides, 0) net_rides,
        COALESCE(r.burn, 0) burn,
        COALESCE(r.subtotal, 0) subtotal

FROM base_month_segment AS b
FULL JOIN monthly_ao AS c ON b.customer_id = c.customer_id AND b.month = c.month 
-- key on whichever of b / c is present so rows without a snapshot entry still pair with their rides
FULL JOIN rr_net AS r ON COALESCE(b.customer_id, c.customer_id) = r.customer_id AND COALESCE(b.month, c.month) = r.month
),

detailed_view AS 
(
SELECT 
        c.month,
        ltr_taxi,
        case 
        when ltr_taxi = '1-4' then 1
        when ltr_taxi = '5-20' then 2
        when ltr_taxi = '21-50' then 3
        when ltr_taxi = '50+' then 4
        when ltr_taxi = '>=5' then 5
        when ltr_taxi = 'LTR=0' then 6
        when ltr_taxi = 'NEW' then 7
        end row_order,
        recency_taxi,
        ao_recency,
        COUNT(DISTINCT bcustomer_id) existing_custr_count,
        COUNT(DISTINCT CASE WHEN ao > 0 THEN acustomer_id END) ao_users,
        COUNT(DISTINCT CASE WHEN fe > 0 THEN acustomer_id END) fe_users,
        COUNT(DISTINCT CASE WHEN rr > 0 THEN rcustomer_id END) rr_users,
        COUNT(DISTINCT CASE WHEN net_rides > 0 THEN rcustomer_id END) net_users,
        SUM(net_rides) net_rides,
        SUM(burn) discount,
        SUM(subtotal) subtotal
        
FROM base_current c
GROUP BY 1,2,3,4,5
),

summary_view AS (
SELECT 
        c.month,
        COUNT(DISTINCT CASE WHEN net_rides > 0 THEN c.customers END) net_users,
        SUM(net_rides) net_rides,
        SUM(burn) discount,
        SUM(subtotal) subtotal
FROM base_current c
GROUP BY 1
),

a as 
(
SELECT  
        a.month,
        a.row_order,
        a.ltr_taxi,
        a.recency_taxi,
        ao_recency,
        a.existing_custr_count,
        COALESCE(try(a.existing_custr_count*100.00/b.existing_custr_total), 0) existing_custr_distr,
        a.ao_users,
        COALESCE(try(a.ao_users*100.00/a.existing_custr_count), 0) ao_conversion, 
        a.fe_users,
        COALESCE(try(a.fe_users*100.00/a.existing_custr_count), 0) fe_conversion, 
        a.rr_users,
        COALESCE(try(a.rr_users*100.00/a.existing_custr_count), 0) rr_conversion,
        COALESCE(try(a.rr_users*100.00/a.fe_users), 0) fe_rr,
        a.net_users,
        COALESCE(try(a.net_users*100.00/a.existing_custr_count), 0) net_conversion,
        COALESCE(try(a.net_users*100.00/a.rr_users), 0) rr_net,
        COALESCE(try(a.net_users*100.00/c.net_users), 0) net_users_distr,
        a.net_rides,
        COALESCE(try(a.net_rides*100.00/c.net_rides), 0) net_rides_distr,
        COALESCE(try(a.net_rides*1.00/a.net_users), 0) rpc,
        a.discount,
        a.subtotal
        
FROM detailed_view a
LEFT JOIN base_month_total b ON a.month = b.month
LEFT JOIN summary_view c ON a.month = c.month
),

final as 
(
select * from a
)
select 
        month,
        ltr_taxi,recency_taxi,ao_recency,
        existing_custr_count,existing_custr_distr,
        ao_users,ao_conversion,
        fe_users,fe_conversion,
        rr_users,rr_conversion,fe_rr "fe_rr(users)",
        net_users,net_conversion, rr_net "rr_net(users)", net_users_distr,
        net_rides,net_rides_distr,
        rpc,
        discount,
        subtotal,
        COALESCE(TRY(discount*100.0/subtotal),0) discount_perc,
        COALESCE(TRY(discount*1.0/net_rides),0) dpr
from final
order by 1,row_order,4

"""

In [8]:
# Execute the `ltr_0` query over the Presto connection and load the result
# set into a DataFrame.
df_ltr_0 = pd.read_sql(sql=ltr_0, con=conn)

In [9]:
# Bare last expression: the notebook renders the query result as a table.
df_ltr_0

Unnamed: 0,month,ltr_taxi,recency_taxi,ao_recency,existing_custr_count,existing_custr_distr,ao_users,ao_conversion,fe_users,fe_conversion,...,net_conversion,rr_net(users),net_users_distr,net_rides,net_rides_distr,rpc,discount,subtotal,discount_perc,dpr
0,2022-10-01,LTR=0,LTR=0,1. LAST_7_DAYS,355730,1.12,250029,70.29,210647,59.22,...,29.83,63.68,1.96,250066,1.26,2.36,1708563.0,23726490.0,7.201077,6.832448
1,2022-10-01,LTR=0,LTR=0,2. 8_14_DAYS,176956,0.56,74979,42.37,54977,31.07,...,11.91,57.19,0.39,40279,0.2,1.91,342269.0,3992911.0,8.571917,8.497455
2,2022-10-01,LTR=0,LTR=0,3. 15_30_DAYS,321775,1.01,103246,32.09,74535,23.16,...,8.74,56.65,0.52,50769,0.26,1.8,441981.0,5242222.0,8.431177,8.705726
3,2022-10-01,LTR=0,LTR=0,4. 31_60_DAYS,465797,1.46,94046,20.19,69299,14.88,...,5.59,56.99,0.48,46656,0.24,1.79,397697.0,4783322.0,8.314243,8.524027
4,2022-10-01,LTR=0,LTR=0,5. 61_90_DAYS,348674,1.1,43882,12.59,33606,9.64,...,3.7,59.0,0.24,23265,0.12,1.8,201325.0,2272564.0,8.858936,8.653557
5,2022-10-01,LTR=0,LTR=0,6. 91_180_DAYS,793628,2.5,60419,7.61,50730,6.39,...,2.54,59.85,0.37,36998,0.19,1.84,322895.0,3569473.0,9.046013,8.727364
6,2022-10-01,LTR=0,LTR=0,7. 181_365_DAYS,764726,2.4,35400,4.63,31422,4.11,...,1.75,63.15,0.25,25365,0.13,1.89,220692.0,2293270.0,9.623463,8.700651
7,2022-10-01,LTR=0,LTR=0,NO_AO,1601416,5.04,42417,2.65,37942,2.37,...,1.13,68.08,0.34,36617,0.18,2.01,299885.0,3241243.0,9.25216,8.189775
8,2022-10-01,NEW,NEW,,0,0.0,9182918,0.0,8219132,0.0,...,0.0,78.35,95.47,19283538,97.42,3.72,60004065.0,1617399000.0,3.709911,3.111673
