In [1]:
from pyhive import presto
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
import time

from datetime import datetime, timedelta

import seaborn as sns

conn = presto.connect(
    host='presto.processing.yoda.run', 
    ## presto.processing.yoda.run
    ## bi-presto.serving.data.production.internal
    port=80,
    username='manoj.ravirajan@rapido.bike'
)

In [7]:
start_date = '20221226'
end_date = '20230326' 

In [8]:
query = f"""

with order_logs_snapshot as (
select 
    DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
    service_obj_service_name service, 
    order_id, captain_id, customer_id, 
    captain_obj_device_id, customer_obj_device_device_id,discount
from orders.order_logs_snapshot
where
    yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
    and service_obj_service_name in ('Auto', 'Link')
    and order_status = 'dropped' 
    and customer_obj_device_device_id != '00000000'
    and customer_obj_device_device_id != ''
    and customer_obj_device_device_id is not null 
),

agg_data as (
select
    time_level,
    count(distinct order_id) total_orders,
    count(distinct customer_id) total_customer
    
from order_logs_snapshot
group by 1
),

customer_device_threshold as (
select    
    time_level,
    customer_id
from order_logs_snapshot    
where customer_obj_device_device_id in 
    (
    select distinct device_id from
    (
    select
        time_level,
        customer_obj_device_device_id device_id,
        count(distinct customer_id) mapped_device
    from order_logs_snapshot
    where (customer_obj_device_device_id is not null and customer_obj_device_device_id <> '')
    group by 1,2
    having count(distinct customer_id) > 4
    )
    )
),

fraud_cus as (
select 
    time_level, 
    count(distinct customer_id) fraud_customer,
    count(distinct order_id) fraud_customer_orders,
    count(distinct case when discount > 0 then order_id end) discounted_orders,
    sum(discount) discount
from order_logs_snapshot
where customer_id in (select distinct customer_id from customer_device_threshold)
group by 1
),

coin_burn as
(
select 
    time_level,
    subscription_debit,
    adjustment_debit as adjustments,
    
    customercashback_creditcoinlocationoffer_credit + customercashback_creditcoinrideoffer_credit + scratchcard_creditcoinscratchcard_credit 
            + customercashback_credit + locationoffer_creditcoin_credit + locationoffer_creditcoinlocationoffer_credit + rideoffer_creditcoin_credit + rideoffer_creditcoinrideoffer_credit as coin_credited,
            
    utilization_dashboardcoinscredit + utilization_giftoffer + utilization_walletrechargeoffer as coin_utilized,
    coinexpired_locationoffer_debit + coinexpired_rideoffer_debit + coinexpired_scratchcardoffer_debit + coinexpired_debit as coin_expired,
    
    customerrefund_credit as refund
    from
    (
    select
        DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
        -- city_name,
        -- customer_id,
        -- service_name,
        sum(coalesce(subscription_debit, 0)) as subscription_debit,
        sum(coalesce(adjustment_debit, 0)) as adjustment_debit,
        sum(coalesce(customercashback_creditcoinlocationoffer_credit, 0)) as customercashback_creditcoinlocationoffer_credit,
        sum(coalesce(customercashback_creditcoinrideoffer_credit, 0)) as customercashback_creditcoinrideoffer_credit,
        sum(coalesce(scratchcard_creditcoinscratchcard_credit, 0)) as scratchcard_creditcoinscratchcard_credit,
        sum(coalesce(utilization_dashboardcoinscredit, 0)) as utilization_dashboardcoinscredit,
        sum(coalesce(utilization_giftoffer, 0)) as utilization_giftoffer,
        sum(coalesce(utilization_referraloffer, 0)) as utilization_referraloffer,
        sum(coalesce(utilization_walletrechargeoffer, 0)) as utilization_walletrechargeoffer,
        sum(coalesce(coinexpired_locationoffer_debit, 0)) as coinexpired_locationoffer_debit,
        sum(coalesce(coinexpired_rideoffer_debit, 0)) as coinexpired_rideoffer_debit,
        sum(coalesce(coinexpired_scratchcardoffer_debit, 0)) as coinexpired_scratchcardoffer_debit,
        sum(coalesce(customerrefund_credit, 0)) as customerrefund_credit,
        
        sum(coalesce(coinexpired_debit,0)) as coinexpired_debit,
        sum(coalesce(customercashback_credit,0)) as customercashback_credit,
        sum(coalesce(locationoffer_creditcoin_credit,0)) as locationoffer_creditcoin_credit,
        sum(coalesce(locationoffer_creditcoinlocationoffer_credit,0)) as locationoffer_creditcoinlocationoffer_credit,
        sum(coalesce(rideoffer_creditcoin_credit,0)) as rideoffer_creditcoin_credit,
        sum(coalesce(rideoffer_creditcoinrideoffer_credit,0)) as rideoffer_creditcoinrideoffer_credit
        from datasets.customer_financial_transactions_kpi
    where 
        yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
        and customer_id in (select distinct customer_id from customer_device_threshold)
        and service_name in ('Auto', 'Link')
    group by 1
    )
)

select 
    agg_data.*,
    fraud_cus.fraud_customer,
    fraud_cus.fraud_customer_orders,
    fraud_cus.discounted_orders,
    fraud_cus.discount,
    coin_burn.coin_credited,
    coin_burn.coin_utilized,
    coin_burn.coin_expired
    
from agg_data 
join fraud_cus on agg_data.time_level = fraud_cus.time_level
join coin_burn on agg_data.time_level = coin_burn.time_level
order by 1 desc

"""

In [9]:
df_code = pd.read_sql(query, conn) 

In [6]:
df_oct_dec = df_code
df_oct_dec

Unnamed: 0,time_level,total_orders,total_customer,fraud_customer,fraud_customer_orders,discounted_orders,discount,coin_credited,coin_utilized,coin_expired
0,2022-12-19,5812417,2784249,165,467,158,1345.0,72.0,297.0,62.0
1,2022-12-12,5808238,2802748,195,567,199,1800.0,254.0,277.0,107.0
2,2022-12-05,5638506,2748267,175,512,143,1414.0,516.0,427.0,119.0
3,2022-11-28,5738617,2736700,173,485,153,1668.0,374.0,260.0,239.0
4,2022-11-21,5600555,2633003,164,382,125,1200.0,369.0,271.0,403.0
5,2022-11-14,5636437,2647073,175,441,121,1285.0,92.0,21.0,30.0


In [10]:
df_dec_mar = df_code
df_dec_mar

Unnamed: 0,time_level,total_orders,total_customer,fraud_customer,fraud_customer_orders,discounted_orders,discount,coin_credited,coin_utilized,coin_expired
0,2023-03-20,5331923,2636557,288,586,324,5297.0,1340.0,376.0,378.0
1,2023-03-13,5632327,2736823,441,798,399,6026.0,1830.0,23.0,118.0
2,2023-03-06,5340797,2688015,325,629,288,4890.0,1077.0,65.0,80.0
3,2023-02-27,5870934,2811664,162,379,122,1466.0,329.0,120.0,55.0
4,2023-02-20,5825493,2794369,135,394,134,1299.0,163.0,30.0,5.0
5,2023-02-13,5718383,2784105,126,338,110,1287.0,423.0,56.0,77.0
6,2023-02-06,5956803,2867544,133,326,92,910.0,85.0,0.0,102.0
7,2023-01-30,5840191,2804659,140,321,91,878.0,101.0,50.0,117.0
8,2023-01-23,5399276,2708799,131,331,96,930.0,194.0,30.0,20.0
9,2023-01-16,5184353,2561013,157,385,97,929.0,47.0,33.0,19.0


In [13]:
frames = [df_oct_dec, df_dec_mar]
result = pd.concat(frames)
result.to_csv("cus 2.csv", index = False)

In [14]:
result

Unnamed: 0,time_level,total_orders,total_customer,fraud_customer,fraud_customer_orders,discounted_orders,discount,coin_credited,coin_utilized,coin_expired
0,2022-12-19,5812417,2784249,165,467,158,1345.0,72.0,297.0,62.0
1,2022-12-12,5808238,2802748,195,567,199,1800.0,254.0,277.0,107.0
2,2022-12-05,5638506,2748267,175,512,143,1414.0,516.0,427.0,119.0
3,2022-11-28,5738617,2736700,173,485,153,1668.0,374.0,260.0,239.0
4,2022-11-21,5600555,2633003,164,382,125,1200.0,369.0,271.0,403.0
5,2022-11-14,5636437,2647073,175,441,121,1285.0,92.0,21.0,30.0
0,2023-03-20,5331923,2636557,288,586,324,5297.0,1340.0,376.0,378.0
1,2023-03-13,5632327,2736823,441,798,399,6026.0,1830.0,23.0,118.0
2,2023-03-06,5340797,2688015,325,629,288,4890.0,1077.0,65.0,80.0
3,2023-02-27,5870934,2811664,162,379,122,1466.0,329.0,120.0,55.0
