In [1]:
import datetime as dt
import os
import time
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pyhive import presto

# Presto connection used by every query in this notebook.
# Host, port and username can be overridden with the PRESTO_HOST, PRESTO_PORT
# and PRESTO_USER environment variables, so the notebook can be run by another
# user or against another cluster without editing code. The defaults are the
# values that were previously hard-coded here.
# Alternative host noted by the original author:
#   bi-presto.serving.data.production.internal
conn = presto.connect(
    host=os.environ.get('PRESTO_HOST', 'presto.processing.yoda.run'),
    port=int(os.environ.get('PRESTO_PORT', '80')),
    username=os.environ.get('PRESTO_USER', 'manoj.ravirajan@rapido.bike'),
)

In [7]:
# Reporting window for the query below, given as yyyymmdd strings. Both bounds
# are inclusive because they are substituted into SQL BETWEEN filters.
# NOTE(review): the df_oct_dec output recorded further down covers weeks
# 2022-10-17 .. 2022-12-19, so it came from an earlier run with a different
# window that is not recorded anywhere in this notebook.
start_date, end_date = '20221226', '20230326'

In [8]:
# Weekly summary of captains sharing a device, for Auto and Link orders between
# start_date and end_date (yyyymmdd strings interpolated into the SQL).
#
# CTEs:
#   order_logs_snapshot      - dropped Auto/Link orders whose captain device id
#                              is not null, not '' and not '00000000', tagged
#                              with a week bucket (time_level).
#   agg_data                 - distinct orders and captains per week.
#   captain_device_threshold - (week, captain) pairs for captains seen on a
#                              device that had more than 4 distinct captains
#                              in some week of the window.
#   fraud_cap                - per-week distinct captains / orders for any
#                              captain present in captain_device_threshold.
#   incentive_cap            - per-week count and sum of successful, non-zero
#                              'incentives' transactions for those captains.
#   credited_amount          - per-week sum of 'Credited' redeem transactions
#                              for those captains.
#
# The final select uses LEFT joins from agg_data with coalesce(..., 0) so that
# a week with orders but no rows in one of the three captain-level CTEs is
# still reported (with zeros) instead of being dropped by an inner join.
#
# NOTE(review): the flagged-device list is computed per week but matched
# without the week, and the captain list is applied across the whole window,
# so the flagged set depends on the chosen date range - confirm this is
# intended.
# NOTE(review): incentive_orders is count(order_id), not count(distinct ...),
# so an order with several incentive transactions is counted more than once -
# confirm this is intended.
query = f"""

with order_logs_snapshot as (
select 
    DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
    service_obj_service_name service, 
    order_id, captain_id, customer_id, 
    captain_obj_device_id, customer_obj_device_device_id
from orders.order_logs_snapshot
where
    yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
    and service_obj_service_name in ('Auto', 'Link')
    and order_status = 'dropped' 
    and captain_obj_device_id != '00000000'
    and captain_obj_device_id != ''
    and captain_obj_device_id is not null 
),

agg_data as (
select
    time_level,
    count(distinct order_id) total_orders,
    count(distinct captain_id) total_captain
    
from order_logs_snapshot
group by 1
),

captain_device_threshold as (
select    
    time_level,
    captain_id
from order_logs_snapshot    
where captain_obj_device_id in 
    (
    select distinct device_id from
    (
    select
        time_level,
        captain_obj_device_id device_id,
        count(distinct captain_id) mapped_captain
    from order_logs_snapshot
    where (captain_obj_device_id is not null and captain_obj_device_id <> '')
    group by 1,2
    having count(distinct captain_id) > 4
    )
    )
group by 1,2
),

fraud_cap as (
select 
    time_level, 
    count(distinct captain_id) fraud_captain,
    count(distinct order_id) fraud_captains_orders
from order_logs_snapshot
where captain_id in (select distinct captain_id from captain_device_threshold)
group by 1
),

incentive_cap as (
select 
    time_level,
    count(order_id) as incentive_orders, 
    sum(amount) as incentive_amount
from 
    (
    select  
        DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
        _id, rider_id captain_id, order_id,
        transaction_type,transaction_category, 
        cast(amount as double) as amount
    from captain.captain_transactions
    where 
        yyyymmdd BETWEEN '{start_date}' and '{end_date}'  
        and transaction_type = 'incentives'
        and status = 'success'
        and service in ('Auto', 'Link')
        and (amount <> 0 and amount is not null)
        
        and rider_id in (select distinct captain_id from captain_device_threshold)
    )
group by 1

),

credited_amount as (
select 
    time_level,
    sum(amount) as credited_amount
from
    (
    select 
        DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
        status, user_id captain_id, id, approver,amount
    from payments.redeem_transactions_snapshot
    where 
        yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
        and status in ('Credited')--, 'rejected')
        and user_id in (select distinct captain_id from captain_device_threshold)
    ) as a
group by 1
)


select 
    agg_data.*,
    coalesce(fraud_cap.fraud_captain, 0) as fraud_captain,
    coalesce(fraud_cap.fraud_captains_orders, 0) as fraud_captains_orders,
    coalesce(incentive_cap.incentive_orders, 0) as incentive_orders,
    coalesce(incentive_cap.incentive_amount, 0) as incentive_amount,
    coalesce(credited_amount.credited_amount, 0) as credited_amount
from agg_data 
left join fraud_cap on agg_data.time_level = fraud_cap.time_level
left join incentive_cap on agg_data.time_level = incentive_cap.time_level
left join credited_amount on agg_data.time_level = credited_amount.time_level

order by 1 desc

"""

In [9]:
# Run the weekly summary SQL over the Presto connection and load the result
# set into a DataFrame (one row per week bucket).
df_code = pd.read_sql(sql=query, con=conn)

In [6]:
# Snapshot of the query result for the earlier (October-December) window.
# .copy() makes this an independent frame rather than a second name for
# df_code, so later in-place changes to df_code cannot alter it.
# NOTE(review): this cell's execution count (In [6]) precedes the date and
# query cells above it, and its recorded weeks (2022-10-17 .. 2022-12-19) fall
# outside the configured start_date/end_date. On Restart & Run All this would
# capture the same rows as df_dec_mar - the earlier window needs its own
# query run.
df_oct_dec = df_code.copy()
df_oct_dec

Unnamed: 0,time_level,total_orders,total_captain,fraud_captain,fraud_captains_orders,incentive_orders,incentive_amount,credited_amount
0,2022-12-19,5815059,444841,2943,54211,2965,23839.0,1572972.0
1,2022-12-12,5811120,430847,2972,61642,3254,26914.0,1933120.0
2,2022-12-05,5641735,419509,2825,60870,3170,27738.0,2012259.0
3,2022-11-28,5740838,434425,2698,58865,3164,29276.0,2007600.0
4,2022-11-21,5602664,440964,2679,55311,2979,26909.0,1772319.0
5,2022-11-14,5638883,436582,2689,50064,2952,27315.0,1472528.0
6,2022-11-07,5434994,423777,2657,48105,3098,28158.0,1435861.0
7,2022-10-31,5229771,412705,2402,40797,2807,24824.0,1286665.0
8,2022-10-24,4100551,365981,2206,40486,2213,19650.0,1402867.0
9,2022-10-17,5519496,417785,2696,55012,3163,29790.0,1942492.0


In [10]:
# Snapshot of the query result for the configured start_date/end_date window.
# .copy() makes this an independent frame rather than a second name for
# df_code, so later in-place changes to df_code cannot alter it.
df_dec_mar = df_code.copy()
df_dec_mar

Unnamed: 0,time_level,total_orders,total_captain,fraud_captain,fraud_captains_orders,incentive_orders,incentive_amount,credited_amount
0,2023-03-20,5334222,421554,2124,36705,3631,29802.0,1035855.0
1,2023-03-13,5634719,421240,2269,41900,4068,32971.0,1131400.0
2,2023-03-06,5343264,403492,2324,43026,4198,33998.0,1269733.0
3,2023-02-27,5873616,431262,2579,48582,4513,35990.0,1646501.0
4,2023-02-20,5827884,434819,2588,50083,4496,35015.0,1655067.0
5,2023-02-13,5721947,427309,2838,51747,4784,39627.0,1635879.0
6,2023-02-06,5960368,430038,2926,59447,4800,38711.0,1938277.0
7,2023-01-30,5843379,424996,2858,61767,4488,36019.0,2042995.0
8,2023-01-23,5402057,411731,3039,61025,4191,33124.0,1821792.0
9,2023-01-16,5187101,392171,2731,59342,3718,28884.0,1795547.0


In [17]:
# Stack the two window snapshots into one table and export it.
# ignore_index=True renumbers the rows 0..n-1; without it each frame keeps its
# own 0-based index, so `result` would carry duplicate index labels. The CSV
# is unaffected because it is written without the index.
frames = [df_oct_dec, df_dec_mar]
result = pd.concat(frames, ignore_index=True)
result.to_csv("cap 2.csv", index = False)

In [18]:
# Display the combined weekly table built from both window snapshots.
result

Unnamed: 0,time_level,total_orders,total_captain,fraud_captain,fraud_captains_orders,incentive_orders,incentive_amount,credited_amount
0,2022-12-19,5815059,444841,2943,54211,2965,23839.0,1572972.0
1,2022-12-12,5811120,430847,2972,61642,3254,26914.0,1933120.0
2,2022-12-05,5641735,419509,2825,60870,3170,27738.0,2012259.0
3,2022-11-28,5740838,434425,2698,58865,3164,29276.0,2007600.0
4,2022-11-21,5602664,440964,2679,55311,2979,26909.0,1772319.0
5,2022-11-14,5638883,436582,2689,50064,2952,27315.0,1472528.0
6,2022-11-07,5434994,423777,2657,48105,3098,28158.0,1435861.0
7,2022-10-31,5229771,412705,2402,40797,2807,24824.0,1286665.0
8,2022-10-24,4100551,365981,2206,40486,2213,19650.0,1402867.0
9,2022-10-17,5519496,417785,2696,55012,3163,29790.0,1942492.0
