In [1]:
import datetime as dt
import os
import time
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pyhive import presto

# Open the Presto connection used by every query in this notebook.
#
# The endpoint and account can be overridden through the environment
# (PRESTO_HOST, PRESTO_PORT, PRESTO_USER) so the notebook can be run by someone
# else, or against another cluster, without editing the code or committing a
# personal account name. With no overrides set, the values are exactly the ones
# the notebook was written with.
#
# Another endpoint noted by the author:
#   bi-presto.serving.data.production.internal
conn = presto.connect(
    host=os.environ.get('PRESTO_HOST', 'presto.processing.yoda.run'),
    port=int(os.environ.get('PRESTO_PORT', '80')),
    username=os.environ.get('PRESTO_USER', 'manoj.ravirajan@rapido.bike'),
)

In [7]:
# Reporting window as 'YYYYMMDD' strings. The query interpolates both values
# into `yyyymmdd BETWEEN ... and ...` filters, so both ends are inclusive.
# NOTE(review): only this one window is recorded here; the Oct-Dec table shown
# further down was produced with different dates that are no longer in the
# notebook.
start_date, end_date = '20221226', '20230326'

In [8]:
# Weekly report query (Presto SQL), one output row per week in
# [start_date, end_date]. `time_level` is the week bucket: the `yyyymmdd`
# string parsed to a date, truncated with DATE_TRUNC('week'), formatted as
# 'YYYY-MM-DD'.
#
# CTEs:
#   order_logs_snapshot      - 'dropped' Auto/Link orders in the window whose
#                              captain device id is not null, not empty and not
#                              the '00000000' placeholder.
#   agg_data                 - distinct orders and distinct captains per week.
#   captain_device_threshold - (week, captain) pairs where the captain used
#                              more than 4 distinct device ids in that week.
#   fraud_cap                - per week, distinct captains / orders belonging to
#                              any captain listed in captain_device_threshold.
#   incentive_cap            - per week, count and sum of successful, non-zero
#                              'incentives' transactions (Auto/Link) for those
#                              captains.
#   credited_amount          - per week, sum of 'Credited' redeem transactions
#                              for those captains.
#
# NOTE(review): fraud_cap, incentive_cap and credited_amount select captains by
# captain_id only, so a captain who crosses the device threshold in ANY week is
# counted in EVERY week of the window. Confirm this is intended; if not, the
# filters need to match on time_level as well.
#
# The final SELECT uses LEFT JOINs from agg_data. With inner joins, a week in
# which the flagged captains had no orders, no incentive rows or no credited
# redemptions would vanish from the report entirely, taking its total_orders
# and total_captain with it. Missing metrics are reported as 0 instead.
query = f"""


with order_logs_snapshot as (
select 
    DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
    service_obj_service_name service, 
    order_id, captain_id, customer_id, 
    captain_obj_device_id, customer_obj_device_device_id
from orders.order_logs_snapshot
where
    yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
    and service_obj_service_name in ('Auto', 'Link')
    and order_status = 'dropped' 
    and captain_obj_device_id != '00000000'
    and captain_obj_device_id != ''
    and captain_obj_device_id is not null 
),

agg_data as (
select
    time_level,
    count(distinct order_id) total_orders,
    count(distinct captain_id) total_captain
    
from order_logs_snapshot
group by 1
),

captain_device_threshold as (
select
    time_level,
    captain_id,
    count(distinct captain_obj_device_id) mapped_device
from order_logs_snapshot
where (captain_id is not null and captain_id <> '')
group by 1,2
having count(distinct captain_obj_device_id) > 4
),

fraud_cap as (
select 
    time_level, 
    count(distinct captain_id) fraud_captain,
    count(distinct order_id) fraud_captains_orders
from order_logs_snapshot
where captain_id in (select distinct captain_id from captain_device_threshold)
group by 1
),

incentive_cap as (
select 
    time_level,
    count(order_id) as incentive_orders, 
    sum(amount) as incentive_amount
from 
    (
    select  
        DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
        _id, rider_id captain_id, order_id,
        transaction_type,transaction_category, 
        cast(amount as double) as amount
    from captain.captain_transactions
    where 
        yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
        and transaction_type = 'incentives'
        and status = 'success'
        and service in ('Auto', 'Link')
        and (amount <> 0 and amount is not null)
        
        and rider_id in (select distinct captain_id from captain_device_threshold)
    )
group by 1

),

credited_amount as (
select 
    time_level,
    sum(amount) as credited_amount
from
    (
    select 
        DATE_FORMAT(DATE_TRUNC('week', CAST(date_parse(yyyymmdd, '%Y%m%d') AS DATE)), '%Y-%m-%d') time_level,
        status, user_id captain_id, id, approver,amount
    from payments.redeem_transactions_snapshot
    where 
        yyyymmdd BETWEEN '{start_date}' and '{end_date}' 
        and status in ('Credited')--, 'rejected')
        and user_id in (select distinct captain_id from captain_device_threshold)
    ) as a
group by 1
)

select 
    agg_data.*,
    coalesce(fraud_cap.fraud_captain, 0) as fraud_captain,
    coalesce(fraud_cap.fraud_captains_orders, 0) as fraud_captains_orders,
    coalesce(incentive_cap.incentive_orders, 0) as incentive_orders,
    coalesce(incentive_cap.incentive_amount, 0) as incentive_amount,
    coalesce(credited_amount.credited_amount, 0) as credited_amount
from agg_data 
left join fraud_cap on agg_data.time_level = fraud_cap.time_level
left join incentive_cap on agg_data.time_level = incentive_cap.time_level
left join credited_amount on agg_data.time_level = credited_amount.time_level

order by 1 desc


"""

In [9]:
# Execute the weekly report query over the Presto connection and load the
# result set into a DataFrame.
df_code = pd.read_sql(sql=query, con=conn)

In [6]:
# Keep the current query result under a period-specific name and display it.
# NOTE(review): this is a plain name binding, not a copy, and it captures
# whatever `df_code` held when the cell was executed. The table shown below
# covers Oct-Dec 2022 weeks, which lie outside the start_date/end_date window
# set in this notebook, so it came from an earlier run of the query with
# different dates. On a top-to-bottom run this name would refer to the same
# frame as `df_dec_mar`.
df_oct_dec = df_code
df_oct_dec

Unnamed: 0,time_level,total_orders,total_captain,fraud_captain,fraud_captains_orders,incentive_orders,incentive_amount,credited_amount
0,2022-12-19,5815059,444841,59,2327,69,585.0,69213.0894
1,2022-12-12,5811120,430847,75,2538,90,639.0,66802.6574
2,2022-12-05,5641735,419509,61,2317,89,674.0,75989.9205
3,2022-11-28,5740838,434425,58,2233,105,807.0,73141.6206
4,2022-11-21,5602664,440964,54,2344,80,572.0,84802.742
5,2022-11-14,5638883,436582,60,1971,93,764.0,56976.5868
6,2022-11-07,5434994,423777,52,1968,88,638.0,67190.1235
7,2022-10-31,5229771,412705,50,1747,94,747.0,66145.1155
8,2022-10-24,4100551,365981,62,1737,40,268.0,101243.7045
9,2022-10-17,5519496,417785,66,2006,70,605.0,104255.751


In [10]:
# Keep the current query result under a period-specific name and display it.
# NOTE(review): like `df_oct_dec`, this only binds another name to the object
# `df_code` currently refers to. The two names hold different data only if the
# query was re-run with a different date window between the two assignments.
df_dec_mar = df_code
df_dec_mar

Unnamed: 0,time_level,total_orders,total_captain,fraud_captain,fraud_captains_orders,incentive_orders,incentive_amount,credited_amount
0,2023-03-20,5334222,421554,27,468,52,370.0,20280.95
1,2023-03-13,5634719,421240,39,857,67,475.0,26137.4124
2,2023-03-06,5343264,403492,50,965,101,763.0,28384.5262
3,2023-02-27,5873616,431262,46,1011,113,807.0,32303.559
4,2023-02-20,5827884,434819,41,829,90,661.0,22272.9001
5,2023-02-13,5721947,427309,44,985,111,886.0,21070.083
6,2023-02-06,5960368,430038,50,1350,106,834.0,35041.844
7,2023-01-30,5843379,424996,55,1999,135,1044.0,71813.4018
8,2023-01-23,5402057,411731,53,1937,105,778.0,57746.7628
9,2023-01-16,5187101,392171,53,1952,84,574.0,51819.182


In [15]:

# Stack the two period frames into a single weekly table and export it.
frames = [df_oct_dec, df_dec_mar]

# Both inputs are bound from `df_code` in earlier cells. If the query was not
# re-run with a different date window in between, they hold the same weeks and
# concatenating would silently duplicate every row in the export. Stop with a
# clear error instead of writing a misleading file.
overlapping_weeks = set(df_oct_dec["time_level"]) & set(df_dec_mar["time_level"])
if overlapping_weeks:
    raise ValueError(
        "df_oct_dec and df_dec_mar share weeks "
        f"{sorted(overlapping_weeks)}; re-run the query for each period "
        "before combining."
    )

# ignore_index gives the combined frame a clean 0..n-1 index rather than
# repeating each input's own 0-based row labels.
result = pd.concat(frames, ignore_index=True)
result.to_csv("cap 1.csv", index = False)

In [16]:
# Display the combined weekly table (the same frame written to "cap 1.csv").
result

Unnamed: 0,time_level,total_orders,total_captain,fraud_captain,fraud_captains_orders,incentive_orders,incentive_amount,credited_amount
0,2022-12-19,5815059,444841,59,2327,69,585.0,69213.0894
1,2022-12-12,5811120,430847,75,2538,90,639.0,66802.6574
2,2022-12-05,5641735,419509,61,2317,89,674.0,75989.9205
3,2022-11-28,5740838,434425,58,2233,105,807.0,73141.6206
4,2022-11-21,5602664,440964,54,2344,80,572.0,84802.742
5,2022-11-14,5638883,436582,60,1971,93,764.0,56976.5868
6,2022-11-07,5434994,423777,52,1968,88,638.0,67190.1235
7,2022-10-31,5229771,412705,50,1747,94,747.0,66145.1155
8,2022-10-24,4100551,365981,62,1737,40,268.0,101243.7045
9,2022-10-17,5519496,417785,66,2006,70,605.0,104255.751
