# Fresh Products

In [1]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import seaborn as sns
import sys

# Move two levels up (to the project root) and append the `src` folder
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))

# Append src to sys.path
sys.path.append(src_path)

from src import query_engines, dataframe_visualizer

q = query_engines.QueryEngines()

## Q1: Identify all kinds of segmentation

In this first part I want to segment all the possible partners. In Specialties, TP, NTP, MFCs and All groceries. We also check what is the perc of migrated Store Address Ids for each of them.

### What is the distribution of segments_lvl 1?

In [8]:
q.prepare_query(
    'backup/possible_segmentations.sql'
    ,params=None
    ,to_load_file='possible_segmentations'
    ,load_from_to_load_file='possible_segmentations'
)

possible_segmentations  = q.query_run_starburst()

grouped = possible_segmentations.groupby('segment_1').agg(
    distinct_sad_id = ('store_address_id','nunique'),
    migrated_sad_id = ('is_migrated', lambda x: x.sum())
).reset_index()

grouped['perc_m'] = grouped['migrated_sad_id']/grouped['distinct_sad_id']
grouped.sort_values(by='distinct_sad_id', ascending=False)

Unnamed: 0,segment_1,distinct_sad_id,migrated_sad_id,perc_m
2,Specialties,15387,1442,0.093715
3,Top Partner,10546,8496,0.805614
1,Non Top Partner,5850,1682,0.287521
0,MFC,256,245,0.957031


### What is the distribution of segment_lvl 2?

In [9]:
grouped = possible_segmentations.groupby('segment_2').agg(
    distinct_sad_id = ('store_address_id','nunique'),
    migrated_sad_id = ('is_migrated', lambda x: x.sum())
).reset_index()

grouped['perc_m'] = grouped['migrated_sad_id']/grouped['distinct_sad_id']
grouped.sort_values(by='distinct_sad_id', ascending=False)

Unnamed: 0,segment_2,distinct_sad_id,migrated_sad_id,perc_m
0,Groceries Partner,16396,10178,0.620761
2,Specialties,15387,1442,0.093715
1,MFC,256,245,0.957031


# Q2: Rest of metrics

In [47]:
START_DATE = "'2024-07-01'"
END_DATE = "'2024-09-15'"
END_DATE_RETENTION = "'2024-10-15'"

params = [
    {'name':'start_date','value':START_DATE},
    {'name':'end_date','value':END_DATE},
    {'name':'end_date_retention','value':END_DATE_RETENTION}
]

q.prepare_query(
    'metrics.sql'
    ,params=params
    ,to_load_file='metrics'
    ,load_from_to_load_file=None
)

metrics  = q.query_run_starburst()
metrics.to_csv('outputs/metrics.csv')

Open the following URL in browser for the external authentication:
https://starburst.g8s-data-platform-prod.glovoint.com/oauth2/token/initiate/ccc4673de490e2945332f4a370ff82a75d1513f74e457103e06b684285d625ac


In [109]:
START_DATE = "'2024-07-01'"
END_DATE = "'2024-09-15'"
END_DATE_RETENTION = "'2024-10-15'"

params = [
    {'name':'start_date','value':START_DATE},
    {'name':'end_date','value':END_DATE},
    {'name':'end_date_retention','value':END_DATE_RETENTION}
]

q.prepare_query(
    'subsequent_all.sql'
    ,params=params
    ,to_load_file='subsequent_all'
    ,load_from_to_load_file=None
)

subsequent_all  = q.query_run_starburst()
subsequent_all

  df = pd.read_sql(self.tp__read_query, conn)


Open the following URL in browser for the external authentication:
https://starburst.g8s-data-platform-prod.glovoint.com/oauth2/token/initiate/8748e61abe1b688639265cf4320d67bbd3bd68aeb5942621ec23e6614d8f0364


Unnamed: 0,country,segment_2,f_subsequent_orders,nf_subsequent_orders,f_feedback_subsequent_orders,nf_feedback_subsequent_orders,n_f_feedback_subsequent_orders_no0,f_feedback_subsequent_orders_no0,n_nf_feedback_subsequent_orders_no0,nf_feedback_subsequent_orders_no0,n_rows,n_distinct_order_id
0,KG,Groceries Partner,3.122721,3.142644,2.574257,1.882353,51,5.098039,10,3.2,21051,21051
1,BA,Groceries Partner,2.804943,3.176738,2.357143,1.333333,23,4.304348,2,2.0,9620,9620
2,KE,MFC,8.240955,21.153826,2.942446,3.7,178,4.595506,34,5.441176,88051,88051
3,ME,Specialties,1.916667,2.120275,0.0,1.0,0,,1,1.0,2667,2667
4,PT,MFC,4.332486,3.629614,3.062356,2.258799,283,4.685512,276,3.952899,138563,138563
5,CI,MFC,4.29485,3.906178,3.171717,2.103448,135,4.651852,18,3.388889,35337,35337
6,ME,Groceries Partner,4.351816,3.217652,3.194805,1.833333,102,4.823529,4,2.75,22247,22247
7,KZ,MFC,4.743227,5.218475,3.959732,2.40625,98,6.020408,18,4.277778,57977,57977
8,RS,Specialties,2.356866,1.592056,0.625,1.0,4,1.25,2,1.5,3706,3706
9,MA,Groceries Partner,5.978839,5.168023,3.459774,3.682021,2216,5.103791,422,5.872038,535979,535979


In [98]:
subsequent_all.sort_values(by='orders_28d', ascending=False)

Unnamed: 0,p_creation_date,order_id,orders_28d
5318684,2024-07-10,100724661551,821
1718626,2024-07-09,100723806670,820
4599002,2024-07-09,100723490169,820
442077,2024-07-10,100724740092,820
2749679,2024-07-09,100723824890,820
...,...,...,...
1849933,2024-08-30,100834135883,0
1849934,2024-08-29,100830653090,0
4280704,2024-08-10,100792394728,0
1849935,2024-08-26,100825690656,0


In [86]:
subsequent_all.sum()

country                            ITBGHRPLGEMENGESUAROPTNGHRCIRSTNBAITITROPLUGES...
segment_2                          Groceries PartnerSpecialtiesGroceries PartnerG...
f_subsequent_orders                                                       1773575390
f_subsequent_customers                                                       3358566
nf_subsequent_orders                                                       969935710
f_subsequent_customers                                                       1492631
f_feedback_subsequent_orders                                                 1045792
f_feedback_subsequent_customers                                                19151
nf_feedback_subsequent_orders                                                 176523
f_feedback_subsequent_customers                                                 3061
n_rows                                                                       5902351
n_distinct_order_id                                              

In [83]:
START_DATE = "'2024-07-01'"
END_DATE = "'2024-09-15'"
END_DATE_RETENTION = "'2024-10-15'"

params = [
    {'name':'start_date','value':START_DATE},
    {'name':'end_date','value':END_DATE},
    {'name':'end_date_retention','value':END_DATE_RETENTION}
]

q.prepare_query(
    'subsequent_fresh.sql'
    ,params=params
    ,to_load_file='subsequent_fresh'
    ,load_from_to_load_file=None
)

subsequent_fresh  = q.query_run_starburst()
subsequent_fresh

Open the following URL in browser for the external authentication:
https://starburst.g8s-data-platform-prod.glovoint.com/oauth2/token/initiate/295a934f81d17cdbde58735ec1cda7efe3d1b69a2376bad6e2093716f6656202


In [84]:
subsequent_all

Unnamed: 0,country,segment_2,f_fsubsequent_orders,f_fsubsequent_customers,nf_fsubsequent_orders,f_fsubsequent_customers.1,f_feedback_fsubsequent_orders,f_feedback_fsubsequent_customers,nf_feedback_fsubsequent_orders,f_feedback_fsubsequent_customers.1,fn_rows,fn_distinct_order_id
0,CI,Specialties,11456,1220,1452,228,6,3,0,0,4259,4259
1,ES,Groceries Partner,62619247,742814,10265228,124209,200080,3618,7732,154,1084970,1084970
2,ME,Groceries Partner,981768,13784,143609,3109,7136,122,26,3,22257,22257
3,KZ,MFC,1358949,23287,882099,18544,4204,108,231,14,58116,58116
4,ME,Specialties,16319,761,3461,355,0,0,1,1,2671,2671
5,PT,Groceries Partner,15923802,180687,2909856,35873,65363,995,5263,59,289228,289228
6,MA,Groceries Partner,37448163,325344,11937840,114535,139176,2526,17220,386,536337,536337
7,BG,Groceries Partner,2457779,47024,283840,5787,11347,352,644,10,65626,65626
8,MD,Specialties,6759,519,16171,646,7,2,0,0,4076,4076
9,KG,Groceries Partner,397077,8166,170489,3664,2207,63,316,6,21078,21078


In [36]:
metrics

Unnamed: 0,p_creation_date,order_id,order_subvertical3_ret1
0,2024-07-01,100704824625,0
1,2024-08-02,100775039338,0
2,2024-09-07,100850941677,0
3,2024-08-31,100836979757,0
4,2024-10-06,100914513235,0
5,2024-07-10,100725637977,2
6,2024-07-19,100745110702,3
7,2024-07-20,100746338917,3
8,2024-09-16,100871066733,5
9,2024-09-25,100889638256,6


In [32]:
metrics

Unnamed: 0,_col0,_col1
0,5706152,5706152


In [29]:
metrics

Unnamed: 0,_col0,_col1
0,9249372,9249372


In [17]:
metrics

Unnamed: 0,_col0,_col1
0,5907066,5907066


In [24]:
metrics

Unnamed: 0,_col0,_col1
0,5706152,5706152
