In [4]:
### changes for this commit
### google
### moving to replica pricing db

In [5]:
import pandas as pd
import pencilbox as pb

In [6]:
con_rs = pb.get_connection("[Warehouse] Redshift")
con_rep = pb.get_connection("[Replica] Pricing V3")

In [7]:
sql = """

-- list of all partner and grofersowned stores

WITH MERCHANT AS
  (SELECT DISTINCT M.id,
                   m.name,
                   m.gr_id,
                   m.enabled_flag,
                   L.NAME AS city
   FROM lake_grofers_db.view_gr_merchant M
   INNER JOIN lake_grofers_db.gr_merchant_additional_info MAI ON MAI.MERCHANT_ID = M.ID
   AND (MAI.MERCHANT_STORE_TYPE = 'grofers')
   INNER JOIN lake_grofers_db.view_gr_locality L2 ON M.LOCALITY_ID = L2.ID
   AND (M.NAME NOT ILIKE '%%GROCERY%%MART%%'
        AND M.NAME NOT ILIKE '%%FRESHBURY%%'
        AND M.NAME NOT ILIKE '%%test%%'
        AND M.NAME NOT ILIKE '%%donation%%')
   INNER JOIN lake_grofers_db.view_gr_locality L1 ON L2.PARENT_LOCALITY_ID = L1.ID
   INNER JOIN lake_grofers_db.view_gr_locality L ON L1.PARENT_LOCALITY_ID = L.ID
   AND L.ID <> 2051 ),
   
   -- fetching mid name, city name and mid details
   
     mapping AS
  ( SELECT DISTINCT vmp.virtual_merchant_id::int AS frontend_id,
                    m1.gr_id AS frontend_gr_id,
                    m1.name AS frontend_name,
                    m1.city AS frontend_city,
                    vmp.real_merchant_id::int AS backend_id,
                    m.gr_id AS backend_gr_id,
                    m.name AS backend_name,
                    m.city AS backend_city,
                    co.outlet_id AS retail_outlet_id,
                    c.name AS retail_outlet_name,
                    x.name AS retail_outlet_city
   FROM lake_grofers_db.gr_virtual_to_real_merchant_mapping vmp
   INNER JOIN merchant m ON m.id=vmp.real_merchant_id
   INNER JOIN merchant m1 ON m1.id=vmp.virtual_merchant_id
   INNER JOIN lake_retail.console_outlet_cms_store co ON co.cms_store=vmp.real_merchant_id
   AND co.active=1
   INNER JOIN lake_retail.console_outlet c ON c.id=co.outlet_id
   LEFT JOIN lake_retail.console_location x ON x.id=c.tax_location_id
   AND m1.enabled_flag=TRUE
   AND vmp.enabled_flag=TRUE
   WHERE x.name IS NOT NULL
     AND x.name <>''
   ORDER BY m1.city ),
   
   -- fetching latest product images
   
     images AS (
     SELECT *,
          row_number() OVER (PARTITION BY product_id
                             ORDER BY update_ts DESC) AS rrank
   FROM lake_grofers_db.gr_product_sliding_image
   WHERE sort_order = 1 ),
   
   -- creating a base of all customers in grofers db
   
   base as (select dim_customer_key as customer_id, cart_checkout_ts_ist,merchant_id, merchant_name,dim_customer_address_key 
   from dwh.fact_sales_order_details fs join dwh.dim_merchant m on 
fs.dim_frontend_merchant_key = m.merchant_key),

-- listing all mids with min 5 orders in the past 2 days

mid_min_condition as (
select
frontend_merchant_id merchant_id,
count(distinct order_id) order_cnt from
dwh.fact_sales_order_item_details
where order_create_ts_ist >= current_date-1 and order_current_status <> 'CANCELLED'
group by 1
having order_cnt >= 5
),


 -- matching the most recent order with each mid

merc as (
select  merchant_name, a.merchant_id ,max(cart_checkout_ts_ist) as dt from 
base a
inner join
mid_min_condition b on a.merchant_id = b.merchant_id
group by 1,2),


-- lat long for merchants

merch_lat_long as (select
frontend_merchant_name frontend_name,
backend_merchant_name backend_name,
frontend_merchant_city_name frontend_city,
backend_merchant_city_name backend_city,
b.external_id merchant_id,
a.latitude,
a.longitude,
a.city
from
lake_logistics.logistics_node_address a
inner join
lake_logistics.logistics_node b on a.id = b.node_address_id
inner join 
dwh.dim_merchant_outlet_facility_mapping c on b.external_id = c.frontend_merchant_id
group by 1,2,3,4,5,6,7,8
),

-- matching each mid with users that had the most recent cart checkout and supplying his/ her lat long

SS as (
select 
m.*, b.customer_id,b.dim_customer_address_key, o.latitude location_lat, o.longitude location_lon, o.city
from 
merc m 
inner join 
base b on m.merchant_id = b.merchant_id and m.dt = b.cart_checkout_ts_ist
inner join 
merch_lat_long o on m.merchant_id = o.merchant_id
group by 1,2,3,4,5,6,7,8),

-- for each city, getting the highest price of the product and sharing the lat long of the latest order with that mid

final_agg as (
SELECT 
mll.frontend_city||'_'||mpm.product_id as id,
mpm.product_id,
replace(p.name, '%%',' percent')|| ' - '||p.unit as title,
p.name as description,
'http://grofers.s3.amazonaws.com/'||psi.image_path as image_link,
'android-app://com.grofers.customerapp/grofers/product?expr=%%22p'||p.id||'%%22&restricted=false' as mobile_android_app_link,
case when mpm.inventory_limit = 0 then 'out of stock'
when mpm.inventory_limit > 0 then 'in stock' end as availability,
mll.frontend_name AS frontend_name,
mll.merchant_id frontend_id,
p.type as custom_label_4,
p.brand as brand,
cat.name as google_product_category,
'new' as condition,
'no' as adult,
'no' as identifier_exists,
mll.frontend_city as custom_label_3,
'https://blinkit.com/prn/'||lower(replace(replace(p.name,' ','-'),'%%','percent'))||'/prid/'||p.id||'?lat='||location_lat||'&lon='||location_lon as link,
mpm.price as SP,
mpm.mrp,
mpm.product_id as item_group,
'' as custom_label_1,
'home'|| ' > '||cat.name|| ' > '||cat1.name|| ' > '||cat2.name as product_type,
lower(mll.frontend_city) as city,
location_lat as lat,
location_lon as lon,
'grofers://pdp?productId='||mpm.product_id as ios_url,
'grofers://pdp?productId='||mpm.product_id as android_url,
l0_category,
l1_category,
l2_category,
product_type ptype,
ss.merchant_id,
ss.location_lat,
ss.location_lon,
ss.city city_ss,
row_number() over(partition by mll.frontend_city,mpm.product_id order by mpm.price desc ) as S_no
FROM 
lake_grofers_db.gr_merchant_product_mapping mpm
INNER JOIN 
merch_lat_long mll on mll.merchant_id = mpm.merchant_id and mpm.enabled_flag=true and mpm.inventory_limit>0
INNER JOIN 
lake_grofers_db.gr_product p ON mpm.product_id = p.id--now mapping the category
inner join 
images psi on mpm.product_id = psi.product_id and rrank = 1
INNER JOIN 
lake_grofers_db.gr_product_category_mapping PCM ON P.ID = PCM.PRODUCT_ID
inner join 
ss on mll.merchant_id = ss.merchant_id
INNER JOIN 
lake_grofers_db.gr_category CAT2 ON PCM.CATEGORY_ID = CAT2.ID AND PCM.IS_PRIMARY=TRUE
INNER JOIN 
lake_grofers_db.gr_category CAT1 ON CAT2.PARENT_CATEGORY_ID = CAT1.ID
INNER JOIN 
lake_grofers_db.gr_category CAT ON CAT1.PARENT_CATEGORY_ID = CAT.ID and CAT.id not in (343,909)
inner join 
lake_rpc.item_product_mapping ipm on ipm.product_id=mpm.product_id 
inner join
dwh.dim_product dp on dp.product_id = mpm.product_id 
WHERE p.enabled_flag=TRUE
and dp.is_current = True
and psi.sort_order=1
and mpm.price>0
and mpm.mrp is not null
and mll.backend_name not ilike 'Super Store - B2B NCR Warehouse'
and mll.backend_city not ilike 'Haridwar'
and mll.Frontend_City not in ('%%Not in service area%%','durgapur')
group by 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,mpm.update_ts)
   
Select
 product_id,
 frontend_id,
 id,
 title,
 description,
 image_link,
 mobile_android_app_link,
 availability,
 ptype as custom_label_4,
 brand,
 product_type as google_product_category,
 condition,
 adult,
 identifier_exists,
 city as custom_label_3,
 link,
 SP as sale_price,
 mrp as price,
 item_group,
 l1_category as custom_label_1,
 ptype as product_type,
 city
FROM final_agg
where S_no = 1
group by 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22


"""

In [8]:
df = pd.read_sql(sql=sql, con=con_rs)
df.head()

Unnamed: 0,product_id,frontend_id,id,title,description,image_link,mobile_android_app_link,availability,custom_label_4,brand,...,adult,identifier_exists,custom_label_3,link,sale_price,price,item_group,custom_label_1,product_type,city
0,94055,30727,Ahmedabad_94055,McVitie's Digestive Biscuit - 1 kg,McVitie's Digestive Biscuit,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Biscuit,McVitie's,...,no,no,ahmedabad,https://blinkit.com/prn/mcvitie's-digestive-bi...,156.0,179.0,94055,healthy & digestive,Biscuit,ahmedabad
1,403804,30591,Ahmedabad_403804,Basic Chana Dal - 1 kg,Basic Chana Dal,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Chana Dal,Basic,...,no,no,ahmedabad,https://blinkit.com/prn/basic-chana-dal/prid/4...,80.0,120.0,403804,"toor, urad & chana",Chana Dal,ahmedabad
2,409538,30560,Ahmedabad_409538,Parle Platina Hide & Seek Choco Rolls Sandwich...,Parle Platina Hide & Seek Choco Rolls Sandwich...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Sandwich Cream Biscuits,Parle Platina,...,no,no,ahmedabad,https://blinkit.com/prn/parle-platina-hide-&-s...,81.0,120.0,409538,cream biscuits,Sandwich Cream Biscuits,ahmedabad
3,441861,30797,Ahmedabad_441861,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Incense Sticks,Herbal Strategi,...,no,no,ahmedabad,https://blinkit.com/prn/herbal-strategi---arom...,21.0,40.0,441861,pooja needs,Incense Sticks,ahmedabad
4,459566,30727,Ahmedabad_459566,Pond's Bright Beauty Spot-Less Glow & Germ Rem...,Pond's Bright Beauty Spot-Less Glow & Germ Rem...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Face Wash,Pond's,...,no,no,ahmedabad,https://blinkit.com/prn/pond's-bright-beauty-s...,456.0,570.0,459566,facial care,Face Wash,ahmedabad


In [9]:
### tobacco products
df.shape

(86439, 22)

In [10]:
sql_tob = """

select product_id from
((select
product_id
from dwh.dim_product where is_current = true 
and lower(product_type) in ('paan','Cigarette','Hookah Flavor','Gulkand') )
union all
(select* from metrics.nicotine_products) ) group by 1

"""

df_tob = pd.read_sql(sql=sql_tob, con=con_rs)
df_tob.head()

Unnamed: 0,product_id
0,228277
1,157145
2,69112
3,91378
4,89263


In [11]:
df_tob_ll = df_tob.product_id.to_list()

In [12]:
df = df[(df.product_id.isin(df_tob_ll) == False)]
df.shape

(86045, 22)

In [13]:
df.head()

Unnamed: 0,product_id,frontend_id,id,title,description,image_link,mobile_android_app_link,availability,custom_label_4,brand,...,adult,identifier_exists,custom_label_3,link,sale_price,price,item_group,custom_label_1,product_type,city
0,94055,30727,Ahmedabad_94055,McVitie's Digestive Biscuit - 1 kg,McVitie's Digestive Biscuit,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Biscuit,McVitie's,...,no,no,ahmedabad,https://blinkit.com/prn/mcvitie's-digestive-bi...,156.0,179.0,94055,healthy & digestive,Biscuit,ahmedabad
1,403804,30591,Ahmedabad_403804,Basic Chana Dal - 1 kg,Basic Chana Dal,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Chana Dal,Basic,...,no,no,ahmedabad,https://blinkit.com/prn/basic-chana-dal/prid/4...,80.0,120.0,403804,"toor, urad & chana",Chana Dal,ahmedabad
2,409538,30560,Ahmedabad_409538,Parle Platina Hide & Seek Choco Rolls Sandwich...,Parle Platina Hide & Seek Choco Rolls Sandwich...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Sandwich Cream Biscuits,Parle Platina,...,no,no,ahmedabad,https://blinkit.com/prn/parle-platina-hide-&-s...,81.0,120.0,409538,cream biscuits,Sandwich Cream Biscuits,ahmedabad
3,441861,30797,Ahmedabad_441861,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Incense Sticks,Herbal Strategi,...,no,no,ahmedabad,https://blinkit.com/prn/herbal-strategi---arom...,21.0,40.0,441861,pooja needs,Incense Sticks,ahmedabad
4,459566,30727,Ahmedabad_459566,Pond's Bright Beauty Spot-Less Glow & Germ Rem...,Pond's Bright Beauty Spot-Less Glow & Germ Rem...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Face Wash,Pond's,...,no,no,ahmedabad,https://blinkit.com/prn/pond's-bright-beauty-s...,456.0,570.0,459566,facial care,Face Wash,ahmedabad


In [14]:
sql = """SELECT cms_product_id product_id, cast(frontend_id as varchar) frontend_id, mrp actual_mrp, price actual_price FROM pricing_domain_pricerecommendation"""

In [15]:
df_prices = pd.read_sql(sql=sql, con=con_rep)
df_prices.head()

Unnamed: 0,product_id,frontend_id,actual_mrp,actual_price
0,243,30251,310.0,274.0
1,91027,30251,295.0,236.0
2,23664,30299,255.0,253.0
3,3570,30362,110.0,105.0
4,289190,31306,75.0,75.0


In [16]:
merged_data = df.merge(df_prices, how="inner", on=["product_id", "frontend_id"])
merged_data.head()

Unnamed: 0,product_id,frontend_id,id,title,description,image_link,mobile_android_app_link,availability,custom_label_4,brand,...,custom_label_3,link,sale_price,price,item_group,custom_label_1,product_type,city,actual_mrp,actual_price
0,94055,30727,Ahmedabad_94055,McVitie's Digestive Biscuit - 1 kg,McVitie's Digestive Biscuit,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Biscuit,McVitie's,...,ahmedabad,https://blinkit.com/prn/mcvitie's-digestive-bi...,156.0,179.0,94055,healthy & digestive,Biscuit,ahmedabad,179.0,156.0
1,403804,30591,Ahmedabad_403804,Basic Chana Dal - 1 kg,Basic Chana Dal,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Chana Dal,Basic,...,ahmedabad,https://blinkit.com/prn/basic-chana-dal/prid/4...,80.0,120.0,403804,"toor, urad & chana",Chana Dal,ahmedabad,120.0,77.0
2,409538,30560,Ahmedabad_409538,Parle Platina Hide & Seek Choco Rolls Sandwich...,Parle Platina Hide & Seek Choco Rolls Sandwich...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Sandwich Cream Biscuits,Parle Platina,...,ahmedabad,https://blinkit.com/prn/parle-platina-hide-&-s...,81.0,120.0,409538,cream biscuits,Sandwich Cream Biscuits,ahmedabad,120.0,81.0
3,441861,30797,Ahmedabad_441861,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Incense Sticks,Herbal Strategi,...,ahmedabad,https://blinkit.com/prn/herbal-strategi---arom...,21.0,40.0,441861,pooja needs,Incense Sticks,ahmedabad,40.0,40.0
4,127891,31021,Bengaluru_127891,Peppy Cheese Balls - 60 g,Peppy Cheese Balls,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Cheese Balls,Peppy,...,bengaluru,https://blinkit.com/prn/peppy-cheese-balls/pri...,45.0,60.0,127891,chips & crisps,Cheese Balls,bengaluru,60.0,45.0


In [17]:
merged_data.shape

(82405, 24)

In [18]:
merged_data["price"] = merged_data["actual_mrp"].astype(int)
merged_data["sale_price"] = merged_data["actual_price"].astype(int)
df = merged_data.copy()
df.head()

Unnamed: 0,product_id,frontend_id,id,title,description,image_link,mobile_android_app_link,availability,custom_label_4,brand,...,custom_label_3,link,sale_price,price,item_group,custom_label_1,product_type,city,actual_mrp,actual_price
0,94055,30727,Ahmedabad_94055,McVitie's Digestive Biscuit - 1 kg,McVitie's Digestive Biscuit,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Biscuit,McVitie's,...,ahmedabad,https://blinkit.com/prn/mcvitie's-digestive-bi...,156,179,94055,healthy & digestive,Biscuit,ahmedabad,179.0,156.0
1,403804,30591,Ahmedabad_403804,Basic Chana Dal - 1 kg,Basic Chana Dal,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Chana Dal,Basic,...,ahmedabad,https://blinkit.com/prn/basic-chana-dal/prid/4...,77,120,403804,"toor, urad & chana",Chana Dal,ahmedabad,120.0,77.0
2,409538,30560,Ahmedabad_409538,Parle Platina Hide & Seek Choco Rolls Sandwich...,Parle Platina Hide & Seek Choco Rolls Sandwich...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Sandwich Cream Biscuits,Parle Platina,...,ahmedabad,https://blinkit.com/prn/parle-platina-hide-&-s...,81,120,409538,cream biscuits,Sandwich Cream Biscuits,ahmedabad,120.0,81.0
3,441861,30797,Ahmedabad_441861,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Incense Sticks,Herbal Strategi,...,ahmedabad,https://blinkit.com/prn/herbal-strategi---arom...,40,40,441861,pooja needs,Incense Sticks,ahmedabad,40.0,40.0
4,127891,31021,Bengaluru_127891,Peppy Cheese Balls - 60 g,Peppy Cheese Balls,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Cheese Balls,Peppy,...,bengaluru,https://blinkit.com/prn/peppy-cheese-balls/pri...,45,60,127891,chips & crisps,Cheese Balls,bengaluru,60.0,45.0


In [19]:
del df["product_id"]
del df["frontend_id"]
del df["actual_mrp"]
del df["actual_price"]
df.head()

Unnamed: 0,id,title,description,image_link,mobile_android_app_link,availability,custom_label_4,brand,google_product_category,condition,adult,identifier_exists,custom_label_3,link,sale_price,price,item_group,custom_label_1,product_type,city
0,Ahmedabad_94055,McVitie's Digestive Biscuit - 1 kg,McVitie's Digestive Biscuit,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Biscuit,McVitie's,home > bakery & biscuits > bakery & biscuits >...,new,no,no,ahmedabad,https://blinkit.com/prn/mcvitie's-digestive-bi...,156,179,94055,healthy & digestive,Biscuit,ahmedabad
1,Ahmedabad_403804,Basic Chana Dal - 1 kg,Basic Chana Dal,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Chana Dal,Basic,"home > atta, rice & dal > toor, urad & chana >...",new,no,no,ahmedabad,https://blinkit.com/prn/basic-chana-dal/prid/4...,77,120,403804,"toor, urad & chana",Chana Dal,ahmedabad
2,Ahmedabad_409538,Parle Platina Hide & Seek Choco Rolls Sandwich...,Parle Platina Hide & Seek Choco Rolls Sandwich...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Sandwich Cream Biscuits,Parle Platina,home > bakery & biscuits > bakery & biscuits >...,new,no,no,ahmedabad,https://blinkit.com/prn/parle-platina-hide-&-s...,81,120,409538,cream biscuits,Sandwich Cream Biscuits,ahmedabad
3,Ahmedabad_441861,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,Herbal Strategi - Aromatic Vetiver Agarbatti/ ...,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Incense Sticks,Herbal Strategi,home > home & office > pooja needs > incense s...,new,no,no,ahmedabad,https://blinkit.com/prn/herbal-strategi---arom...,40,40,441861,pooja needs,Incense Sticks,ahmedabad
4,Bengaluru_127891,Peppy Cheese Balls - 60 g,Peppy Cheese Balls,http://grofers.s3.amazonaws.com/app/images/pro...,android-app://com.grofers.customerapp/grofers/...,in stock,Cheese Balls,Peppy,home > munchies > chips & crisps > crisps & puffs,new,no,no,bengaluru,https://blinkit.com/prn/peppy-cheese-balls/pri...,45,60,127891,chips & crisps,Cheese Balls,bengaluru


In [20]:
### checking if all df is non empty
assert not (df.empty or df[df.availability == "in stock"].empty)

In [21]:
print(df.info(verbose=True))

<class 'pandas.core.frame.DataFrame'>
Int64Index: 82405 entries, 0 to 82404
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   id                       82405 non-null  object
 1   title                    82405 non-null  object
 2   description              82405 non-null  object
 3   image_link               82405 non-null  object
 4   mobile_android_app_link  82405 non-null  object
 5   availability             82405 non-null  object
 6   custom_label_4           82405 non-null  object
 7   brand                    82119 non-null  object
 8   google_product_category  82405 non-null  object
 9   condition                82405 non-null  object
 10  adult                    82405 non-null  object
 11  identifier_exists        82405 non-null  object
 12  custom_label_3           82405 non-null  object
 13  link                     82405 non-null  object
 14  sale_price               82405 non-nul

In [22]:
print(df.shape)

(82405, 20)


In [23]:
### adding dag run flag condition

In [24]:
sheet_id = "1-uNp4c9uuGrWibaG2MHThepvx-o4Jctke3DpeS5Zsxg"
sheet_name = "flags"
flags_df = pb.from_sheets(sheet_id, sheet_name)
var = flags_df[flags_df.Name == "google_s3"]

In [25]:
var.Flag.to_list()[0] == "1"

True

In [26]:
for city, city_df in df.groupby("custom_label_3"):
    filename = city.lower() + "_google.csv"
    filepath = f"/tmp/{filename}"
    city_df.to_csv(filepath, index=False)
    print(
        "Dumping data for {city} to {filename} with size : {size}".format(
            city=city, filename=filename, size=city_df.shape
        )
    )
    if var.Flag.to_list()[0] == "1":
        print("Running now")
        pb.to_s3(filepath, "grofers-facebook-product-feed", filename)
    else:
        print("No runtime due to flag")

Dumping data for ahmedabad to ahmedabad_google.csv with size : (4172, 20)
Running now
Dumping data for bengaluru to bengaluru_google.csv with size : (5713, 20)
Running now
Dumping data for chandigarh to chandigarh_google.csv with size : (2530, 20)
Running now
Dumping data for chennai to chennai_google.csv with size : (3075, 20)
Running now
Dumping data for delhi to delhi_google.csv with size : (5690, 20)
Running now
Dumping data for faridabad to faridabad_google.csv with size : (4677, 20)
Running now
Dumping data for hr-ncr to hr-ncr_google.csv with size : (5804, 20)
Running now
Dumping data for hyderabad to hyderabad_google.csv with size : (5472, 20)
Running now
Dumping data for jaipur to jaipur_google.csv with size : (3834, 20)
Running now
Dumping data for jalandhar to jalandhar_google.csv with size : (2808, 20)
Running now
Dumping data for kanpur to kanpur_google.csv with size : (3393, 20)
Running now
Dumping data for kolkata to kolkata_google.csv with size : (5068, 20)
Running now
