# SKU migration

## Parameters

In [1]:
import sys

# Add an extra site-packages directory to the interpreter's module search path.
# NOTE(review): this is an absolute, machine-specific location; on any other
# machine the directory will not exist and the entry has no effect.
VENV_SITE_PACKAGES = '/Users/efraflores/Desktop/hub/cornershop/venv/lib/python3.9/site-packages'

# Guarded so that re-running this cell does not keep appending the same entry.
if VENV_SITE_PACKAGES not in sys.path:
    sys.path.append(VENV_SITE_PACKAGES)

In [2]:
import os
import numpy as np
import pandas as pd
import datetime

# --- Working directory: every input CSV/Excel file is read from here ---
BASE_DIR = '/Users/efraflores/Desktop/EF/Corner/BR/SKU_migration/'

# --- ISO week of today, zero-padded to two digits; used in output file names ---
iso_week = datetime.datetime.today().isocalendar()[1]
week_number = f'{iso_week:02d}'

# --- Store being migrated and the column names of its source workbook ---
# BARCODES_COL is the de-duplication key; OLD_COL / NEW_COL are later renamed
# to 'old' / 'new'.
STORE_NAME = 'BIG'
BARCODES_COL = 'codigoproduto'
OLD_COL = 'upcwm_dv'
NEW_COL = 'upcsad_dv'

# Alternative settings for the Sams workbook (swap with the four lines above):
#STORE_NAME = 'Sams'
#BARCODES_COL,OLD_COL,NEW_COL = ' skubig ','UPC + DV OLD','UPC + DV NEW'

# --- Deactivation settings ---
LAST_VISIT_COL = 'max_last_visited'           # column holding the last-visit timestamp
DAYS = 4                                      # rows older than this many days are selected
DOWNLOAD_DATE = datetime.datetime.now().date()  # reference date for the age computation

## Old sku vs catalog

In [3]:
# Convert the store workbook (<STORE_NAME>.xlsx) into the CSV the next cell reads:
# keep only the barcode / old-SKU / new-SKU columns, drop incomplete rows and
# cast everything to integers.
#
# This step is best-effort: when it cannot run, whatever 01_old_vs_new.csv is
# already on disk is used unchanged. The reason is printed instead of being
# silently swallowed, so a stale CSV does not go unnoticed.
excel_path = os.path.join(BASE_DIR, STORE_NAME + '.xlsx')
csv_path = os.path.join(BASE_DIR, '01_old_vs_new.csv')

if not os.path.exists(excel_path):
    print(f'{excel_path} not found; keeping the existing 01_old_vs_new.csv')
else:
    try:
        # The context manager closes the workbook handle even if parsing fails.
        with pd.ExcelFile(excel_path) as workbook:
            first_sheet = workbook.parse(0)
        (first_sheet[[BARCODES_COL, OLD_COL, NEW_COL]]
         .dropna()
         .astype(int)
         .to_csv(csv_path, index=False))
    except Exception as err:
        # Typical causes: a column name missing from the sheet (KeyError) or a
        # non-numeric value that cannot be cast to int (ValueError).
        print(f'WARNING: could not convert {excel_path}; '
              f'keeping the existing 01_old_vs_new.csv ({err!r})')

In [3]:
# Old -> new SKU table: one row per barcode, with the store-specific column
# names replaced by the generic 'old' / 'new'.
new = (
    pd.read_csv(os.path.join(BASE_DIR, '01_old_vs_new.csv'))
    .drop_duplicates(subset=BARCODES_COL)
    .rename(columns={OLD_COL: 'old', NEW_COL: 'new'})
)

# Catalog export: one row per catalog product.
catalog = (
    pd.read_csv(os.path.join(BASE_DIR, '02_catalog.csv'))
    .drop_duplicates(subset='catalog_product_id')
)

# Catalog rows whose current sku appears as an 'old' SKU in the migration table.
old_vs_cat = pd.merge(catalog, new, left_on='sku', right_on='old')

# Cell output: the matched new SKUs as one single-quoted, comma-separated string.
joined_new_skus = "','".join(map(str, old_vs_cat['new']))
f"'{joined_new_skus}'"

"'25133','4770','40000681533','65719','19996','53839','50906','51354','37877','58643','32490','10016','58735','39895','39338','19491','19460','55291','55314','41614','39321','23498','10108','10153','10221','25607','17619','31677','53426','69311','50104','50050','50067','28066','55475','321','60875','40853','36948','41348','64699','67539','53617','57165','30694','36092','31257','58155','6842','48309','71406','38096','529','840','38935','64262','22507','63043','25461','41669','38683','15493','41836','50029','15547','51941','59572','7511','41638','48996','14106','29827','51699','10467','40000899730','34692','41768','22613','4299','47876','10191','49177','32360','19170','40000899518','40000899709','49887','1663','307','4343','32506','52375','7849','26420','49825','42208','54324','54928','7092','52849','10443','12447','13024','10504','6835','25362','65818','26130','51569','48804','39543','9485','31516','69304','25263','4633','17923','40433','37440','40000114642','7450','55437','56014','5541

## Map new sku

In [4]:
# Join the catalog matches with the mapped-SKU export on the new SKU and keep
# the three columns written to the migration file.
mapped = pd.read_csv(os.path.join(BASE_DIR, '03_mapped_sku.csv')).drop_duplicates()
sku_migration = (
    old_vs_cat.merge(mapped, on='new')[['new', 'catalog_product_id', 'store_id']]
    .rename(columns={'new': 'sku', 'catalog_product_id': 'product_id'})
)

# Create the output folder on first use so the write cannot fail on a fresh checkout.
results_dir = os.path.join(BASE_DIR, 'results')
os.makedirs(results_dir, exist_ok=True)
sku_migration.to_csv(
    os.path.join(results_dir, f'skumigration_week{week_number}_{STORE_NAME}.csv'),
    index=False,
)

# Show one random row; the size is capped so an empty result does not raise.
sku_migration.sample(min(1, len(sku_migration)))

Unnamed: 0,sku,product_id,store_id
0,30762,3741130,1481


## Deactivate products

In [5]:
# Load the supply export and the tab-separated, UTF-16 "full" export, then
# normalise the latter's headers to lower snake_case.
supply = pd.read_csv(os.path.join(BASE_DIR, '04_supply.csv'))
full = pd.read_csv(os.path.join(BASE_DIR, '05_full.csv'), sep='\t', encoding='utf-16')
full = full.rename(columns=lambda name: name.strip().lower().replace(' ', '_'))

# Join on the columns both frames share, restricting `full` to branch/product ids.
deact = pd.merge(supply, full[['branch_id', 'product_id']])
deact[LAST_VISIT_COL] = pd.to_datetime(deact[LAST_VISIT_COL])
print(f'There were {len(deact)} rows')

# Keep, for every product_branch_id, only the rows carrying its latest visit.
latest_visit = deact.groupby('product_branch_id')[LAST_VISIT_COL].max()
deact = deact.merge(latest_visit, on=['product_branch_id', LAST_VISIT_COL])
print(f'Just with the product_branch - max_date, there are {len(deact)} rows')

# Whole days between the last visit and the download date.
visit_dates = pd.to_datetime(deact[LAST_VISIT_COL]).dt.date
elapsed = DOWNLOAD_DATE - visit_dates
deact['deactivate_days'] = (elapsed / np.timedelta64(1, 'D')).astype(int)

is_stale = deact['deactivate_days'].gt(DAYS)
deact = deact.loc[is_stale].copy()
print(f'Now, there are just {len(deact)} product-branches with last-visited greater than {DAYS} day(s)')

# Products that are part of the SKU migration are left out of the deactivation list.
being_migrated = deact['product_id'].isin(sku_migration['product_id'])
deact = deact.loc[~being_migrated, ['store_id', 'product_branch_id']].astype(int)

# Remaining columns of the bulk file: blank price/schedule, availability UNAVAILABLE.
deact[['price', 'availability', 'schedule']] = '', 'UNAVAILABLE', ''
print(f'Finally, there are {len(deact)} product-branches to deactivate, without the ones that we are mapping')

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


There were 112728 rows
Just with the product_branch - max_date, there are 112728 rows
Now, there are just 105003 product-branches with last-visited greater than 4 day(s)
Finally, there are 104993 product-branches to deactivate, without the ones that we are mapping


In [21]:
# Write the deactivation list as Excel files of at most `n` rows each,
# numbered 01, 02, ... in row order (the last file holds the remainder).
#
# Batches are taken as positional slices instead of being rebuilt row by row:
# this keeps the column dtypes, avoids a Python-level lookup per row, and stays
# correct even if an index label were repeated.
n = 10000

# Create the output folder on first use so the write cannot fail on a fresh checkout.
results_dir = os.path.join(BASE_DIR, 'results')
os.makedirs(results_dir, exist_ok=True)

for batch_number, start in enumerate(range(0, len(deact), n), start=1):
    batch_path = os.path.join(
        results_dir,
        f'bulk_week{week_number}_{STORE_NAME}_{str(batch_number).zfill(2)}.xlsx',
    )
    deact.iloc[start:start + n].to_excel(batch_path, index=False)

# Show a few random rows; the size is capped so a short list does not raise.
deact.sample(min(4, len(deact)))

Unnamed: 0,store_id,product_branch_id,price,availability,schedule
408,1480,100019424,,UNAVAILABLE,
3098,1480,94291689,,UNAVAILABLE,
2192,1480,102699800,,UNAVAILABLE,
6160,1480,186834158,,UNAVAILABLE,


## Unmap old sku

In [17]:
unmap = pd.read_csv(os.path.join(BASE_DIR, '06_unmap.csv'))
# Truncate the timestamps to midnight so the comparison below is by calendar day.
unmap['max_last_visited'] = pd.to_datetime(pd.to_datetime(unmap['max_last_visited']).dt.date)

# Per (sku_source, catalog_product_id), keep only the rows carrying the earliest date.
earliest_visit = (
    unmap.groupby(['sku_source', 'catalog_product_id'])['max_last_visited']
    .min()
    .reset_index()
)
unmap = earliest_visit.merge(unmap)

# Keep rows last visited strictly before DOWNLOAD_DATE - DAYS.
cutoff = pd.to_datetime(DOWNLOAD_DATE - datetime.timedelta(DAYS))
unmap = unmap[unmap['max_last_visited'] < cutoff]
print(f'There are {len(unmap)} products with last_visited greater than {DAYS} days')

# Restrict to SKUs listed as 'old' in the migration table. `new` is only
# de-duplicated by barcode, so the same old SKU may appear more than once;
# de-duplicating the key list keeps the merge from repeating unmap rows.
old_skus = new[['old']].rename(columns={'old': 'sku'}).drop_duplicates()
unmap = unmap.merge(old_skus)
print(f'And {len(unmap)} of those, in the migration file (old_vs_new)')

# Create the output folder on first use so the write cannot fail on a fresh checkout.
results_dir = os.path.join(BASE_DIR, 'results')
os.makedirs(results_dir, exist_ok=True)
unmap[['supply_product_id']].to_csv(
    os.path.join(results_dir, f'unmap_week{week_number}.csv'),
    index=False,
)

# Show a few random rows; the size is capped so a short result does not raise.
unmap.sample(min(4, len(unmap)))

There are 892 products with last_visited greater than 4 days
And 125 of those, in the migration file (old_vs_new)


Unnamed: 0,sku_source,catalog_product_id,max_last_visited,supply_product_id,sku
121,br-nacional-csv,2759047,2021-02-24,13047264,707343.0
62,br-bigbompreco-csv,2742732,2021-02-16,18566173,413077.0
25,br-bigbompreco-csv,2742031,2021-03-24,19310606,69908.0
114,br-nacional-csv,2704415,2021-02-08,13042632,205238000000.0
