# NIKE CONSOLIDATION
### Cre: Dang Tran

# PHASE 1: COMBINE AND TRANSFORM DATA

In [1]:
import time
start_time = time.time()

## 1. Import libraries

In [2]:
import pandas as pd
import numpy as np
import pyodbc

## 2. Connect Database

In [3]:
conn = pyodbc.connect(
    'Driver={SQL Server};'
    'Server=ACFC-L-DANGNIKE\SQLEXPRESS;'
    'Database=NIKE;'
    'Trusted_Connection=yes;')

## 3. Load Data from SQL Server

### 3.1. Load Sales Data

In [4]:
sale = pd.read_sql_query(
    """DECLARE @ld date, @fd date
    SET @ld =  (select max(created_date) from SALES_DATA)
    SET @fd = DATEADD(day,-28, @ld)
    select STORE_CODE, S.UPC, SKU, SIZE, CREATED_DATE, QTY SOLD_QTY
    from 
        SALES_DATA S,
        MASTER_UPC U
    where 
        S.UPC=U.UPC AND
        STORE_CODE LIKE '3%' AND
        CREATED_DATE between @fd AND @ld""",
    conn)
sale.head()



Unnamed: 0,STORE_CODE,UPC,SKU,SIZE,CREATED_DATE,SOLD_QTY
0,346,30673092255,NESS9129-903,32,2022-12-18,1.0
1,346,30673143520,NESSA386-001,S,2022-12-14,1.0
2,346,30673143551,NESSA386-001,XL,2023-01-04,1.0
3,346,30673143582,NESSA386-100,S,2022-12-14,1.0
4,351,30673143605,NESSA386-100,L,2022-12-15,1.0


### 3.2. Load Stock Data

In [5]:
stock = pd.read_sql_query(
    """select STORE_CODE, S.UPC, SKU, SIZE, (ASN_PENDING+CLOSING) STOCK_QTY
    from STOCK S, MASTER_UPC U
    where 
        STORE_CODE LIKE '3%' AND 
        (ASN_PENDING+CLOSING) >0 AND
        U.UPC=S.UPC""",
    conn)
stock.head()



Unnamed: 0,STORE_CODE,UPC,SKU,SIZE,STOCK_QTY
0,352,884776237347,AV4789-101,11,1.0
1,330,884776237347,AV4789-101,11,1.0
2,353,194502688207,CV2474-010,XS,1.0
3,374,194502688207,CV2474-010,XS,1.0
4,343,194502688207,CV2474-010,XS,1.0


### 3.3. Load Onstore Date 

In [6]:
shipin=pd.read_sql_query(
    """
    DECLARE @fd date, @ld date
	set @ld = (select max(created_date) from SALES_DATA)
    SET @fd = dateadd(day,-28,@ld)
    select SKU, STORE_CODE, MIN([Requested Delivery Date ]) OSD
    from STORE_SHIPIN S, MASTER_UPC U
    where
        U.UPC=S.[Product Code] and
        ([Requested Delivery Date ] between @fd and @ld) and
        STORE_CODE like '3%'
    GROUP BY SKU, STORE_CODE
    """,
    conn)
shipin.head()



Unnamed: 0,SKU,STORE_CODE,OSD
0,831559-011,304,2022-12-18
1,943091-010,304,2022-12-18
2,943092-010,304,2022-12-18
3,BQ4632-606,304,2022-12-18
4,BQ4639-001,304,2022-12-18


### 3.4. Load target

In [None]:
target = pd.read_sql_query(
    """
    DECLARE @m int, @y int
	set @m = (select month(max(created_date)) from SALES_DATA)
    SET @y = (select year(max(created_date)) from SALES_DATA)
    select
        STORE_CODE,
        sum([TARGET RETAIL]) TARGET,
        ROW_NUMBER() OVER(ORDER BY sum([TARGET RETAIL]) desc) AS STORE_RANK
    from TARGET
    where
        month([Receipt date])=@m and
        year([Receipt date])=@y
    group by STORE_CODE
    """,
    conn)
target.head()



Unnamed: 0,STORE_CODE,TARGET,STORE_RANK
0,351,7800000000.0,1
1,365,6800000000.0,2
2,311,4700000000.0,3
3,324,3800000000.0,4
4,346,3400000000.0,5


### 3.5. Load Group Store

In [None]:
group_store=pd.read_csv('D:/OneDrive/ACFC/NIKE - Documents/From G Suite Drive/5. MERCHANDISING TEAM/CONSOLIDATE/GROUP DOOR.csv')
group_store.head()

Unnamed: 0,STORE_CODE,Short Name,Door Level,CITY,GROUP,STORE RANK
0,351,SGC,NSP Best,HCM,G04,1
1,365,VCDK,NSP Best,HCM,G05,2
2,311,CRM,NSP Best,HCM,G06,3
3,324,VCBT,NSP Best,HA NOI,G08,4
4,346,HD,OUTLET,HCM,G05,5


In [None]:
group_store['STORE_CODE']=group_store['STORE_CODE'].astype(str)
group_store.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   STORE_CODE  45 non-null     object
 1   Short Name  45 non-null     object
 2   Door Level  45 non-null     object
 3   CITY        45 non-null     object
 4   GROUP       45 non-null     object
 5   STORE RANK  45 non-null     int64 
dtypes: int64(1), object(5)
memory usage: 2.2+ KB


In [None]:
first_date=min(sale['CREATED_DATE'])
last_date=max(sale['CREATED_DATE'])
print(first_date,last_date)

2022-12-11 00:00:00 2023-01-08 00:00:00


## 4. Combining tables

### 4.1. Merge SALE data and SHIPIN

In [None]:
# Group Sale table by SKU and STORE_CODE
group_sale=sale.groupby(
    ['STORE_CODE','SKU'],
    as_index=False).apply(
        lambda x:pd.Series({
            'SOLD_QTY':x.SOLD_QTY.sum(),
            'FIRST_SOLD_DATE':x.CREATED_DATE.min()}))

group_sale=pd.merge(
    group_sale,
    shipin,
    on=['STORE_CODE','SKU'],
    how='left')


# Add WOS
group_sale['FIRST_DATE']=np.where(
    group_sale['OSD']<group_sale['FIRST_SOLD_DATE'], 
    group_sale['OSD'], 
    group_sale['FIRST_SOLD_DATE'])

group_sale['WOS']=((pd.to_datetime(last_date) - pd.to_datetime(group_sale['FIRST_DATE'])) / np.timedelta64(1, 'D')+1)/7

#add WKLY_SOLD
group_sale['WKLY_SOLD']=group_sale['SOLD_QTY']/group_sale['WOS']

group_sale.head()

Unnamed: 0,STORE_CODE,SKU,SOLD_QTY,FIRST_SOLD_DATE,OSD,FIRST_DATE,WOS,WKLY_SOLD
0,304,831559-011,1.0,2022-12-23,2022-12-18,2022-12-18,3.142857,0.318182
1,304,943091-010,3.0,2022-12-23,2022-12-18,2022-12-18,3.142857,0.954545
2,304,943092-010,1.0,2023-01-08,2022-12-18,2022-12-18,3.142857,0.318182
3,304,AJ8129-010,2.0,2022-12-11,,1670716800000000000,4.142857,0.482759
4,304,AO2810-104,3.0,2022-12-23,,1671753600000000000,2.428571,1.235294


### 4.2. Merge STOCK with Sales

In [None]:
group_stock=stock.groupby(['STORE_CODE','SKU'],as_index=False)['STOCK_QTY'].sum()

# Merge tbl with shipin
tbl=pd.merge(
    group_stock,
    group_sale[['STORE_CODE','SKU','WKLY_SOLD']],
    on=['STORE_CODE','SKU'],
    how='left')

# Replace NaN in WKLYSOLD by 0
tbl['WKLY_SOLD'].fillna(0, inplace=True)

tbl.head()

Unnamed: 0,STORE_CODE,SKU,STOCK_QTY,WKLY_SOLD
0,304,343738-434,3.0,0.0
1,304,831559-011,8.0,0.318182
2,304,913011-010,4.0,0.0
3,304,939138-010,6.0,0.0
4,304,943091-010,3.0,0.954545


### 4.3. Merge Everything together

In [None]:
#merge target vs group store
store=pd.merge(
    target[['STORE_CODE','STORE_RANK']],
    group_store[['STORE_CODE','GROUP']],
    on='STORE_CODE',
    how='inner')

#merge store vs tbl
tbl1=pd.merge(
    tbl,
    store,
    on='STORE_CODE',
    how='left')

tbl1.head()

Unnamed: 0,STORE_CODE,SKU,STOCK_QTY,WKLY_SOLD,STORE_RANK,GROUP
0,304,343738-434,3.0,0.0,25.0,G05
1,304,831559-011,8.0,0.318182,25.0,G05
2,304,913011-010,4.0,0.0,25.0,G05
3,304,939138-010,6.0,0.0,25.0,G05
4,304,943091-010,3.0,0.954545,25.0,G05


In [None]:
tbl1=tbl1[['SKU','GROUP','STORE_CODE','WKLY_SOLD','STORE_RANK','STOCK_QTY']].sort_values(
    by=['SKU','GROUP','WKLY_SOLD','STORE_RANK'],
    ascending = [True, True, False, True])
tbl1=tbl1.reset_index(drop=True)
tbl1.head()

Unnamed: 0,SKU,GROUP,STORE_CODE,WKLY_SOLD,STORE_RANK,STOCK_QTY
0,311046-162,G04,351,5.833333,1.0,12.0
1,311046-162,G05,365,6.416667,2.0,7.0
2,311046-162,G06,311,2.333333,3.0,16.0
3,315115-112,G05,346,0.0,5.0,1.0
4,315115-170,G08,324,0.0,4.0,2.0


# PROCESSING CONSOLIDATION

In [19]:
fnl=stock
fnl['TRANSFER_TO']=np.nan
fnl.head()

Unnamed: 0,STORE_CODE,UPC,SKU,SIZE,STOCK_QTY,TRANSFER_TO
0,352,884776237347,AV4789-101,11,1.0,
1,330,884776237347,AV4789-101,11,1.0,
2,353,194502688207,CV2474-010,XS,1.0,
3,374,194502688207,CV2474-010,XS,1.0,
4,343,194502688207,CV2474-010,XS,1.0,


In [20]:
def check(store_in,store_out):
    combine=pd.concat([store_in, store_out], axis=0)
    combine=combine.groupby(['SIZE'],as_index=False)['STOCK_QTY'].sum()
    combine=combine[combine.STOCK_QTY>0]
    ave= combine.STOCK_QTY.mean()
    maxx= combine.STOCK_QTY.max()
    test=False
    if (ave<=3) & (maxx<=6):
        test=True
    return test

In [21]:
def transfer(tbl, sku, store_code_in, store_code_out):
    
    #filter tbl by store_code and sku
    store_in=tbl[(tbl.STORE_CODE==store_code_in) & (tbl.SKU==sku)]
    store_out=tbl[(tbl.STORE_CODE==store_code_out) & (tbl.SKU==sku)]
    
    #combine store_in and store_out
    combine=pd.concat([store_in, store_out], axis=0)
    combine=combine.groupby(['UPC','SIZE'],as_index=False)['STOCK_QTY'].sum()
    
    #assign combined table by store_in info
    combine['STORE_CODE']=store_code_in
    combine['SKU']=sku
    combine=combine[['STORE_CODE','UPC','SKU','SIZE','STOCK_QTY']]
    
    #take out store_in
    tbl=tbl[(tbl.STORE_CODE!=store_code_in) | (tbl.SKU!=sku)]
    
    #assign direction to store_out (Transfer_to)
    tbl.loc[(tbl.STORE_CODE==store_code_out) & (tbl.SKU==sku), 'TRANSFER_TO'] = store_code_in
    
    #assign store_out's stock to 0
    tbl.loc[(tbl.STORE_CODE==store_code_out) & (tbl.SKU==sku), 'STOCK_QTY'] = 0
    
    #concat combined table into original table
    tbl=pd.concat([tbl,combine], axis=0)
    
    return tbl

In [22]:
sk=tbl1.SKU.unique()
for i in sk:
    gr=tbl1[tbl1.SKU==i].GROUP.unique()
    for j in gr:
        st=tbl1[(tbl1.SKU==i) & (tbl1.GROUP==j)].STORE_CODE
        for k in st[:-1]:
            d=st.tolist().index(k)
            for l in st[-1:d:-1]:
                store_in=fnl[(fnl.STORE_CODE==k) & (fnl.SKU==i)]
                store_out=fnl[(fnl.STORE_CODE==l) & (fnl.SKU==i)]
                if check(store_in,store_out)==True:
                    fnl=transfer(fnl,i,k,l)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tbl.loc[(tbl.STORE_CODE==store_code_out) & (tbl.SKU==sku), 'TRANSFER_TO'] = store_code_in


In [18]:
for i, j in tbl1.groupby('SKU'):
    for m, n in j.groupby('GROUP'):
        for k in n['STORE_CODE'][:-1]:
            d=n['STORE_CODE'].tolist().index(k)
            for l in n['STORE_CODE'][-1:d:-1]:
                store_in=fnl[(fnl.STORE_CODE==k) & (fnl.SKU==i)]
                store_out=fnl[(fnl.STORE_CODE==l) & (fnl.SKU==i)]
                if check(store_in,store_out)==True:
                    fnl=transfer(fnl,i,k,l)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tbl.loc[(tbl.STORE_CODE==store_code_out) & (tbl.SKU==sku), 'TRANSFER_TO'] = store_code_in


In [19]:
print("--- %s seconds ---" % (time.time() - start_time))

--- 2932.5664949417114 seconds ---


In [30]:
tmp=stock[['SKU','SIZE','UPC']].drop_duplicates(subset = ['SKU','SIZE','UPC'])
tmp

Unnamed: 0,SKU,SIZE,UPC
0,AV4789-101,7.5,884776237279
1,AV4789-101,11,884776237347
3,CV2474-010,XS,194502688207
22,CV2474-010,S,194502688214
51,618919-032,8,193145292215
...,...,...,...
99884,DJ9946-101,7,195868998863
99899,DJ9946-101,7.5,195868998870
99908,DJ9946-101,8,195868998887
99917,CT3886-602,11,196154519939
