#Import Statements

In [0]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pd.set_option('display.max_columns', None)

#Functions

In [0]:
# Function to handle division and replace with 0 in case of errors
def safe_division(x, y):
    """Divide ``x`` by ``y``, returning 0 when the denominator is zero.

    Plain Python numbers raise ``ZeroDivisionError`` on a zero denominator,
    whereas NumPy/pandas scalars (what ``DataFrame.apply(axis=1)`` passes in)
    silently produce ``inf``/``nan``. Both cases are mapped to 0 here.

    Args:
        x: Numerator.
        y: Denominator.

    Returns:
        ``x / y``, or ``0`` when the division is by zero. A NaN that comes
        from a NaN operand (non-zero denominator) is returned unchanged.
    """
    try:
        # Keep NumPy quiet about the division; the result is inspected below.
        with np.errstate(divide='ignore', invalid='ignore'):
            result = x / y
    except (ZeroDivisionError, FloatingPointError):
        return 0
    # NumPy path: no exception was raised, but a zero denominator left inf/nan.
    if isinstance(result, (float, np.floating)) and not np.isfinite(result) and y == 0:
        return 0
    return result

def plot_bar(dataframe,x,y):
    """Return a plotly bar chart of ``y`` summed per ``x``, largest bar first.

    Args:
        dataframe: Source pandas DataFrame.
        x: Column to group by (becomes the x axis).
        y: Column to sum within each group (becomes the y axis).

    Returns:
        The plotly figure; the caller decides whether to show it.
    """
    # Select y before summing so unrelated (e.g. text) columns are never
    # aggregated; summing the whole frame is slow and can raise on mixed types.
    totals = dataframe.groupby(x)[y].sum().reset_index()
    fig = px.bar(data_frame=totals.sort_values(by=y, ascending=False), x=x, y=y)
    return fig

#User Inputs

In [0]:
category_name = "ICE CREAM & DESSERTS" # Mandatory field
material_group_name = "ICE CREAM IMPULSE" # Leave it blank if you want the recommendations on category level
region = "ABU DHABI"  # ABU DHABI, AL AIN, DUBAI, SHARJAH

# Lower-cased name used for the working folder and file prefix. Falls back to
# the category when the material group is left blank, so a category-level run
# does not end up with an empty name.
catg_lower = (material_group_name or category_name).lower()

# Analysis window as ISO dates (YYYY-MM-DD)
analysis_start_date = "2023-10-01"
analysis_end_date = "2024-09-28"

In [0]:
# Working folder for this category / material group (spaces become underscores)
catg_lower = "_".join(catg_lower.split(' '))
directory = f'/dbfs/FileStore/shared_uploads/prem@loyalytics.in/assortment_optimization/{catg_lower}/'

# Short region code used in file names; anything not listed maps to "shj"
region_abr = {
    "ABU DHABI": "auh",
    "AL AIN": "aln",
    "DUBAI": "dxb",
}.get(region, "shj")

# Optional SQL predicate: empty for a category-level run
material_group_condition = (
    ""
    if material_group_name == ""
    else "AND material_group_name = '" + material_group_name + "'"
)

In [0]:
# Load the five input tables from Spark into pandas. Every query filters on the
# selected category and, when set, the material group predicate.

# Customer-level data for the selected region, ordered by material_id
cust = spark.sql(f"""SELECT *
                 FROM dev.sandbox.pj_ao_customer_data
                 WHERE category_name = '{category_name}'
                 {material_group_condition}
                 AND region_name = '{region}'""").toPandas().sort_values(by = 'material_id').reset_index(drop = True)

# GP table named "12months" for the selected region, ordered by material_id
gp = spark.sql(f"""SELECT *
                 FROM dev.sandbox.pj_ao_gp_12months
                 WHERE category_name = '{category_name}'
                 {material_group_condition}
                 AND region_name = '{region}'""").toPandas().sort_values(by = 'material_id').reset_index(drop = True)

# GP table named "3months" for the selected region, ordered by material_id
gp_3m = spark.sql(f"""SELECT *
                  FROM dev.sandbox.pj_ao_gp_3months
                  WHERE category_name = '{category_name}'
                  {material_group_condition}
                  AND region_name = '{region}'""").toPandas().sort_values(by = 'material_id').reset_index(drop = True)

# Weekly data for the selected region; rows are ordered week-first, then material_id
df = spark.sql(f"""SELECT *
               FROM dev.sandbox.pj_ao_weekly_data
               WHERE category_name = '{category_name}'
               {material_group_condition}
               AND region_name = '{region}'""").toPandas().sort_values(by = ['week_number', 'material_id']).reset_index(drop = True)

# Material attributes; note this query has no region filter
attr = spark.sql(f"""SELECT *
                 FROM dev.sandbox.pj_ao_attributes
                 WHERE category_name = '{category_name}'
                 {material_group_condition}""").toPandas().sort_values(by = 'material_id').reset_index(drop = True)

# Coerce volume to numeric; values that cannot be parsed become NaN
attr['volume'] = pd.to_numeric(attr['volume'], errors='coerce')
# mean_value = attr['volume'].mean()
# attr['volume'].fillna(mean_value, inplace=True)
# attr['units'] = attr['units'].replace("Not Available", "G")
# Build a display label: litres are multiplied by 1000, everything else is kept as is.
# NOTE(review): the 'KG' branch multiplies by 1, so it is identical to the default
# branch, and every value receives the 'ML' suffix regardless of its unit — confirm
# whether KG should be scaled by 1000 and/or labelled differently.
attr['new_volume'] = np.where(attr.units == 'L', attr.volume*1000, np.where(attr.units == 'KG', attr.volume*1, attr.volume))
attr['new_volume'] = attr['new_volume'].astype('object')
attr['new_volume'] = attr['new_volume'].astype(str) + 'ML'

rationalized_flag = 0 # 1 = SKUs are rationalized already, 0 = SKUs are not rationalized yet
if rationalized_flag != 0:
    # File is expected at <directory><catg_lower>_rationalized_<region_abr>.csv
    rationalized_df = pd.read_csv(f"{directory}{catg_lower}_rationalized_" + region_abr + ".csv")

In [0]:
# Like-for-like (LFL) comparison window: the 364 days that end on the day
# before the analysis start date, kept as YYYY-MM-DD strings.
date_obj = datetime.strptime(analysis_start_date, "%Y-%m-%d")
lfl_start_date = (date_obj - timedelta(days=364)).strftime("%Y-%m-%d")
lfl_end_date = (date_obj - timedelta(days=1)).strftime("%Y-%m-%d")

#Data Prep

In [0]:
if rationalized_flag != 0:
    # Materials that already carry a delisted date
    rationalized_materials = rationalized_df[~rationalized_df['delisted_date'].isna()]['material_id'].values
    # Render the distinct ids as a SQL IN-list, e.g. "(1, 2, 3)". .tolist()
    # converts NumPy scalars to plain Python values so repr() gives clean
    # literals; an empty input gives "()".
    rationalized_materials_sql = (
        '('
        + ', '.join(repr(material) for material in pd.unique(rationalized_materials).tolist())
        + ')'
    )

In [0]:
# Per-metric slices of the weekly data (material, week, metric)
# NOTE(review): sale, vol, cwd and store are not referenced again in the visible
# part of the notebook — confirm they are still needed.
sale = df[['material_id', 'week_number', 'sale']]
vol = df[['material_id', 'week_number', 'vol']]
cwd = df[['material_id', 'week_number', 'cwd']]
store = df[['material_id', 'week_number', 'material_weekly_store_count']]
# Ratio of (vip_cust + freq_cust) to (tot_vip + tot_freq) per row of cust
cust['vip_freq_pnt'] = (cust['vip_cust'] + cust['freq_cust']) / (cust['tot_vip'] + cust['tot_freq'])
# Magnitude of GP, used later as the bubble size
gp['gp_abs']=np.abs(gp['GP'])

# Weekly sale divided by the weekly store count, computed row by row via safe_division
df['SSW'] = df.apply(lambda row: safe_division(row['sale'], row['material_weekly_store_count']), axis=1)

In [0]:
# Translate the bucket labels in gp to the recommendation names used below.
# Replacements are applied one after another, in this order.
_bucket_relabels = (
    ('Maintain', 'SUPPORT_MORE_DIST'),  # high beta
    ('Delist', 'DELIST'),
    ('Grow', 'MAINTAIN_OR_GROW_BY_PROMO'),  # low beta
    ('Observe', 'OBSERVE'),
)
for _old_label, _new_label in _bucket_relabels:
    gp['new_buckets'] = gp['new_buckets'].str.replace(_old_label, _new_label)

# Keep only weekly rows whose material has a bucket, then take a working copy
df = df.merge(gp[['material_id', 'new_buckets']], on='material_id', how='inner')
fdf = df.copy()
fdf.head()

In [0]:
def _summarise_by_material(weekly_rows):
    """Collapse weekly rows to one row per material_id.

    Sales and volume are summed, store count and cwd are averaged, and the
    bucket label is reduced with max. Column names come back lower-cased, with
    the two averages named avg_store_selling and avg_cwd.
    """
    summary = weekly_rows.groupby("material_id").agg(
        {
            "sale": "sum",
            "vol": "sum",
            "material_weekly_store_count": "mean",
            "cwd": "mean",
            "new_buckets": "max",
        }
    ).reset_index()
    summary = summary.rename(
        columns={
            "material_weekly_store_count": "AVG_STORE_SELLING",
            "cwd": "AVG_CWD",
        }
    )
    summary.columns = [name.lower() for name in summary.columns]
    return summary


############# FULL  dataframe ##############

agg_table_full = _summarise_by_material(fdf)

############# L3M dataframe ##############

# Only weeks after week 40 contribute to the L3M table
agg_table_l3m = _summarise_by_material(fdf[fdf['week_number']>40])

In [0]:
# Preview the first two rows of the per-material aggregate
agg_table_full.head(2)

In [0]:
def _attach_attributes_gp_customers(agg_table):
    """Join attributes (inner), GP and customer data (left) onto an aggregate table.

    Also adds gp_flag: "Negative" where GP < 0, otherwise "Positive".
    """
    enriched = agg_table.merge(attr, on='material_id', how='inner')
    enriched = enriched.merge(gp.drop(columns=['new_buckets']), on='material_id', how='left')
    enriched = enriched.merge(cust.drop(columns=['material_name']), on='material_id', how='left')
    enriched['gp_flag'] = np.where(enriched.GP < 0, "Negative", "Positive")
    return enriched


############ merging GP data ########################

eda_agg_table_full = _attach_attributes_gp_customers(agg_table_full)

############ merging GP data L3m ########################

### GP data is still full here
eda_agg_table_l3m = _attach_attributes_gp_customers(agg_table_l3m)

In [0]:
# Preview the first two rows of the enriched per-material table
eda_agg_table_full.head(2)

In [0]:
# Render the full enriched table (presumably relies on the notebook runtime's
# DataFrame.display() helper — confirm outside Databricks)
eda_agg_table_full.display()

#Delist Reco Metrics (Deck)

In [0]:
# Start the delist-recommendation metrics (drm) frame: weekly sale/vol per
# material joined to its bucket from gp.
drm = pd.merge(df[['material_id', 'week_number', 'sale', 'vol']],
               gp[['material_id', 'new_buckets']],
               on='material_id', how='inner')

# Add PL flag to the materials
# (left join: materials missing from attr keep NaN name/brand)
drm = pd.merge(drm, attr[['material_id', 'material_name', 'brand']], on='material_id', how='left')
def check_pl_flag(text):
    """Return 1 when a brand name is flagged, else 0.

    A brand is flagged when it contains the standalone word 'IMP' or 'IMPORT',
    or when it is exactly 'LULU PRIVATE LABEL'.

    Args:
        text: Brand name. Non-string values (e.g. NaN left by a left merge)
            are treated as not flagged instead of raising.

    Returns:
        1 or 0.
    """
    if not isinstance(text, str):
        return 0
    words = text.split()
    if 'IMP' in words or 'IMPORT' in words or text == 'LULU PRIVATE LABEL':
        return 1
    return 0
# Flag each row from its brand name
drm['pl_flag'] = drm['brand'].apply(check_pl_flag)

# Merge GP Data into the dataframe
# (inner join: materials absent from gp_3m are dropped from drm)
drm = pd.merge(drm, gp_3m[['material_id', 'gp_abs_Q4', 'gp_contri_Q4']],
                  on = 'material_id', how = 'inner')

# Calculate Q4 sales and vol
# Q4 here means week_number >= 40. Per-material totals are merged back onto
# every weekly row; materials with no Q4 rows get 0.
# NOTE(review): week 40 is included here and also in the Q3 window below
# (27 <= week <= 40), while the L3M aggregate uses week_number > 40 — confirm
# which boundary is intended.
sale_12weeks = drm[drm['week_number'] >= 40].groupby('material_id')['sale'].sum().reset_index()
total_quantity_sold_12weeks = drm[drm['week_number'] >= 40].groupby('material_id')['vol'].sum().reset_index()
drm = pd.merge(drm, sale_12weeks, on='material_id', how='left', suffixes=('', '2'))
drm = pd.merge(drm, total_quantity_sold_12weeks, on='material_id', how='left', suffixes=('', '2'))
drm['sale_Q4'] = drm['sale2'].fillna(0)
drm['vol_Q4'] = drm['vol2'].fillna(0)
drm = drm.drop(columns=['sale2', 'vol2'])

# Calculate weighted contribution of sales and vol of Q4
# head(1) takes one row per material so each material's total is counted once
sale_12weeks_sum = drm.groupby('material_id')['sale_Q4'].head(1).sum()
total_quantity_sold_12weeks_sum = drm.groupby('material_id')['vol_Q4'].head(1).sum()
drm['sales_contri_Q4'] = (drm['sale_Q4'] / sale_12weeks_sum)
drm['quantity_contri_Q4'] = (drm['vol_Q4'] / total_quantity_sold_12weeks_sum)
# Equal 50/50 blend of the sales share and the volume share
drm['sale_vol_contri_Q4'] = (drm['sales_contri_Q4']*50/100 + drm['quantity_contri_Q4']*50/100)

# Calculate the Q3 sales and vol
# Q3 here means 27 <= week_number <= 40
sale_Q3 = drm[(drm['week_number'] >= 27) & (drm['week_number'] <= 40)].groupby('material_id')['sale'].sum().reset_index()
total_quantity_sold_Q3 = drm[(drm['week_number'] >= 27) & (drm['week_number'] <= 40)].groupby('material_id')['vol'].sum().reset_index()
drm = pd.merge(drm, sale_Q3, on='material_id', how='left', suffixes=('', '2'))
drm = pd.merge(drm, total_quantity_sold_Q3, on='material_id', how='left', suffixes=('', '2'))
drm['sale_Q3'] = drm['sale2'].fillna(0)
drm['vol_Q3'] = drm['vol2'].fillna(0)
drm = drm.drop(columns=['sale2', 'vol2'])

# Calculate weighted contribution of sales and vol of Q3
# (the *_12weeks_sum names are reused here for the Q3 totals)
sale_12weeks_sum = drm.groupby('material_id')['sale_Q3'].head(1).sum()
total_quantity_sold_12weeks_sum = drm.groupby('material_id')['vol_Q3'].head(1).sum()
drm['sales_contri_Q3'] = (drm['sale_Q3'] / sale_12weeks_sum)
drm['quantity_contri_Q3'] = (drm['vol_Q3'] / total_quantity_sold_12weeks_sum)
drm['sale_vol_contri_Q3'] = (drm['sales_contri_Q3']*50/100 + drm['quantity_contri_Q3']*50/100)

def _relative_change(current, baseline):
    """Return (current - baseline) / baseline with +inf and NaN replaced by 0."""
    change = (current - baseline) / baseline
    return change.replace(float('inf'), 0).fillna(0)


# Calculate growth from Q4 sales vs Q3 sales
drm['sales_growth_Q4_vs_Q3'] = _relative_change(drm['sale_Q4'], drm['sale_Q3'])

# Calculate growth from Q4 sales contribution vs Q3 sales contribution
drm['sales_contri_growth_Q4_vs_Q3'] = _relative_change(drm['sales_contri_Q4'], drm['sales_contri_Q3'])

# Calculate growth from Q4 sales & vol contribution vs Q3 sales & vol contribution
drm['sales_vol_contri_growth_Q4_vs_Q3'] = _relative_change(drm['sale_vol_contri_Q4'], drm['sale_vol_contri_Q3'])

# Calculate weekly sales growth
# Rows are ordered material by material, week by week, so each growth value
# stays on the row it was computed for. (The weekly data is loaded week-first,
# so a growth list built per material cannot be assigned to it positionally.)
temp = (
    drm[drm['week_number'] >= 40]
    .sort_values(by=['material_id', 'week_number'])
    .reset_index(drop=True)
)
weekly_sale = temp['sale'].astype(float)
# Sale of the same material in its previous available week (NaN for its first week)
previous_sale = weekly_sale.groupby(temp['material_id']).shift(1)
# Week-over-week change. The first week of each material and any division by a
# zero previous week are set to 0, matching how the quarterly growth columns
# treat inf/NaN.
temp['sales_weekly_growth'] = (
    ((weekly_sale - previous_sale) / previous_sale)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

# Calculate the average sales weekly growth
# The first week of each material has no predecessor and is left out of the
# mean; materials with a single week get 0.
avg_weekly_sales_growth_df = (
    temp.groupby('material_id')['sales_weekly_growth']
    .apply(lambda growth: growth.iloc[1:].mean())
    .fillna(0.0)
    .rename('avg_weekly_sales_growth_Q4')
    .reset_index()
)
drm = pd.merge(drm, avg_weekly_sales_growth_df, on='material_id', how='left')
# Materials with no rows from week 40 onwards get 0
drm['avg_weekly_sales_growth_Q4'] = drm['avg_weekly_sales_growth_Q4'].fillna(0)

# Remove the weekly numbers
# Dropping the per-week columns and de-duplicating leaves one row per distinct
# combination of the remaining columns
drm = drm.drop(columns=['week_number', 'sale', 'vol']).drop_duplicates().reset_index(drop=True)

# Assign ranks to GP, sales, and vol of Q4
# (rank 1 = largest value; ties receive pandas' default average rank)
drm['gp_rank_Q4'] = drm['gp_abs_Q4'].rank(ascending=False)
drm['sales_rank_Q4'] = drm['sale_Q4'].rank(ascending=False)
drm['vol_rank_Q4'] = drm['vol_Q4'].rank(ascending=False)

# Calculate contribution index for sales, and also for sales & vol combined
# (each contribution divided by the mean contribution across rows)
drm['sales_contri_index_Q4'] = drm['sales_contri_Q4'] / drm['sales_contri_Q4'].mean()
drm['sales_vol_contri_index_Q4'] = drm['sale_vol_contri_Q4'] / drm['sale_vol_contri_Q4'].mean()

# Add the already delisted materials as a flag
if rationalized_flag != 0:
    drm['delist_complete'] = drm['material_id'].isin(rationalized_materials).astype(int)

# Calculate the Q4 sales % coming from all Delist recommended SKUs
delist_sales_contri_Q4 = drm[drm['new_buckets'] == 'DELIST']['sales_contri_Q4'].sum()
delist_gp_contri_Q4 = drm[drm['new_buckets'] == 'DELIST']['gp_contri_Q4'].sum()
print(f"Delist SKUs ({len(drm[drm['new_buckets'] == 'DELIST'])}) contribute to {round(delist_sales_contri_Q4*100, 2)}% sales and {round(delist_gp_contri_Q4*100, 2)}% GP")

In [0]:
# Render the delist-recommendation metrics table
drm.display()

#Delist Priority

In [0]:
# Rank the DELIST-bucket SKUs into a removal order (priority 1 first). Four
# groups are numbered consecutively, in the order they are built below.
delist_priority = drm[drm['new_buckets'] == "DELIST"][['material_id', 'material_name', 'brand', 'sales_contri_Q4', 'sales_growth_Q4_vs_Q3']].reset_index(drop = True)

# Simple average of Q4 sales contribution and Q4-vs-Q3 sales growth
delist_priority['wtd_avg'] = (delist_priority['sales_contri_Q4'] + delist_priority['sales_growth_Q4_vs_Q3'])/2

# Group 1: average is exactly 0
df_wtd_avg_0 = delist_priority[delist_priority['wtd_avg'] == 0].sort_values(by=['sales_contri_Q4', 'sales_growth_Q4_vs_Q3']).reset_index(drop=True)
df_wtd_avg_0['Delist Priority'] = range(1, len(df_wtd_avg_0) + 1)

# Group 2: no Q4 sales contribution and growth of exactly -1
start_priority = len(df_wtd_avg_0) + 1
df_sales_0_growth_neg1 = delist_priority[(delist_priority['sales_contri_Q4'] == 0) & (delist_priority['sales_growth_Q4_vs_Q3'] == -1)].sort_values(by=['sales_contri_Q4', 'sales_growth_Q4_vs_Q3']).reset_index(drop=True)
df_sales_0_growth_neg1['Delist Priority'] = range(start_priority, start_priority + len(df_sales_0_growth_neg1))

# Group 3: some Q4 sales contribution but growth of exactly 0
start_priority = len(df_wtd_avg_0) + len(df_sales_0_growth_neg1) + 1
df_sales_non0_growth_0 = delist_priority[(delist_priority['sales_contri_Q4'] != 0) & (delist_priority['sales_growth_Q4_vs_Q3'] == 0)].sort_values(by=['sales_contri_Q4', 'sales_growth_Q4_vs_Q3']).reset_index(drop=True)
df_sales_non0_growth_0['Delist Priority'] = range(start_priority, start_priority + len(df_sales_non0_growth_0))

# Collect the materials already placed in groups 1-3
# (this loop rebinds the notebook-level name `temp`)
dataframes = [df_wtd_avg_0, df_sales_0_growth_neg1, df_sales_non0_growth_0]
materials_to_remove = []
for temp in dataframes:
    materials_to_remove.extend(temp['material_id'].tolist())

# Group 4: everything else, ordered by ascending average
remaining_df = delist_priority[~delist_priority['material_id'].isin(materials_to_remove)]
start_priority = len(df_wtd_avg_0) + len(df_sales_0_growth_neg1) + len(df_sales_non0_growth_0) + 1
remaining_df = remaining_df.sort_values(by=['wtd_avg']).reset_index(drop=True)
remaining_df['Delist Priority'] = range(start_priority, start_priority + len(remaining_df))

# Stack the four groups and present them in priority order
delist_priority = pd.concat([df_wtd_avg_0, df_sales_0_growth_neg1, df_sales_non0_growth_0, remaining_df], ignore_index=True)
delist_priority = delist_priority.sort_values(by='Delist Priority').reset_index(drop=True)
delist_priority = delist_priority[['material_id', 'material_name', 'brand', 'Delist Priority', 'sales_contri_Q4', 'sales_growth_Q4_vs_Q3', 'wtd_avg']]

In [0]:
# Render the prioritised delist list
delist_priority.display()

#Brand & Attribute Check

In [0]:
# One row per material with its bucket, brand, attribute (type_bin) and Q4 sales
attr_brand_df = drm[['material_id', 'new_buckets', 'brand', 'sale_Q4']]
attr_brand_df = pd.merge(attr_brand_df, attr[['material_id', 'type_bin']], on = 'material_id', how = 'inner')
attr_brand_df.rename(columns={'brand':'Brand', 'type_bin':'Attribute'}, inplace=True)

# Number of materials per brand in each recommendation bucket
brand_check_df = attr_brand_df.pivot_table(index='Brand', columns='new_buckets',
                                     values='material_id', aggfunc='count',
                                     fill_value=0).reset_index()
brand_check_df.columns.name = None

# pivot_table only creates columns for buckets that occur in the data; add any
# missing bucket as a zero count so the arithmetic below cannot raise KeyError.
for _bucket in ['DELIST', 'MAINTAIN_OR_GROW_BY_PROMO', 'OBSERVE', 'SUPPORT_MORE_DIST']:
    if _bucket not in brand_check_df.columns:
        brand_check_df[_bucket] = 0

# Share of each brand's materials that are recommended for delisting
brand_check_df['Delist %'] = brand_check_df['DELIST'] / (brand_check_df['DELIST'] + brand_check_df['MAINTAIN_OR_GROW_BY_PROMO'] + brand_check_df['OBSERVE'] + brand_check_df['SUPPORT_MORE_DIST'])

# Each brand's share of total Q4 sales
brand_share_df = attr_brand_df.groupby('Brand')['sale_Q4'].sum().reset_index()
overall_brand_sales = brand_share_df['sale_Q4'].sum()
brand_share_df['sale_Q4'] = brand_share_df['sale_Q4'] / overall_brand_sales
brand_share_df.rename(columns={'sale_Q4':'Brand Share %'}, inplace=True)

# Combine counts and share, largest brand first
brand_check_df = pd.merge(brand_check_df, brand_share_df, on = 'Brand', how = 'inner')
brand_check_df = brand_check_df.sort_values(by = 'Brand Share %', ascending = False).reset_index(drop = True)
brand_check_df.display()

In [0]:
# Number of materials per attribute in each recommendation bucket
attr_check_df = attr_brand_df.pivot_table(index='Attribute', columns='new_buckets',
                                     values='material_id', aggfunc='count',
                                     fill_value=0).reset_index()
attr_check_df.columns.name = None

# pivot_table only creates columns for buckets that occur in the data; add any
# missing bucket as a zero count so the arithmetic below cannot raise KeyError.
for _bucket in ['DELIST', 'MAINTAIN_OR_GROW_BY_PROMO', 'OBSERVE', 'SUPPORT_MORE_DIST']:
    if _bucket not in attr_check_df.columns:
        attr_check_df[_bucket] = 0

attr_check_df['Grand Total'] = attr_check_df['DELIST'] + attr_check_df['MAINTAIN_OR_GROW_BY_PROMO'] + attr_check_df['OBSERVE'] + attr_check_df['SUPPORT_MORE_DIST']

# Column totals, appended as a 'Grand Total' row
sums = attr_check_df[['DELIST', 'MAINTAIN_OR_GROW_BY_PROMO', 'OBSERVE', 'SUPPORT_MORE_DIST', 'Grand Total']].sum()

new_row = pd.DataFrame([['Grand Total', sums['DELIST'],
                         sums['MAINTAIN_OR_GROW_BY_PROMO'], sums['OBSERVE'],
                         sums['SUPPORT_MORE_DIST'], sums['Grand Total']]],
                       columns=['Attribute', 'DELIST', 'MAINTAIN_OR_GROW_BY_PROMO',
                                'OBSERVE', 'SUPPORT_MORE_DIST', 'Grand Total'])

attr_check_df = pd.concat([attr_check_df, new_row], ignore_index=True)

# Share of each attribute's materials that are recommended for delisting
attr_check_df['Delist %'] = attr_check_df['DELIST'] / (attr_check_df['DELIST'] + attr_check_df['MAINTAIN_OR_GROW_BY_PROMO'] + attr_check_df['OBSERVE'] + attr_check_df['SUPPORT_MORE_DIST'])

# Each attribute's share of total Q4 sales
attr_share_df = attr_brand_df.groupby('Attribute')['sale_Q4'].sum().reset_index()
overall_brand_sales = attr_share_df['sale_Q4'].sum()
attr_share_df['sale_Q4'] = attr_share_df['sale_Q4'] / overall_brand_sales
attr_share_df.rename(columns={'sale_Q4':'Sales Contri %'}, inplace=True)

attr_check_df = pd.merge(attr_check_df, attr_share_df, on = 'Attribute', how = 'left')
# The 'Grand Total' row has no match in attr_share_df; its share is set to 1
attr_check_df['Sales Contri %'] = attr_check_df['Sales Contri %'].fillna(1)
attr_check_df = attr_check_df.sort_values(by = 'Sales Contri %', ascending = False).reset_index(drop = True)
# After sorting, the first row is moved to the bottom of the table
# (presumably the 'Grand Total' row, since its share is 1 — verify when a
# single attribute holds all sales)
attr_check_df = pd.concat([attr_check_df, attr_check_df.iloc[[0]]], ignore_index=True)
attr_check_df = attr_check_df.drop(attr_check_df.index[0]).reset_index(drop=True)
attr_check_df.display()

#Save Data For Dashboard

In [0]:
# Check whether results for this exact category / material group / region were
# written before. Category-level runs are stored with a blank
# material_group_name, so the filter is on the name itself.
query = f"""
SELECT *
FROM dev.sandbox.pj_ao_dashboard_delist_reco_skus_analysis
WHERE category_name = '{category_name}'
AND material_group_name = '{material_group_name}'
AND region_name = '{region}'
"""

data_empty = spark.sql(query).toPandas().empty

if not data_empty:
    # Delete exactly the rows found above. Using the optional
    # material_group_condition here would, for a category-level run (blank
    # group), remove every material group's rows for the category and region.
    query = f"""
    DELETE
    FROM dev.sandbox.pj_ao_dashboard_delist_reco_skus_analysis
    WHERE
        category_name = '{category_name}'
        AND material_group_name = '{material_group_name}'
        AND region_name = '{region}'
    """

    spark.sql(query)

In [0]:
# Look up the department of the selected category (and material group, when
# one is set). The optional predicate is used so that a category-level run
# (blank material group) still finds a row.
query = f"""
SELECT department_name
FROM gold.material.material_master
WHERE category_name = '{category_name}'
{material_group_condition}
GROUP BY 1
"""

department_rows = spark.sql(query).toPandas()
if department_rows.empty:
    raise ValueError(
        f"No department found for category '{category_name}' "
        f"and material group '{material_group_name}'"
    )
# The first returned department is used
department_name = department_rows.values[0, 0]

In [0]:
# Assemble the per-material rows that are saved for the dashboard
dashboard_df = drm[['material_id', 'material_name', 'brand', 'sales_contri_Q4', 'gp_contri_Q4']].copy()
dashboard_df['region_name'] = region
dashboard_df['material_group_name'] = material_group_name
dashboard_df['category_name'] = category_name
dashboard_df['department_name'] = department_name

# Delist priority; materials outside the delist list get 0
dashboard_df = pd.merge(dashboard_df, delist_priority[['material_id', 'Delist Priority']], on='material_id', how='left')
dashboard_df['Delist Priority'] = dashboard_df['Delist Priority'].fillna(0)
dashboard_df = pd.merge(dashboard_df, attr[['material_id', 'type']], on='material_id', how='left')

# Brand_Delist_Flag = 1 for brands whose materials are all in the DELIST bucket
# (.copy() so the new column is added to an independent frame)
temp = brand_check_df[brand_check_df['Delist %'] == 1][['Brand']].copy()
temp['Brand_Delist_Flag'] = 1
dashboard_df = pd.merge(dashboard_df, temp, left_on='brand', right_on='Brand', how='left')
dashboard_df['Brand_Delist_Flag'] = dashboard_df['Brand_Delist_Flag'].fillna(0)

# Same check per attribute 'type': count materials per bucket for each type
temp = drm[['material_id', 'new_buckets']]
temp = pd.merge(temp, attr[['material_id', 'type']], on = 'material_id', how = 'inner')
temp.rename(columns={'type':'Attribute'}, inplace=True)
temp = temp.pivot_table(index='Attribute', columns='new_buckets',
                                    values='material_id', aggfunc='count',
                                    fill_value=0).reset_index()
temp.columns.name = None
# pivot_table only creates columns for buckets that occur in the data; add any
# missing bucket as a zero count so the arithmetic below cannot raise KeyError.
for _bucket in ['DELIST', 'MAINTAIN_OR_GROW_BY_PROMO', 'OBSERVE', 'SUPPORT_MORE_DIST']:
    if _bucket not in temp.columns:
        temp[_bucket] = 0
temp['Delist %'] = temp['DELIST'] / (temp['DELIST'] + temp['MAINTAIN_OR_GROW_BY_PROMO'] + temp['OBSERVE'] + temp['SUPPORT_MORE_DIST'])
temp = temp.sort_values(by = 'Delist %', ascending = False).reset_index(drop = True)

# Type_Delist_Flag = 1 for types whose materials are all in the DELIST bucket
temp = temp[temp['Delist %'] == 1][['Attribute']].copy()
temp['Type_Delist_Flag'] = 1
dashboard_df = pd.merge(dashboard_df, temp, left_on='type', right_on='Attribute', how='left')
dashboard_df['Type_Delist_Flag'] = dashboard_df['Type_Delist_Flag'].fillna(0)

# Drop the helper join keys and make the priority column name free of spaces
dashboard_df = dashboard_df.drop(columns = ['Brand', 'Attribute'])
dashboard_df.rename(columns={'Delist Priority':'Delist_Priority'}, inplace=True)

In [0]:
# Append the dashboard rows to the shared results table
# NOTE(review): "overwriteSchema" is set together with mode("append") — confirm
# the option has the intended effect in append mode.
spark_df = spark.createDataFrame(dashboard_df)
spark_df.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_ao_dashboard_delist_reco_skus_analysis")

#EDA

##YoY Sales

In [0]:
# Per-material sales for the LFL window versus the current window. Rows dated
# on or before lfl_end_date are labelled "LFL", everything later "Current";
# growth is (Current - LFL) / LFL.
# NOTE(review): the query has a lower bound (lfl_start_date) but no upper bound,
# and analysis_end_date is not used — confirm the source table never holds
# rows after the analysis window.
query = f"""
WITH cte AS (
    SELECT
        (CASE WHEN business_day <= '{lfl_end_date}' THEN "LFL" ELSE "Current" END) AS year_info,
        material_id,
        ROUND(SUM(sales)) AS sales
    FROM dev.sandbox.pj_ao_framework_data
    WHERE
        business_day >= '{lfl_start_date}'
        AND region_name = '{region}'
        AND category_name = '{category_name}'
        {material_group_condition}
    GROUP BY year_info, material_id
)

SELECT
    material_id,
    MAX(CASE WHEN year_info = "LFL" THEN sales ELSE 0 END) AS LFL_sales,
    MAX(CASE WHEN year_info = "Current" THEN sales ELSE 0 END) AS Current_sales,
    (Current_sales - LFL_sales)/LFL_sales AS growth
FROM cte
GROUP BY material_id
ORDER BY material_id
"""

e = spark.sql(query).toPandas()
# Missing or infinite growth (e.g. no LFL sales) is treated as 0
e['growth'] = e['growth'].fillna(0)
e['growth'] = e['growth'].replace(float('inf'), 0)

In [0]:
# Total sales for the previous (LFL) and current periods across all materials
py_sales = e['LFL_sales'].sum()
cy_sales = e['Current_sales'].sum()
yoy_df = pd.DataFrame({'year': ['PY', 'CY'], 'sales': [py_sales, cy_sales]})

# Overall growth in percent, one decimal, as text for the chart title
growth = str(round((cy_sales - py_sales)/py_sales*100,1))

fig = px.bar(yoy_df, x='year', y='sales', title=f'Year Over Year Growth ({growth}%)')
fig.show()

##TLE and SSW

In [0]:
# pldf=fdf[fdf['material_id']==1545241]
# fig = px.scatter(pldf, x="SSW", y="cwd")
# fig.add_hline(y=pldf.cwd.mean())
# fig.add_vline(x=pldf.SSW.mean())
# fig.show()

In [0]:
# Make sure the two measure columns are plain floats before aggregating
for _measure in ('sale', 'vol'):
    fdf[_measure] = fdf[_measure].astype('float')

In [0]:
# Total sale and vol per material. The two columns are selected before summing
# so text columns such as new_buckets are never aggregated.
gp_bub = fdf.groupby('material_id')[['sale', 'vol']].sum().reset_index()

# Attach GP data plus name/brand, then add GP magnitude and sign
temp = pd.merge(gp_bub, gp, on='material_id', how='inner')
temp = pd.merge(temp, attr[['material_id', 'material_name','brand']], on='material_id', how='inner')
temp['GP_ABS'] = np.abs(temp['GP'])
temp['GP_FLAG'] = np.where(temp.GP<0,"Negative","Positive")

In [0]:
# Preview the first two rows of the GP bubble table
temp.head(2)

In [0]:
# Total sale per brand ('sale' is selected before summing so no other column
# is aggregated)
xd = temp.groupby('brand')['sale'].sum().reset_index()

##Sale, vol and other Bar Charts

In [0]:
# Preview the first row of the enriched per-material table
eda_agg_table_full.head(1)

In [0]:
# Bar chart of total sale per brand, largest first
plot_bar(eda_agg_table_full,'brand','sale')

##SKU Level

###Bubble - GP Flag

In [0]:
# Bubble chart of every SKU: sale (x) vs vip_freq_pnt (y), bubble size = |gp_abs|,
# colour = GP sign. Rows missing either axis value are dropped first.
pbldf=eda_agg_table_full.dropna(subset=['sale','vip_freq_pnt'])

x_val='sale'
y_val='vip_freq_pnt'
color_val='gp_flag'
hover_name_val='material_name'
size_max_val=40
size_val='gp_abs'

fig = px.scatter(pbldf, x=x_val, y=y_val, size=np.abs(pbldf[size_val]), 
                 color=color_val,
                 hover_name=hover_name_val, log_x=False, log_y=False, size_max=size_max_val,title="ALL SKUs", color_discrete_map={"Positive": "#8993f8", "Negative": "#ec8272"})
# Reference lines: mean of y and 95th percentile of x
fig.add_hline(y=pbldf[y_val].mean())
fig.add_vline(x=pbldf[x_val].quantile(0.95))
fig.show()

In [0]:
# Same chart restricted to the already rationalized SKUs; the reference lines
# are still computed on the full pbldf
if rationalized_flag != 0:
    fig = px.scatter(pbldf[pbldf['material_id'].isin(rationalized_materials)], x=x_val, y=y_val, size=np.abs(pbldf[pbldf['material_id'].isin(rationalized_materials)][size_val]),
                    color=color_val,
                    hover_name=hover_name_val, log_x=False, log_y=False, size_max=size_max_val,title="Rationalized SKUs", color_discrete_map={"Positive": "#8993f8", "Negative": "#ec8272"})
    fig.add_hline(y=pbldf[y_val].mean())
    fig.add_vline(x=pbldf[x_val].quantile(0.95))
    fig.show()

In [0]:
# Quadrant of each SKU relative to the chart's reference lines
# (95th percentile of sale, mean of vip_freq_pnt):
#   1 = high sale & high vip    2 = high sale & low vip
#   3 = low sale & low vip      4 = low sale & high vip    0 = none of these
_sale_cutoff = pbldf["sale"].quantile(0.95)
_vip_cutoff = pbldf["vip_freq_pnt"].mean()

_high_sale = pbldf["sale"] >= _sale_cutoff
_low_sale = pbldf["sale"] < _sale_cutoff
_high_vip = pbldf["vip_freq_pnt"] >= _vip_cutoff
_low_vip = pbldf["vip_freq_pnt"] < _vip_cutoff

pbldf["Quadrant"] = np.select(
    [
        _high_sale & _high_vip,
        _low_sale & _low_vip,
        _high_sale & _low_vip,
        _low_sale & _high_vip,
    ],
    [1, 3, 2, 4],
    default=0,
)

# 1 only for the brand named exactly "LULU PRIVATE LABEL"
pbldf['pl_flag'] = np.where(pbldf['brand'] == "LULU PRIVATE LABEL", 1, 0)

###Bubble - Growth Flag

In [0]:
# Attach YoY growth; SKUs without a growth row are dropped (inner join)
pbldf = pd.merge(pbldf, e[['material_id', 'growth']], on='material_id', how='inner')

# Thresholds: 33rd and 66th percentile of the non-negative growth values
# NOTE(review): presumably fails when no SKU has non-negative growth, since the
# percentile would be taken over an empty array — confirm
growth_positives = pbldf[pbldf['growth'] >= 0][['growth']].values
medium = np.percentile(growth_positives, 33)
high = np.percentile(growth_positives, 66)

# Label each SKU: Negative, or Low / Medium / High against the two thresholds.
# The thresholds are embedded in the label text as percentages.
pbldf["Growth_Flag"] = np.where(
    (pbldf["growth"] < 0), "Negative",
    np.where((pbldf["growth"] >= high), f"High >{round(high*100,2)}%",
    np.where((pbldf["growth"] >= medium), f"Medium >{round(medium*100,2)}%",
             f"Low <{round(medium*100,2)}%")))

In [0]:
# Bubble chart of every SKU coloured by growth band; the colour-map keys must
# match the label text built for Growth_Flag
fig = px.scatter(pbldf, x="sale", y="vip_freq_pnt", size="gp_abs", 
                 color="Growth_Flag",
                 hover_name="material_id", log_x=False, size_max=40, title="ALL SKUs", color_discrete_map={f"High >{round(high*100,2)}%": "#00CC96", f"Medium >{round(medium*100,2)}%": "#636EFA", f"Low <{round(medium*100,2)}%": "#AB63FA", "Negative": "#EF553B"})

# Reference lines: mean of vip_freq_pnt and 95th percentile of sale
fig.add_hline(y=pbldf.vip_freq_pnt.mean())
fig.add_vline(x=pbldf.sale.quantile(0.95))
fig.show()

In [0]:
# Same growth-band chart restricted to the already rationalized SKUs; the
# reference lines are still computed on the full pbldf
if rationalized_flag != 0:
    fig = px.scatter(pbldf[pbldf['material_id'].isin(rationalized_materials)], x="sale", y="vip_freq_pnt", size="gp_abs", 
                    color="Growth_Flag",
                    hover_name="material_id", log_x=False, size_max=40, title="Rationalized SKUs", color_discrete_map={f"High >{round(high*100,2)}%": "#00CC96", f"Medium >{round(medium*100,2)}%": "#636EFA", f"Low <{round(medium*100,2)}%": "#AB63FA", "Negative": "#EF553B"})

    fig.add_hline(y=pbldf.vip_freq_pnt.mean())
    fig.add_vline(x=pbldf.sale.quantile(0.95))
    fig.show()

In [0]:
# Summarise each quadrant: totals, averages and material counts
quadrants_df = (
    pbldf.groupby(['Quadrant'])
    .agg(
        {
            'sale': 'sum',
            'tot_cust_perc': 'sum',
            'gp_abs': 'sum',
            'vip_freq_pnt': 'mean',
            'material_id': 'count',
            'pl_flag': 'sum',
            'growth': 'mean',
        }
    )
    .reset_index()
)

# Each quadrant's share of total sale and of total absolute GP
quadrants_df['sales_share'] = quadrants_df['sale'] / quadrants_df['sale'].sum()
quadrants_df['gp_share'] = quadrants_df['gp_abs'] / quadrants_df['gp_abs'].sum()

# Friendlier names for the aggregated columns
quadrants_df.rename(
    columns={
        'material_id': 'materials',
        'pl_flag': 'pl_materials',
        'vip_freq_pnt': 'avg_vip_freq_pnt',
        'growth': 'avg_growth',
    },
    inplace=True,
)

quadrants_df[['Quadrant', 'sales_share', 'avg_vip_freq_pnt', 'gp_share', 'materials', 'pl_materials', 'avg_growth']]

###Bubble - Reco Flags

In [0]:
# Bubble chart of every SKU coloured by recommendation bucket. pbldf is rebuilt
# from the enriched table, so columns added to the earlier pbldf are not present.
pbldf=eda_agg_table_full.dropna(subset=['sale','vip_freq_pnt'])
# pbldf=pbldf[pbldf['new_buckets']=='DELIST']
x_val='sale'
y_val='vip_freq_pnt'
color_val='new_buckets'
hover_name_val='material_name'
size_max_val=40
size_val='gp_abs'

fig = px.scatter(pbldf, x=x_val, y=y_val, size=np.abs(pbldf[size_val]), 
                 color=color_val,
                 hover_name=hover_name_val, log_x=False, log_y=False, size_max=size_max_val,title="ALL SKUS BY BUCKETS", color_discrete_map={"MAINTAIN_OR_GROW_BY_PROMO": "#00CC96", "SUPPORT_MORE_DIST": "#636EFA", "OBSERVE": "#AB63FA", "DELIST": "#EF553B"})
# Reference lines: mean of y and 95th percentile of x
fig.add_hline(y=pbldf[y_val].mean())
fig.add_vline(x=pbldf[x_val].quantile(0.95))
fig.show()

In [0]:
# Same bucket chart restricted to the already rationalized SKUs; the reference
# lines are still computed on the full pbldf
if rationalized_flag != 0:
    fig = px.scatter(pbldf[pbldf['material_id'].isin(rationalized_materials)], x=x_val, y=y_val, size=np.abs(pbldf[pbldf['material_id'].isin(rationalized_materials)][size_val]), 
                    color=color_val,
                    hover_name=hover_name_val, log_x=False, log_y=False, size_max=size_max_val,title="RATIONALIZED SKUS BY BUCKETS", color_discrete_map={"MAINTAIN_OR_GROW_BY_PROMO": "#00CC96", "SUPPORT_MORE_DIST": "#636EFA", "OBSERVE": "#AB63FA", "DELIST": "#EF553B"})
    fig.add_hline(y=pbldf[y_val].mean())
    fig.add_vline(x=pbldf[x_val].quantile(0.95))
    fig.show()

In [0]:
# pbldf.groupby('brand').sum().reset_index()

###Reco Bubble - GP Flag

In [0]:
# One bubble per recommendation bucket: total sale (x) vs total vol (y),
# bubble size = |total gp_abs|, colour = sign of total GP.
pbldf=eda_agg_table_full.dropna(subset=['sale','vip_freq_pnt'])

# new_buckets
# new_volume
groupby_val='new_buckets'
x_val='sale'
y_val='vol'
color_val='gp_flag'
hover_name_val=groupby_val
size_max_val=40
size_val='gp_abs'

# Sum only the columns the chart needs; summing the whole frame would also try
# to aggregate text columns (names, brands, flags).
_plot_columns = list(dict.fromkeys([x_val, y_val, size_val, 'GP']))
pbldf=pbldf.groupby(groupby_val)[_plot_columns].sum().reset_index()
pbldf['gp_flag']=np.where(pbldf.GP<0,"Negative","Positive")

fig = px.scatter(pbldf, x=x_val, y=y_val, size=np.abs(pbldf[size_val]), 
                 color=color_val,
                 hover_name=hover_name_val, log_x=False, log_y=False, size_max=size_max_val,title="BY {}".format(groupby_val), color_discrete_map={"Positive": "#8993f8", "Negative": "#ec8272"})
# Reference lines: 95th percentile on both axes
fig.add_hline(y=pbldf[y_val].quantile(0.95))
fig.add_vline(x=pbldf[x_val].quantile(0.95))
fig.show()

In [0]:
# Total absolute GP per bucket ('gp_abs' is selected before summing so no other
# column is aggregated)
eda_agg_table_full.groupby('new_buckets')['gp_abs'].sum()

In [0]:
def get_top_N_list(dataframe, start, end):
    """Return material_ids ranked by total sale, sliced to positions [start:end).

    Args:
        dataframe: Frame with at least 'material_id' and 'sale' columns.
        start: First rank position to include (0 = highest total sale).
        end: Rank position to stop before.

    Returns:
        A list of material_id values ordered by descending summed 'sale'.
    """
    totals = dataframe.groupby('material_id')['sale'].sum().reset_index()
    # Sort on 'sale' explicitly: the grouped frame only has 'material_id' and
    # 'sale', so looking the sort key up by position in the input frame's
    # columns fails whenever that position is not the 'sale' column.
    ranked = totals.sort_values(by='sale', ascending=False)
    return ranked['material_id'][start:end].tolist()

###Trend Chart - Maintain or Grow

In [0]:
# Peek at the first two rows of fdf.
fdf.head(2)

In [0]:
# Rank MAINTAIN_OR_GROW_BY_PROMO materials by total sale and keep the top 20.
grow_rows = fdf[fdf['new_buckets'] == 'MAINTAIN_OR_GROW_BY_PROMO']
tx = (
    grow_rows.groupby('material_id')['sale']
    .sum()
    .reset_index()
    .sort_values(by='sale', ascending=False)
)
tx_list = tx['material_id'].head(20).tolist()

# All fdf rows for those materials, enriched with name and brand.
tbl = fdf[fdf['material_id'].isin(tx_list)].merge(
    attr[['material_id', 'material_name', 'brand']],
    on='material_id',
    how='inner',
)

# One line per material for sale, then for cwd, over week_number.
fig = px.line(
    tbl, x="week_number", y="sale", color='material_name',
    width=900, height=600, title="SALES L52W",
)
fig.show()

fig2 = px.line(
    tbl, x="week_number", y="cwd", color='material_name',
    width=900, height=600, title="CWD L52W",
)
fig2.show()

###Trend Chart - Delist

In [0]:
# Rank DELIST materials by total sale and keep the top 20.
delist_rows = fdf[fdf['new_buckets'] == 'DELIST']
tx = (
    delist_rows.groupby('material_id')['sale']
    .sum()
    .reset_index()
    .sort_values(by='sale', ascending=False)
)
tx_list = tx['material_id'].head(20).tolist()

# All fdf rows for those materials, enriched with name and brand.
tbl = fdf[fdf['material_id'].isin(tx_list)].merge(
    attr[['material_id', 'material_name', 'brand']],
    on='material_id',
    how='inner',
)

# One line per material for sale, then for cwd, over week_number.
fig = px.line(
    tbl, x="week_number", y="sale", color='material_name',
    width=900, height=600,
)
fig.show()

fig2 = px.line(
    tbl, x="week_number", y="cwd", color='material_name',
    width=900, height=600,
)
fig2.show()

##Misc

In [0]:
# Total sale for each new_buckets value.
sale_by_bucket = eda_agg_table_full.groupby('new_buckets')['sale'].sum().reset_index()
display(sale_by_bucket)

In [0]:
# Count of materials per brand (rows) and bucket (columns).
dl_br = pd.pivot_table(
    eda_agg_table_full,
    values='material_id',
    index='brand',
    columns='new_buckets',
    aggfunc='count',
)
# Take the row totals while 'brand' is still the index. Summing after
# reset_index() would pull the brand strings into the row-wise sum, which
# raises a TypeError on pandas >= 2.
bucket_totals = dl_br.sum(axis=1)
dl_br['total_%_delist'] = (dl_br['DELIST'] / bucket_totals) * 100
dl_br = dl_br.reset_index()
dl_br

In [0]:
# Weekly store counts for the 16 materials with the highest total sale.
top_materials = get_top_N_list(sale, 0, 16)
tbl = store[store['material_id'].isin(top_materials)].merge(
    attr[['material_id', 'material_name', 'brand']],
    on='material_id',
    how='inner',
)

fig = px.line(
    tbl,
    x="week_number",
    y="material_weekly_store_count",
    color='material_name',
    width=900,
    height=600,
)
fig.show()

##Interactions

In [0]:
# Weekly sale for the 16 materials with the highest total sale.
top_materials = get_top_N_list(sale, 0, 16)
tbl = sale[sale['material_id'].isin(top_materials)].merge(
    attr[['material_id', 'material_name']],
    on='material_id',
    how='inner',
)

fig = px.line(
    tbl,
    x="week_number",
    y="sale",
    color='material_name',
    width=900,
    height=600,
)
fig.show()

##Brand level

###Bubble - GP Flag

In [0]:
# Brand-level customer reach.
# The total_cust CTE counts distinct customers overall and for the 'VIP' and
# 'Frequentist' segments; the main SELECT computes the same three counts per
# brand and cross-joins the totals onto every brand row.
# NOTE(review): only a lower bound on business_day is applied here;
# analysis_end_date is not used in this query - confirm that is intended.
query = f"""
WITH total_cust AS (SELECT COUNT(DISTINCT customer_id) AS tot_cust,
                            COUNT(DISTINCT CASE WHEN segment = 'VIP' THEN customer_id END) AS tot_vip,
                            COUNT(DISTINCT CASE WHEN segment = 'Frequentist' THEN customer_id END) AS tot_freq
                    FROM dev.sandbox.pj_ao_framework_data
                    WHERE
                        business_day >= '{analysis_start_date}'
                        AND region_name = '{region}'
                        AND category_name = '{category_name}'
                        {material_group_condition}
)

SELECT brand,
        COUNT(DISTINCT customer_id) AS cust,
        COUNT(DISTINCT CASE WHEN segment = 'VIP' THEN customer_id END) AS vip_cust,
        COUNT(DISTINCT CASE WHEN segment = 'Frequentist' THEN customer_id END) AS freq_cust,
        tot_cust, tot_vip, tot_freq,
        (cust/tot_cust) AS tot_cust_perc,
        (vip_cust/tot_vip) AS vip_cust_perc,
        (freq_cust/tot_freq) AS freq_cust_perc
FROM total_cust, dev.sandbox.pj_ao_framework_data
WHERE
        business_day >= '{analysis_start_date}'
        AND region_name = '{region}'
        AND category_name = '{category_name}'
        {material_group_condition}
GROUP BY brand, tot_cust, tot_vip, tot_freq
ORDER BY brand
"""

# Pull the result into pandas and add the combined VIP + Frequentist share:
# (brand VIP + brand Frequentist customers) / (all VIP + all Frequentist customers).
p = spark.sql(query).toPandas()
p['vip_freq_pnt'] = (p['vip_cust'] + p['freq_cust']) / (p['tot_vip'] + p['tot_freq'])

In [0]:
# Material -> (name, brand) lookup shared by both joins.
material_brand = attr[['material_id', 'material_name', 'brand']]

# Sale and vol totals per brand.
gp_bub2 = fdf.merge(material_brand, on='material_id', how='inner')
gp_bub = gp_bub2.groupby('brand')[['sale', 'vol']].sum().reset_index()

# GP total per brand (Series indexed by brand).
gp2 = gp.merge(material_brand, on='material_id', how='inner')
gp3 = gp2.groupby('brand')['GP'].sum()

# Combine sale/vol, GP and the VIP + Frequentist share into one brand table.
temp = gp_bub.merge(gp3, on='brand', how='inner')
temp = temp.merge(p[['brand', 'vip_freq_pnt']], on='brand', how='inner')
temp['GP_ABS'] = temp['GP'].abs()
temp['GP_FLAG'] = np.where(temp['GP'] < 0, "Negative", "Positive")

In [0]:
# Brand bubbles: sale vs VIP + Frequentist share, sized by |GP|, coloured by GP sign.
gp_flag_colors = {"Positive": "#8993f8", "Negative": "#ec8272"}
fig = px.scatter(
    temp,
    x="sale",
    y="vip_freq_pnt",
    size="GP_ABS",
    color="GP_FLAG",
    hover_name="brand",
    log_x=False,
    size_max=40,
    title="ALL BRANDS",
    color_discrete_map=gp_flag_colors,
)
# Reference lines: mean share and 95th percentile of sale.
fig.add_hline(y=temp['vip_freq_pnt'].mean())
fig.add_vline(x=temp['sale'].quantile(0.95))
fig.show()

###Bubble - Growth Flag

In [0]:
# Brand-level LFL and current sales, plus relative growth between them.
r = e.merge(attr[['material_id', 'brand']], on='material_id', how='inner')
r = r.groupby('brand')[['LFL_sales', 'Current_sales']].sum().reset_index()

sales_change = r['Current_sales'] - r['LFL_sales']
# A zero LFL base with positive current sales yields +inf; report that as 0.
r['growth'] = (sales_change / r['LFL_sales']).replace(float('inf'), 0)

In [0]:
# Attach growth to the brand table.
temp = temp.merge(r[['brand', 'growth']], on='brand', how='inner')

# Tier cut-offs are the 33rd / 66th percentiles of the non-negative growth values.
growth_positives = temp.loc[temp['growth'] >= 0, ['growth']].values
medium = np.percentile(growth_positives, 33)
high = np.percentile(growth_positives, 66)

# First matching condition wins; anything unmatched falls into the "Low" tier.
growth_conditions = [
    temp["growth"] < 0,
    temp["growth"] >= high,
    temp["growth"] >= medium,
]
growth_labels = [
    "Negative",
    f"High >{round(high*100,2)}%",
    f"Medium >{round(medium*100,2)}%",
]
temp["Growth_Flag"] = np.select(
    growth_conditions,
    growth_labels,
    default=f"Low <{round(medium*100,2)}%",
)

In [0]:
# Brand bubbles coloured by growth tier; the keys must match the Growth_Flag labels.
growth_colors = {
    f"High >{round(high*100,2)}%": "#00CC96",
    f"Medium >{round(medium*100,2)}%": "#636EFA",
    f"Low <{round(medium*100,2)}%": "#AB63FA",
    "Negative": "#EF553B",
}
fig = px.scatter(
    temp,
    x="sale",
    y="vip_freq_pnt",
    size="GP_ABS",
    color="Growth_Flag",
    hover_name="brand",
    log_x=False,
    size_max=40,
    title="ALL BRANDS",
    color_discrete_map=growth_colors,
)

# Reference lines: mean share and 95th percentile of sale.
fig.add_hline(y=temp['vip_freq_pnt'].mean())
fig.add_vline(x=temp['sale'].quantile(0.95))

fig.show()

In [0]:
# Quadrant thresholds: 95th percentile of sale and mean of vip_freq_pnt.
sale_cutoff = temp["sale"].quantile(0.95)
share_cutoff = temp["vip_freq_pnt"].mean()

sale_high = temp["sale"] >= sale_cutoff
sale_low = temp["sale"] < sale_cutoff
share_high = temp["vip_freq_pnt"] >= share_cutoff
share_low = temp["vip_freq_pnt"] < share_cutoff

# 1: high sale / high share, 2: high sale / low share,
# 3: low sale / low share,   4: low sale / high share.
# Rows matching none of these (e.g. missing values) get 0.
temp["Quadrant"] = np.select(
    [sale_high & share_high, sale_low & share_low, sale_high & share_low, sale_low & share_high],
    [1, 3, 2, 4],
    default=0,
)

In [0]:
# Per-quadrant totals and averages over the brand table.
quadrant_aggs = {
    'sale': 'sum',
    'GP_ABS': 'sum',
    'vip_freq_pnt': 'mean',
    'brand': 'count',
    'growth': 'mean',
}
quadrants_df = temp.groupby(['Quadrant']).agg(quadrant_aggs).reset_index()

# Each quadrant's share of total sale and of total |GP|.
quadrants_df['sales_share'] = quadrants_df['sale'] / quadrants_df['sale'].sum()
quadrants_df['gp_share'] = quadrants_df['GP_ABS'] / quadrants_df['GP_ABS'].sum()

quadrants_df.rename(
    columns={
        'brand': 'brands',
        'growth': 'avg_growth',
        'vip_freq_pnt': 'avg_vip_freq_pnt',
    },
    inplace=True,
)

quadrants_df[['Quadrant', 'sales_share', 'avg_vip_freq_pnt', 'gp_share', 'brands', 'avg_growth']]

In [0]:
if rationalized_flag != 0:
    # Brand-level customer reach restricted to the rationalized materials.
    # The total_cust CTE counts distinct customers overall and for the 'VIP'
    # and 'Frequentist' segments; the main SELECT repeats the counts per brand
    # and cross-joins the totals onto every brand row.
    # Filters here are date, material_id list and region only (no category /
    # material-group predicate).
    query = f"""
    WITH total_cust AS (SELECT COUNT(DISTINCT customer_id) AS tot_cust,
                                COUNT(DISTINCT CASE WHEN segment = 'VIP' THEN customer_id END) AS tot_vip,
                                COUNT(DISTINCT CASE WHEN segment = 'Frequentist' THEN customer_id END) AS tot_freq
                        FROM dev.sandbox.pj_ao_framework_data
                        WHERE business_day >= '{analysis_start_date}'
                        AND material_id IN {rationalized_materials_sql}
                        AND region_name = '{region}'
    )

    SELECT brand,
            COUNT(DISTINCT customer_id) AS cust,
            COUNT(DISTINCT CASE WHEN segment = 'VIP' THEN customer_id END) AS vip_cust,
            COUNT(DISTINCT CASE WHEN segment = 'Frequentist' THEN customer_id END) AS freq_cust,
            tot_cust, tot_vip, tot_freq,
            (cust/tot_cust) AS tot_cust_perc,
            (vip_cust/tot_vip) AS vip_cust_perc,
            (freq_cust/tot_freq) AS freq_cust_perc
    FROM total_cust, dev.sandbox.pj_ao_framework_data
    WHERE business_day >= '{analysis_start_date}'
    AND material_id IN {rationalized_materials_sql}
    AND region_name = '{region}'
    GROUP BY brand, tot_cust, tot_vip, tot_freq
    ORDER BY brand
    """

    # Pull into pandas and add the combined VIP + Frequentist share per brand.
    s = spark.sql(query).toPandas()
    s['vip_freq_pnt'] = (s['vip_cust'] + s['freq_cust']) / (s['tot_vip'] + s['tot_freq'])

In [0]:
if rationalized_flag != 0:
    # Material -> (name, brand) lookup shared by both joins.
    material_brand = attr[['material_id', 'material_name', 'brand']]

    # Sale and vol totals per brand, rationalized materials only.
    rationalized_fdf = fdf[fdf['material_id'].isin(rationalized_materials)]
    gp_bub2 = rationalized_fdf.merge(material_brand, on='material_id', how='inner')
    gp_bub = gp_bub2.groupby('brand')[['sale', 'vol']].sum().reset_index()

    # GP total per brand, rationalized materials only.
    rationalized_gp = gp[gp['material_id'].isin(rationalized_materials)]
    gp2 = rationalized_gp.merge(material_brand, on='material_id', how='inner')
    gp3 = gp2.groupby('brand')['GP'].sum()

    # Brand table: sale/vol, GP and the VIP + Frequentist share from s.
    temp = gp_bub.merge(gp3, on='brand', how='inner')
    temp = temp.merge(s[['brand', 'vip_freq_pnt']], on='brand', how='inner')
    temp['GP_ABS'] = temp['GP'].abs()
    temp['GP_FLAG'] = np.where(temp['GP'] < 0, "Negative", "Positive")

    fig = px.scatter(
        temp,
        x="sale",
        y="vip_freq_pnt",
        size="GP_ABS",
        color="GP_FLAG",
        hover_name="brand",
        log_x=False,
        size_max=40,
        title="RATIONALIZED BRANDS",
        color_discrete_map={"Positive": "#8993f8", "Negative": "#ec8272"},
    )
    # Reference lines: mean share and 95th percentile of sale.
    fig.add_hline(y=temp['vip_freq_pnt'].mean())
    fig.add_vline(x=temp['sale'].quantile(0.95))
    fig.show()

In [0]:
if rationalized_flag != 0:
    # Brand-level LFL and current sales for the rationalized materials only.
    t = pd.merge(e[e['material_id'].isin(rationalized_materials)],
                 attr[['material_id', 'brand']],
                 on='material_id', how='inner')
    t = t.groupby('brand')[['LFL_sales', 'Current_sales']].sum().reset_index()
    # Growth is measured against this frame's own LFL sales. Dividing by the
    # all-brand frame `r` would align rows by position, pairing each brand
    # with an unrelated brand's (and unfiltered) LFL base.
    t['growth'] = (t['Current_sales'] - t['LFL_sales'])/t['LFL_sales']
    # A zero LFL base with positive current sales yields +inf; report that as 0.
    t['growth'] = t['growth'].replace(float('inf'), 0)

    temp = pd.merge(temp, t[['brand', 'growth']], on='brand', how='inner')

    # Tiers reuse the `high` / `medium` cut-offs already in scope so the
    # labels and colours stay comparable with the all-brands chart.
    temp["Growth_Flag"] = np.where(
        (temp["growth"] < 0), "Negative",
        np.where(
            (temp["growth"] >= high), f"High >{round(high*100,2)}%",
        np.where(
            (temp["growth"] >= medium), f"Medium >{round(medium*100,2)}%", f"Low <{round(medium*100,2)}%"
        )
    ))

    fig = px.scatter(temp, x="sale", y="vip_freq_pnt", size="GP_ABS",
                    color="Growth_Flag",
                    hover_name="brand", log_x=False, size_max=40, title="RATIONALIZED BRANDS", color_discrete_map={f"High >{round(high*100,2)}%": "#00CC96", f"Medium >{round(medium*100,2)}%": "#636EFA", f"Low <{round(medium*100,2)}%": "#AB63FA", "Negative": "#EF553B"})
    # Reference lines: mean share and 95th percentile of sale.
    fig.add_hline(y=temp.vip_freq_pnt.mean())
    fig.add_vline(x=temp.sale.quantile(0.95))
    fig.show()

###Price calculation

In [0]:
# Peek at the first rows of df.
df.head()

In [0]:
# Price per material = total sale / total vol over rows with week_number > 40.
late_weeks = df[df['week_number'] > 40]
price_df = late_weeks.groupby('material_id')[['sale', 'vol']].sum().reset_index()
price_df['price'] = price_df['sale'] / price_df['vol']

In [0]:
# Histogram of the per-material price, with price on the y axis.
px.histogram(price_df,y='price')

In [0]:
# Right join keeps every attr row, including materials without a price.
xd = price_df.merge(attr, on='material_id', how='right')

In [0]:
# Render the price / attribute table.
display(xd)

In [0]:
# Peek at the first rows of attr.
attr.head()

In [0]:
# Total sale per material, right-joined onto attr so every attr row is kept.
sale_totals = sale.groupby('material_id')['sale'].sum().reset_index()
xd = sale_totals.merge(attr, on='material_id', how='right')

In [0]:
# Render the sale / attribute table.
display(xd)

In [0]:
# t=pd.DataFrame(xd.groupby('new_volume').sum()['sale']).reset_index()
# t['new_volume']=t['new_volume'].astype('object')

# t['new_volume'] = t['new_volume'].astype(str)+'ML'
# t
# # print(t.dtypes)
# plot_bar(t,'new_volume','sale')

###Quadrant Values and GP

In [0]:
# Peek at the first row of eda_agg_table_full.
eda_agg_table_full.head(1)

In [0]:
# Quadrant thresholds: 95th percentile of sale and mean of tot_cust_perc.
sale_cutoff = eda_agg_table_full["sale"].quantile(0.95)
reach_cutoff = eda_agg_table_full["tot_cust_perc"].mean()

# The "high" side is inclusive (>=), matching the brand-level quadrant logic
# in this notebook. With strict comparisons on both sides, a row sitting
# exactly on a threshold matches no quadrant and is silently labelled 0.
sale_high = eda_agg_table_full["sale"] >= sale_cutoff
sale_low = eda_agg_table_full["sale"] < sale_cutoff
reach_high = eda_agg_table_full["tot_cust_perc"] >= reach_cutoff
reach_low = eda_agg_table_full["tot_cust_perc"] < reach_cutoff

# 1: high sale / high reach, 2: high sale / low reach,
# 3: low sale / low reach,   4: low sale / high reach.
# Only rows with a missing sale or tot_cust_perc fall through to 0.
eda_agg_table_full["Quadrant"] = np.select(
    [sale_high & reach_high, sale_low & reach_low, sale_high & reach_low, sale_low & reach_high],
    [1, 3, 2, 4],
    default=0,
)

# 1 when the brand is exactly "LULU PRIVATE LABEL", else 0.
eda_agg_table_full['pl_flag'] = np.where(eda_agg_table_full['brand'] == "LULU PRIVATE LABEL", 1, 0)

In [0]:
# Per-quadrant totals, average reach, material count and private-label count.
quadrant_metrics = {
    'sale': 'sum',
    'tot_cust_perc': 'mean',
    'gp_abs': 'sum',
    'material_id': 'count',
    'pl_flag': 'sum',
}
quadrant_summary = eda_agg_table_full.groupby('Quadrant').agg(quadrant_metrics).reset_index()
display(quadrant_summary)

In [0]:
# Default merge: inner join on every column name the two frames share.
# The result is only shown as cell output, not stored.
fdf.merge(eda_agg_table_full)

In [0]:
# Total sale for each new_buckets value.
bucket_sales = eda_agg_table_full.groupby('new_buckets')['sale'].sum().reset_index()
display(bucket_sales)

##Private Label View

In [0]:
# Work on an explicit copy: a column is added below, and writing into the
# frame returned by dropna() without copying can trigger pandas'
# SettingWithCopyWarning.
pbldf = eda_agg_table_full.dropna(subset=['sale', 'vip_freq_pnt']).copy()
# "PL" for the private-label brand, "Other" for every other brand.
pbldf['pl_flag'] = np.where(pbldf['brand'] == "LULU PRIVATE LABEL", "PL", "Other")
x_val = 'sale'
y_val = 'vip_freq_pnt'
color_val = 'pl_flag'
hover_name_val = 'material_name'
size_max_val = 40
size_val = 'gp_abs'

fig = px.scatter(pbldf, x=x_val, y=y_val, size=np.abs(pbldf[size_val]),
                 color=color_val,
                 hover_name=hover_name_val, log_x=False, log_y=False, size_max=size_max_val,
                 title="LULU PL SKUs AND OTHERS",
                 color_discrete_map={"Other": "#8993f8", "PL": "#ec8272"})

# Reference lines: mean of the y column and 95th percentile of the x column.
fig.add_hline(y=pbldf[y_val].mean())
fig.add_vline(x=pbldf[x_val].quantile(0.95))

fig.show()

In [0]:
if rationalized_flag != 0:
    # PL-vs-other bubble chart limited to the rationalized materials.
    rationalized_rows = pbldf[pbldf['material_id'].isin(rationalized_materials)]
    fig = px.scatter(
        rationalized_rows,
        x=x_val,
        y=y_val,
        size=np.abs(rationalized_rows[size_val]),
        color=color_val,
        hover_name=hover_name_val,
        log_x=False,
        log_y=False,
        size_max=size_max_val,
        title="RATIONALIZED LULU PL SKUs AND OTHERS",
        color_discrete_map={"Other": "#8993f8", "PL": "#ec8272"},
    )

    # Reference lines are computed on the full pbldf, not on the subset.
    fig.add_hline(y=pbldf[y_val].mean())
    fig.add_vline(x=pbldf[x_val].quantile(0.95))

    fig.show()

In [0]:
# Private-label rows only, after dropping rows lacking sale / vip_freq_pnt.
pbldf = (
    eda_agg_table_full
    .dropna(subset=['sale', 'vip_freq_pnt'])
    .loc[lambda frame: frame['brand'] == 'LULU PRIVATE LABEL']
)
x_val = 'sale'
y_val = 'vip_freq_pnt'
color_val = 'new_buckets'
hover_name_val = 'material_name'
size_max_val = 40
size_val = 'gp_abs'

bucket_colors = {
    "MAINTAIN_OR_GROW_BY_PROMO": "#00CC96",
    "SUPPORT_MORE_DIST": "#636EFA",
    "OBSERVE": "#AB63FA",
    "DELIST": "#EF553B",
}
fig = px.scatter(
    pbldf,
    x=x_val,
    y=y_val,
    size=np.abs(pbldf[size_val]),
    color=color_val,
    hover_name=hover_name_val,
    log_x=False,
    log_y=False,
    size_max=size_max_val,
    title="LULU PL SKUS BY BUCKETS",
    color_discrete_map=bucket_colors,
)

# Reference lines: mean of the y column and 95th percentile of the x column.
fig.add_hline(y=pbldf[y_val].mean())
fig.add_vline(x=pbldf[x_val].quantile(0.95))

fig.show()

In [0]:
if rationalized_flag != 0:
    # Bucket-coloured bubble chart limited to the rationalized materials.
    rationalized_rows = pbldf[pbldf['material_id'].isin(rationalized_materials)]
    bucket_colors = {
        "MAINTAIN_OR_GROW_BY_PROMO": "#00CC96",
        "SUPPORT_MORE_DIST": "#636EFA",
        "OBSERVE": "#AB63FA",
        "DELIST": "#EF553B",
    }
    fig = px.scatter(
        rationalized_rows,
        x=x_val,
        y=y_val,
        size=np.abs(rationalized_rows[size_val]),
        color=color_val,
        hover_name=hover_name_val,
        log_x=False,
        log_y=False,
        size_max=size_max_val,
        title="RATIONALIZED LULU PL SKUS BY BUCKETS",
        color_discrete_map=bucket_colors,
    )

    # Reference lines are computed on the full pbldf, not on the subset.
    fig.add_hline(y=pbldf[y_val].mean())
    fig.add_vline(x=pbldf[x_val].quantile(0.95))

    fig.show()

##Moving Avg

In [0]:

# Build df_ma: fdf plus 'MA4W', the 4-week rolling mean of sale per material.
# Frames are collected in a list and concatenated once; concatenating inside
# the loop re-copies the accumulated frame on every iteration, and seeding it
# with an empty frame can degrade column dtypes. Local names are used so the
# loop no longer overwrites the notebook-level `temp`.
ma_frames = []
for material in fdf.material_id.unique():
    weekly = fdf[fdf['material_id'] == material].sort_values(by='week_number')
    # assign() returns a new frame, so nothing is written into a slice of fdf.
    ma_frames.append(weekly.assign(MA4W=weekly['sale'].rolling(4).mean()))

if ma_frames:
    # Reversed so the row order matches prepending each material in turn.
    df_ma = pd.concat(ma_frames[::-1])
else:
    # No materials: keep an empty frame that still exposes the MA4W column.
    df_ma = pd.DataFrame(columns=[*fdf.columns, 'MA4W'])
    # print(i)

In [0]:
# Force the sale column of df_ma to float64.
df_ma['sale']=df_ma['sale'].astype('float64')

##Trend Chart - Observe

In [0]:
# fdf.new_buckets.value_counts()
tx=df_ma[df_ma['new_buckets']=='OBSERVE']

tx=tx.groupby('material_id').sum()['sale'].reset_index().sort_values(by='sale',ascending=False)
tx_list=tx['material_id'].tolist()[0:20]
print(tx_list)

tbl=df_ma[df_ma['material_id'].isin(tx_list)]

tbl = pd.merge(tbl,
                attr[['material_id', 'material_name','brand']],
                left_on='material_id',
                right_on='material_id',
                how='inner')

fig = px.line(tbl, x="week_number", y="MA4W", color='material_name',width=900, height=600)
fig.show()

fig2= px.line(tbl, x="week_number", y="cwd", color='material_name',width=900, height=600)
fig2.show()

##Delist by Attributes

In [0]:
# Frequency of each distinct value in the 'type' column.
eda_agg_table_full.type.value_counts()

In [0]:
# DELIST rows only, after dropping rows lacking sale / vip_freq_pnt.
pbldf = (
    eda_agg_table_full
    .dropna(subset=['sale', 'vip_freq_pnt'])
    .loc[lambda frame: frame['new_buckets'] == 'DELIST']
)
x_val = 'sale'
y_val = 'vip_freq_pnt'
color_val = 'type'
hover_name_val = 'material_name'
size_max_val = 40
size_val = 'gp_abs'

# No colour map is supplied, so colours come from plotly's default sequence.
fig = px.scatter(
    pbldf,
    x=x_val,
    y=y_val,
    size=np.abs(pbldf[size_val]),
    color=color_val,
    hover_name=hover_name_val,
    log_x=False,
    log_y=False,
    size_max=size_max_val,
    title="DELIST BY ATTRIBUTES-  SKUs",
)

# Reference lines: mean of the y column and 95th percentile of the x column.
fig.add_hline(y=pbldf[y_val].mean())
fig.add_vline(x=pbldf[x_val].quantile(0.95))
fig.show()

In [0]:

############# L3M Previous dataframe ##############

# Rows with week_number strictly between 28 and 41.
l3m_before_rows = fdf[(fdf['week_number'] > 28) & (fdf['week_number'] < 41)]

# Per material: summed sale / vol, mean store count and cwd, max new_buckets.
agg_table_l3m_before = (
    l3m_before_rows
    .groupby("material_id")
    .agg({
        "sale": "sum",
        "vol": "sum",
        "material_weekly_store_count": "mean",
        "cwd": "mean",
        "new_buckets": "max",
    })
    .reset_index()
    .rename(columns={
        'material_weekly_store_count': 'avg_store_selling',
        'cwd': 'avg_cwd',
    })
)
# Every column name ends up lower-case.
agg_table_l3m_before.columns = [name.lower() for name in agg_table_l3m_before.columns]

In [0]:
# Render the full eda_agg_table_full table.
display(eda_agg_table_full)

##Sales & GP Distribution

In [0]:
# Assuming eda_agg_table_full is your DataFrame

tt = eda_agg_table_full.groupby('material_id').sum()[['gp_abs', 'sale']].reset_index()
tt = pd.merge(tt, attr[['material_id', 'brand']], on='material_id', how='inner')
tt['material_id']=tt['material_id'].astype('str')

tt = tt.sort_values(by='gp_abs', ascending=False)
tt['gp_cum_sum'] = np.round(100 * (tt.gp_abs.cumsum() / tt['gp_abs'].sum()), 2)
cum_sum_80_gp_materials = tt[tt['gp_cum_sum'] < 80]['material_id'].count() + 1
cum_sum_80_gp_brands = tt['brand'][:cum_sum_80_gp_materials].nunique()

fig = go.Figure()
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Bar(y=tt['gp_abs'], x=tt['material_id'], name="material GP"), secondary_y=False)
fig.add_trace(go.Scatter(x=tt['material_id'], y=tt['gp_cum_sum'],
                         mode='lines', name="GP Cumulative Contri"), secondary_y=True)
fig.update_layout(
    title_text="gp_abs and gp_abs dist"
)
fig.update_xaxes(title_text="xaxis title")
fig.update_yaxes(title_text="material_id gp_abs", secondary_y=False)
fig.update_yaxes(title_text="% Cumulative Contribution", secondary_y=True)
fig.update_layout(showlegend=False)
fig.update_layout(autosize=False, width=900, height=400)
# fig.add_hline(secondary_y=80)
fig.show()

tt = tt.sort_values(by='sale', ascending=False)
tt['sale_cum_sum'] = np.round(100 * (tt.sale.cumsum() / tt['sale'].sum()), 2)
cum_sum_80_sale_materials = tt[tt['sale_cum_sum'] < 80]['material_id'].count() + 1
cum_sum_80_sale_brands = tt['brand'][:cum_sum_80_sale_materials].nunique()

fig2 = go.Figure()
fig2 = make_subplots(specs=[[{"secondary_y": True}]])
fig2.add_trace(go.Bar(y=tt['sale'], x=tt['material_id'], name="material sales"), secondary_y=False)
fig2.add_trace(go.Scatter(x=tt['material_id'], y=tt['sale_cum_sum'],
                         mode='lines', name="Sales Cumulative Contri"), secondary_y=True)
fig2.update_layout(
    title_text="Sales dist"
)
fig2.update_xaxes(title_text="xaxis title")
fig2.update_yaxes(title_text="material_id sale", secondary_y=False)
fig2.update_yaxes(title_text="% Cumulative Contribution", secondary_y=True)
fig2.update_layout(showlegend=False)
fig2.update_layout(autosize=False, width=900, height=400)
# fig2.add_hline(secondary_y=80)
fig2.show()

In [0]:
# Share of all SKUs needed to reach 80% of sales and of GP.
total_skus = tt['material_id'].nunique()
top_80_sale_materials_perc = round(cum_sum_80_sale_materials / total_skus * 100, 2)
top_80_gp_materials_perc = round(cum_sum_80_gp_materials / total_skus * 100, 2)

print(f"Only top {top_80_sale_materials_perc}% and {top_80_gp_materials_perc}% SKUs provide 80% of sales and GP respectively")
print(f"\nTop {cum_sum_80_sale_materials} SKUs coming from {cum_sum_80_sale_brands} brands contribute to 80% Sales")
print(f"Top {cum_sum_80_gp_materials} SKUs coming from {cum_sum_80_gp_brands} brands contribute to 80% GP")