In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import json
import gspread
from datetime import date, timedelta
from oauth2client.service_account import ServiceAccountCredentials
import setup_environment_2
import importlib
import import_ipynb
import warnings
# NOTE(review): this silences every warning for the whole session, so pandas
# deprecation and chained-assignment warnings raised by later cells stay hidden.
warnings.filterwarnings("ignore")
# Reload the helper module so edits to it are picked up without restarting the kernel.
importlib.reload(setup_environment_2)
# presumably loads the environment variables (e.g. the SNOWFLAKE_* settings
# read by query_snowflake) — TODO confirm against setup_environment_2
setup_environment_2.initialize_env()

  from pandas.core.computation.check import NUMEXPR_INSTALLED


/home/ec2-user/.Renviron
/home/ec2-user/service_account_key.json


In [3]:
def query_snowflake(query, columns=None):
    """Run a SQL query against Snowflake and return the rows as a DataFrame.

    Connection settings are read from the SNOWFLAKE_USERNAME, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_PASSWORD and SNOWFLAKE_DATABASE environment variables, and the
    query is executed on the COMPUTE_WH warehouse.

    Parameters
    ----------
    query : str
        SQL text to execute.
    columns : sequence of str, optional
        Column labels for the result. When omitted or empty, pandas' default
        integer labels are used.

    Returns
    -------
    pandas.DataFrame or None
        The fetched rows (an empty frame when the query returns no rows), or
        None when executing the query fails; in that case the error is printed
        rather than raised.
    """
    import os
    import snowflake.connector
    import numpy as np
    import pandas as pd

    columns = list(columns) if columns is not None else []
    con = snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USERNAME"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        database=os.environ["SNOWFLAKE_DATABASE"],
    )
    # Bound before the try so the finally clause can always test it, even when
    # creating the cursor itself fails.
    cur = None
    try:
        cur = con.cursor()
        cur.execute("USE WAREHOUSE COMPUTE_WH")
        cur.execute(query)
        rows = cur.fetchall()
        if not rows:
            # An empty fetch has no second dimension, so it cannot be reshaped
            # to the requested labels; build the empty frame directly instead.
            return pd.DataFrame(columns=columns) if columns else pd.DataFrame()
        values = np.array(rows)
        if columns:
            return pd.DataFrame(values, columns=columns)
        return pd.DataFrame(values)
    except Exception as e:
        print("Error: ", e)
        return None
    finally:
        # Close the connection even if closing the cursor raises.
        try:
            if cur is not None:
                cur.close()
        finally:
            con.close()

In [4]:
# SQL: NMV and unit totals per order date, region, brand, category and product
# for orders created from 47 days ago through yesterday. Orders with status id
# 7 or 12, private-market retailers and retailer 42031 are excluded. Inside
# 'Greater Cairo' the city name is used as the region label.
command_string = '''
SELECT  sales_orders.created_at::date AS created_at,
                CASE WHEN regions.name_en = 'Greater Cairo' THEN cities.name_en
                    ELSE regions.name_en
                END AS region,
                products.brand_id,
				categories.id as cat_id,
                product_sales_order.product_id,
                SUM(product_sales_order.purchased_item_count * basic_unit_count) AS units,
                SUM(product_sales_order.total_price) AS nmv

        FROM sales_orders
        JOIN retailers ON retailers.id = sales_orders.retailer_id
        JOIN product_sales_order ON product_sales_order.sales_order_id = sales_orders.id
        JOIN materialized_views.retailer_polygon ON retailer_polygon.retailer_id = retailers.id
        JOIN districts ON districts.id = retailer_polygon.district_id
        JOIN cities ON cities.id = districts.city_id
        JOIN states ON states.id = cities.state_id
        JOIN regions ON regions.id = states.region_id
        JOIN products ON products.id = product_sales_order.product_id
        JOIN brands ON brands.id = products.brand_id
		JOIN categories ON categories.id = products.category_id

        WHERE sales_orders.created_at::date BETWEEN current_date - INTERVAL '47 Days' AND current_date-1 
        AND (sales_orders.sales_order_status_id NOT IN (7,12))
        AND retailers.is_market_type_private = false
        AND retailers.id != 42031
        GROUP BY all
        
'''
# Result labels, in the same order as the SELECT list held in command_string.
columns = ['created_at', 'region', 'brand_id','cat_id', 'product_id', 'units', 'nmv']
sku_cont_sales_main_df = query_snowflake(command_string, columns)

# Cast the order date and every numeric field explicitly.
sku_cont_sales_main_df['created_at'] = pd.to_datetime(sku_cont_sales_main_df['created_at'])
for numeric_col in ('brand_id', 'cat_id', 'product_id', 'units', 'nmv'):
    sku_cont_sales_main_df[numeric_col] = pd.to_numeric(sku_cont_sales_main_df[numeric_col])

In [5]:
# Read the clock once so that every window boundary is derived from the same
# calendar day, even if the cell happens to run across midnight.
today = pd.Timestamp.today().date()

# Window lengths are labelled relative to yesterday (see the label computed in
# calculate_contribution), so `today_minus_N` lies N + 1 days before today.
today_minus_3 = today - timedelta(days=4)
today_minus_7 = today - timedelta(days=8)
today_minus_14 = today - timedelta(days=15)
today_minus_21 = today - timedelta(days=22)
today_minus_35 = today - timedelta(days=36)
today_minus_45 = today - timedelta(days=46)

In [6]:
def calculate_contribution(data, start_date):
    """Compute each product's share of its region/category/brand NMV since start_date.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns created_at, region, brand_id, cat_id,
        product_id and nmv.
    start_date : datetime.date
        First day (inclusive) of the window.

    Returns
    -------
    pandas.DataFrame
        Columns region, brand_id, cat_id, product_id and ``contribution_<N>``,
        where N is the number of days between start_date and yesterday.
    """
    # Computed once so the column is created and selected under the same label.
    window_days = (pd.Timestamp.today().date() - timedelta(days=1) - start_date).days
    contribution_col = f'contribution_{window_days}'

    sales = data.loc[data.created_at >= str(start_date)]
    sales = sales.groupby(['region', 'brand_id', 'cat_id', 'product_id']).nmv.sum().reset_index()
    # The string alias selects pandas' own grouped sum; passing the builtin
    # `sum` callable to transform is deprecated.
    sales['total_sales'] = sales.groupby(['region', 'cat_id', 'brand_id']).nmv.transform('sum')
    sales[contribution_col] = sales['nmv'] / sales['total_sales']
    return sales[['region', 'brand_id', 'cat_id', 'product_id', contribution_col]]


In [7]:
# One contribution table per look-back window, shortest window first.
(
    sales_3,
    sales_7,
    sales_14,
    sales_21,
    sales_35,
    sales_45,
) = (
    calculate_contribution(sku_cont_sales_main_df, window_start)
    for window_start in (
        today_minus_3,
        today_minus_7,
        today_minus_14,
        today_minus_21,
        today_minus_35,
        today_minus_45,
    )
)

In [8]:
# Outer-join the per-window tables on the SKU key; a SKU that is absent from a
# window ends up with a contribution of 0 for that window.
contribution_keys = ['region', 'brand_id', 'cat_id', 'product_id']
sku_cont_sales_data = sales_3
for window_table in (sales_7, sales_14, sales_21, sales_35, sales_45):
    sku_cont_sales_data = sku_cont_sales_data.merge(window_table, how='outer', on=contribution_keys)
sku_cont_sales_data = sku_cont_sales_data.fillna(0)

# Adjust contributions for missing data: a zero in a window is replaced by the
# value of the next longer window. The pairs run from the longest window to
# the shortest so that each fallback already carries its own adjustment.
for short_col, long_col in (
    ('contribution_35', 'contribution_45'),
    ('contribution_21', 'contribution_35'),
    ('contribution_14', 'contribution_21'),
    ('contribution_7', 'contribution_14'),
    ('contribution_3', 'contribution_7'),
):
    is_missing = sku_cont_sales_data[short_col] == 0
    sku_cont_sales_data.loc[is_missing, short_col] = sku_cont_sales_data[long_col]

In [9]:
# SQL: summed units (purchased_item_count * basic_unit_count) per region, order
# date and product for orders created in the last 46 days, today included.
# Only the 'telesales' and 'retailer' channels are kept, status ids 7 and 12
# are excluded, and only orders whose hour of day is earlier than the current
# hour are counted. The join to finance.all_cogs keeps only order lines that
# have a COGS row covering the order date.
# NOTE(review): DISTINCT looks redundant next to GROUP BY ALL — confirm.
command_string = '''

  SELECT DISTINCT
  CASE
    WHEN regions.id = 2 THEN cities.name_en
    ELSE regions.name_en
  END AS region,
  so.created_at::DATE AS DAY,
  pso.product_id,
  sum(pso.purchased_item_count * pso.basic_unit_count) AS qty
FROM
  product_sales_order pso
  JOIN sales_orders so ON so.id = pso.sales_order_id
  JOIN products ON products.id = pso.product_id
  JOIN brands ON products.brand_id = brands.id
  JOIN categories ON products.category_id = categories.id
  JOIN materialized_views.retailer_polygon ON materialized_views.retailer_polygon.retailer_id = so.retailer_id
  JOIN districts ON districts.id = materialized_views.retailer_polygon.district_id
  JOIN cities ON cities.id = districts.city_id
  JOIN states ON states.id = cities.state_id
  JOIN regions ON regions.id = states.region_id
  JOIN finance.all_cogs f ON f.product_id = pso.product_id
  AND f.from_date::DATE <= so.created_at::DATE
  AND f.to_date::DATE > so.created_at::DATE
WHERE
  so.created_at::DATE >= CURRENT_DATE -46
  AND so.sales_order_status_id NOT IN (7, 12)
  AND so.channel IN ('telesales', 'retailer')
  AND pso.purchased_item_count <> 0
  AND date_part('hour', so.created_at) < date_part('hour', CURRENT_TIMESTAMP)
GROUP BY
  ALL
ORDER BY
  1
        
'''
# Labels for the four selected columns; the query's DAY column is exposed as 'date'.
columns = ['region','date', 'product_id', 'qty']
SKU_mtd_volume = query_snowflake(command_string, columns)
SKU_mtd_volume['date'] = pd.to_datetime(SKU_mtd_volume['date'])
SKU_mtd_volume['product_id'] = pd.to_numeric(SKU_mtd_volume['product_id'])
# Cast `qty` itself with item assignment. Attribute-style assignment to a name
# that is not an existing column does not create or update a column, which
# would leave `qty` un-cast for the averaging done downstream.
SKU_mtd_volume['qty'] = pd.to_numeric(SKU_mtd_volume['qty'])

In [10]:
def calculate_fillrate(data, start_date):
    """Mean `qty` per region/product over rows dated start_date or later, rounded up.

    Reads the notebook-level `today` value.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns date, region, product_id and qty.
    start_date : datetime.date
        First day (inclusive) of the window.

    Returns
    -------
    pandas.DataFrame
        Columns region, product_id and ``mtd_volume_<N>``. N is 0 when
        start_date equals `today`; otherwise it is the number of days between
        start_date and yesterday.
    """
    recent = data.loc[data['date'] >= str(start_date)].copy()

    if start_date != today:
        window_days = (pd.Timestamp.today().date() - timedelta(days=1) - start_date).days
    else:
        window_days = (today - start_date).days
    volume_col = f'mtd_volume_{window_days}'

    mean_qty = recent.groupby(['region','product_id']).qty.mean().reset_index()
    mean_qty.columns = ['region','product_id', volume_col]
    # Round the mean up to a whole number of units.
    mean_qty[volume_col] = np.ceil(mean_qty[volume_col])
    return mean_qty

In [11]:
# One mean-volume table per window: today only, then each look-back window.
(
    mtd_volume_today,
    mtd_volume_3,
    mtd_volume_7,
    mtd_volume_14,
    mtd_volume_21,
    mtd_volume_35,
) = (
    calculate_fillrate(SKU_mtd_volume, window_start)
    for window_start in (
        today,
        today_minus_3,
        today_minus_7,
        today_minus_14,
        today_minus_21,
        today_minus_35,
    )
)

In [12]:
# Outer-join every window's volume table on (region, product_id); a product
# missing from a window gets a volume of 0 there.
volume_keys = ['region', 'product_id']
sku_mtd_volume_data = mtd_volume_today
for volume_table in (mtd_volume_3, mtd_volume_7, mtd_volume_14, mtd_volume_21, mtd_volume_35):
    sku_mtd_volume_data = sku_mtd_volume_data.merge(volume_table, how='outer', on=volume_keys)
sku_mtd_volume_data = sku_mtd_volume_data.fillna(0)


In [13]:
def scores_calc(df,col):
    """Score how close the value in `col` is to the row's 'mtd_volume_0' value.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single row providing 'mtd_volume_0' and `col`.
    col : str
        Name of the field compared against 'mtd_volume_0'.

    Returns
    -------
    number
        100 when both values are equal; the 'mtd_volume_0' value itself when
        `col` is 0; otherwise the inverse of the relative gap
        |mtd_volume_0 - col| / |col|, capped at 100.
    """
    current = df['mtd_volume_0']
    baseline = df[col]

    if baseline == current:
        return 100
    if baseline == 0:
        # NOTE(review): this branch is not capped at 100 like the others —
        # confirm that returning the raw volume is intended.
        return current

    relative_gap = abs((current - baseline) / baseline)
    return np.minimum(1 / relative_gap, 100)

In [14]:
# Score every look-back window against today's volume, row by row.
score_windows = (3, 7, 14, 21, 35)
for window in score_windows:
    sku_mtd_volume_data[f'score_{window}'] = sku_mtd_volume_data.apply(
        scores_calc, axis=1, args=(f'mtd_volume_{window}',)
    )

# Sum the window scores in window order.
score_sum = sku_mtd_volume_data[f'score_{score_windows[0]}']
for window in score_windows[1:]:
    score_sum = score_sum + sku_mtd_volume_data[f'score_{window}']
sku_mtd_volume_data['total_score'] = score_sum

In [15]:
# Turn each window score into its share of the row's total score.
for window in (3, 7, 14, 21, 35):
    window_score = sku_mtd_volume_data[f'score_{window}']
    sku_mtd_volume_data[f'wf{window}'] = window_score / sku_mtd_volume_data['total_score']


In [16]:
# Each final weight is 60% the score-derived weight and 40% a fixed prior
# weight for that window. The prior weights add up to 1.
DATA_WEIGHT_SHARE = 0.6
PRIOR_WEIGHT_SHARE = 0.4
PRIOR_WINDOW_WEIGHTS = {
    'wf3': 0.5,
    'wf7': 0.25,
    'wf14': 0.15,
    'wf21': 0.075,
    'wf35': 0.025,
}

# .copy() makes the column subset an independent frame, so the assignments
# below do not write into a slice of the previous frame (which pandas reports
# as a chained-assignment warning).
sku_mtd_volume_data = sku_mtd_volume_data[['product_id','region','wf3','wf7','wf14','wf21','wf35']].copy()
for weight_col, prior_weight in PRIOR_WINDOW_WEIGHTS.items():
    sku_mtd_volume_data[weight_col] = (
        (sku_mtd_volume_data[weight_col] * DATA_WEIGHT_SHARE) + (PRIOR_WEIGHT_SHARE * prior_weight)
    )

In [17]:
# Attach the window weights to the contribution table (default inner join on
# product and region).
sku_cont_sales_data = sku_cont_sales_data.merge(sku_mtd_volume_data,on=['product_id','region'])

# Weighted sum of the window contributions, accumulated from the 3-day window
# up to the 35-day window.
weighted_terms = [
    sku_cont_sales_data[f'contribution_{window}'] * sku_cont_sales_data[f'wf{window}']
    for window in (3, 7, 14, 21, 35)
]
weighted_total = weighted_terms[0]
for weighted_term in weighted_terms[1:]:
    weighted_total = weighted_total + weighted_term
sku_cont_sales_data['final_cntrb'] = weighted_total

In [20]:
# Rescale the blended contributions so they add up to 1 within each
# region/brand/category group. The string alias 'sum' selects pandas' own
# grouped sum; passing the builtin `sum` callable to transform is deprecated.
sku_cont_sales_data['total_cntrb'] = sku_cont_sales_data.groupby(['region','brand_id','cat_id'])['final_cntrb'].transform('sum')
sku_cont_sales_data['final_cntrb'] = sku_cont_sales_data['final_cntrb']/sku_cont_sales_data['total_cntrb']

In [21]:
# Write the final per-SKU contribution to an Excel workbook in the working
# directory; the frame's index is written as well (to_excel default).
sku_cont_sales_data.loc[:, ['region','product_id','final_cntrb']].to_excel('Skus_cntrb.xlsx')