In [1]:
from db.helpers import new_sales_collection 
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
from helpers.tables import industry_table,area_table
import numpy as np

In [2]:
# Sales measures whose month-over-month growth is computed throughout this notebook.
keys = [
    'Weekday_Store_Sales',
    'Weekday_Delivery_Sales',
    'Weekend_Store_Sales',
    'Weekend_Delivery_Sales',
]

def calculate_growth(value1, value2):
    """Return the relative change from value1 to value2.

    Args:
        value1: baseline value.
        value2: value compared against the baseline.

    Returns:
        (value2 - value1) / value1, or None when value1 equals zero
        (the ratio is undefined for a zero baseline).
    """
    return None if value1 == 0 else (value2 - value1) / value1

def group_sales(group_id,match):
    """Run the monthly sales aggregation on new_sales_collection.

    Args:
        group_id: extra fields merged into the $group `_id`, next to
            "year" ($Sales_Year) and "month" ($Sales_Month).
        match: extra $match conditions; "Level_1_Area" == "Kuwait" and a
            non-null, non-zero "Monthly_Sales" are always applied on top.

    Returns:
        Whatever new_sales_collection.aggregate() returns for the pipeline.
        Each group carries the summed four sales measures and
        "numberOfOutlets" (the number of documents in the group), sorted by
        year and then month, ascending.
    """
    summed_fields = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )

    match_stage = {
        '$match': {**match, "Level_1_Area": "Kuwait", "Monthly_Sales": {"$nin": [None, 0]}}
    }

    # Build the $group spec in the same field order as a hand-written literal:
    # _id, the four sums, then the document counter.
    group_spec = {'_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"}}
    for field in summed_fields:
        group_spec[field] = {'$sum': '$' + field}
    group_spec["numberOfOutlets"] = {"$sum": 1}

    sort_stage = {"$sort": {"_id.year": 1, "_id.month": 1}}

    return new_sales_collection.aggregate([match_stage, {'$group': group_spec}, sort_stage])

def generate_seasonality_record(base,data):
    """Build one seasonality record with month-over-month growth per sales measure.

    Args:
        base: dict of identifying fields; copied into the result, not mutated.
        data: list of aggregated rows. Growth is only computed when it holds
            exactly two rows; data[0] is used as the baseline month and data[1]
            as the compared month. Each row must provide the four sales sums
            and 'numberOfOutlets'.

    Returns:
        A new dict containing `base` plus, per measure:
        - None for every measure when `data` does not hold exactly two rows;
        - the growth of the per-outlet average (sum / numberOfOutlets) from
          data[0] to data[1] when it lies strictly between -1 and 2;
        - no entry at all when the growth is undefined (zero baseline) or
          falls outside that range.
    """
    measures = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )
    result = {**base}

    if len(data) != 2:
        for key in measures:
            result[key] = None
        return result

    baseline, compared = data
    for key in measures:
        growth = calculate_growth(
            baseline[key] / baseline['numberOfOutlets'],
            compared[key] / compared['numberOfOutlets'],
        )
        # Test against None explicitly: a growth of exactly 0.0 is a valid
        # result (flat month) and must not be discarded as "missing".
        if growth is not None and -1 < growth < 2:
            result[key] = growth
    return result


def getDates(start_date:datetime=datetime(2016, 1, 1),end_date:datetime=datetime(2023, 12, 1)):
    """Yield start_date and then every date one month later, up to and including end_date.

    Args:
        start_date: first date produced (default 2016-01-01).
        end_date: last date that may be produced (default 2023-12-01).

    Yields:
        datetime values, each one relativedelta(months=1) after the previous.
        Nothing is yielded when start_date is already after end_date.
    """
    current = start_date
    while True:
        if current > end_date:
            return
        yield current
        current = current + relativedelta(months=1)

def filter_sales(data:list,date_1:datetime,date_2:datetime):
    """Keep only the rows whose `_id` year/month equals that of date_1 or date_2.

    Args:
        data: list of rows, each with an `_id` dict holding 'year' and 'month'.
        date_1: first month of interest (only its year and month are used).
        date_2: second month of interest (only its year and month are used).

    Returns:
        A new list with the matching rows, in their original order.
    """
    def _same_month(row, when):
        return row['_id']['year'] == when.year and row['_id']['month'] == when.month

    kept = []
    for row in data:
        if _same_month(row, date_1) or _same_month(row, date_2):
            kept.append(row)
    return kept

In [3]:
# remove for loop for years and months and use getDates

Generate Location Type Seasonality

In [4]:
# Month-over-month growth per location type, one record per (location type, month).
# NOTE(review): unlike the product, area and industry cells, this distinct() is not
# restricted to Level_1_Area "Kuwait" although group_sales() always is - confirm intended.
location_types = new_sales_collection.distinct("Location_Type",{"Location_Type":{"$ne":0}})
_id = {'Location_type': '$Location_Type'}
result = []
for i in location_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can select extra year/month combinations (e.g. across
        # a year boundary); filter_sales keeps only the two months of interest.
        match = {
            "Location_Type": i,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, match)), date, last_month)
        result.append(generate_seasonality_record({"location_type":i,"year":date.year,"month":date.month},data))

# Fallback: where a measure is missing or None, use the growth computed over all
# location types for the same month. That aggregate only depends on (year, month),
# so it is queried once per month and reused for every record and measure.
# NOTE(review): the fallback growth is not limited to the (-1, 2) range that
# generate_seasonality_record applies - confirm intended.
overall_by_period = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        period = (record['year'], record['month'])
        if period not in overall_by_period:
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            overall_match = {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            overall_by_period[period] = filter_sales(list(group_sales({}, overall_match)), current_date, last_month)
        all_locations_growth = overall_by_period[period]
        if len(all_locations_growth) != 2:
            # Both months are needed for a growth value; leave the measure unset.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
location_type_df = pd.DataFrame(result)

Generate Products Seasonality

In [5]:
# Month-over-month growth per product focus, one record per (product focus, month).
products_types = new_sales_collection.distinct("Product_Focus",{"Level_1_Area":"Kuwait","Product_Focus":{"$ne":0}})
_id = {'Product_Focus': '$Product_Focus'}
result = []
for i in products_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can select extra year/month combinations (e.g. across
        # a year boundary); filter_sales keeps only the two months of interest.
        match = {
            "Product_Focus": i,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, match)), date, last_month)
        result.append(generate_seasonality_record({"product_focus":i,"year":date.year,"month":date.month},data))

# Fallback: where a measure is missing or None, use the growth computed over all
# sales for the same month. That aggregate only depends on (year, month), so it
# is queried once per month and reused for every record and measure.
# NOTE(review): the fallback growth is not limited to the (-1, 2) range that
# generate_seasonality_record applies - confirm intended.
overall_by_period = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        period = (record['year'], record['month'])
        if period not in overall_by_period:
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            overall_match = {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            overall_by_period[period] = filter_sales(list(group_sales({}, overall_match)), current_date, last_month)
        all_locations_growth = overall_by_period[period]
        if len(all_locations_growth) != 2:
            # Both months are needed for a growth value; leave the measure unset.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
product_focus_df = pd.DataFrame(result)

Generate Area Seasonality

In [6]:
# Month-over-month growth per level-3 area, one record per (area, month).
areas = new_sales_collection.distinct("Level_3_Area",{"Level_1_Area":"Kuwait"})
_id = {'Level_3_Area': '$Level_3_Area'}
result = []
for i in areas:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can select extra year/month combinations (e.g. across
        # a year boundary); filter_sales keeps only the two months of interest.
        match = {
            "Level_3_Area": i,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, match)), date, last_month)
        result.append(generate_seasonality_record({"area":i,"year":date.year,"month":date.month},data))

# Fallback: where a measure is missing or None, use the growth of the enclosing
# level-2 area (looked up in area_table) for the same month. That aggregate only
# depends on (level-2 area, year, month), so it is queried once per combination.
# Assumes area_table values are hashable (e.g. strings) - TODO confirm.
# NOTE(review): the fallback growth is not limited to the (-1, 2) range that
# generate_seasonality_record applies - confirm intended.
level_2_by_period = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        area_level_2 = area_table[record['area']]
        period = (area_level_2, record['year'], record['month'])
        if period not in level_2_by_period:
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            level_2_match = {
                "Level_2_Area": area_level_2,
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            level_2_by_period[period] = filter_sales(
                list(group_sales({'Level_2_Area':"$Level_2_Area"}, level_2_match)),
                current_date,
                last_month,
            )
        all_locations_growth = level_2_by_period[period]
        if len(all_locations_growth) != 2:
            # Both months are needed for a growth value; leave the measure unset.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
area_df = pd.DataFrame(result)

Generate Industry Type Seasonality

In [7]:
# Distinct level-2 industries present in the Kuwait sales data (0 is excluded).
industry = new_sales_collection.distinct("Industry_Level_2",{"Level_1_Area":"Kuwait","Industry_Level_2":{"$ne":0}})
# Group key for the per-industry aggregation. The "$" prefix makes this a field
# path; without it MongoDB treats the value as a constant string.
_id = {'Industry_Level_2': '$Industry_Level_2'}
result = []

def group_sales_2(group_id,match,industry):
    """Run the monthly sales aggregation restricted to brands of one level-1 industry.

    Same aggregation shape as the plain monthly grouping, with an extra
    $lookup into the "Brands" collection: a sales document is kept only when
    its "Brand" matches a Brands document ("Brand_Name_English") whose
    "Industry_Level_1" equals `industry`.

    Args:
        group_id: extra fields merged into the $group `_id`, next to
            "year" ($Sales_Year) and "month" ($Sales_Month).
        match: extra $match conditions; "Level_1_Area" == "Kuwait" and a
            non-null, non-zero "Monthly_Sales" are always applied on top.
        industry: value compared with Brands."Industry_Level_1".

    Returns:
        Whatever new_sales_collection.aggregate() returns for the pipeline;
        groups carry the four summed sales measures and "numberOfOutlets"
        (the number of documents in the group), sorted by year then month.

    Raises:
        Exception: "group_sales_2 error" when the aggregation fails; the
            pipeline is printed first and the original error is attached as
            the cause.
    """
    pipeline = [
        {'$match': {**match,"Level_1_Area":"Kuwait","Monthly_Sales":{"$nin":[None,0]}}},
        {"$lookup": {
            "from": "Brands",
            "localField": "Brand",
            "foreignField": "Brand_Name_English",
            "as": "brand",
            "pipeline": [
                {"$match": {"Industry_Level_1": industry}},
            ],
        }},
        # Keep only sales documents for which the lookup found at least one brand.
        {"$match": {"brand.0": {"$exists": True}}},
        {'$group': {
            '_id': {**group_id,"year":"$Sales_Year","month":"$Sales_Month"},
            'Weekday_Store_Sales': {'$sum': '$Weekday_Store_Sales'},
            'Weekday_Delivery_Sales': {'$sum': '$Weekday_Delivery_Sales'},
            'Weekend_Store_Sales': {'$sum': '$Weekend_Store_Sales'},
            'Weekend_Delivery_Sales': {'$sum': '$Weekend_Delivery_Sales'},
            "numberOfOutlets": {"$sum": 1},
        }},
        {"$sort": {"_id.year": 1, "_id.month": 1}},
    ]
    try:
        return new_sales_collection.aggregate(pipeline)
    except Exception as exc:
        # Catch Exception only, so KeyboardInterrupt/SystemExit still propagate,
        # and chain the original error so its traceback is not lost.
        print(pipeline)
        raise Exception("group_sales_2 error") from exc




# Month-over-month growth per level-2 industry, one record per (industry, month).
for i in industry:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can select extra year/month combinations (e.g. across
        # a year boundary); filter_sales keeps only the two months of interest.
        match = {
            "Industry_Level_2": i,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, match)), date, last_month)
        result.append(generate_seasonality_record({"industry":i,"year":date.year,"month":date.month},data))

# Fallback: where a measure is missing or None, use the growth of the level-1
# industry (looked up in industry_table) for the same month. That aggregate only
# depends on (level-1 industry, year, month), so it is queried once per combination.
# Assumes industry_table values are hashable (e.g. strings) - TODO confirm.
# NOTE(review): the fallback growth is not limited to the (-1, 2) range that
# generate_seasonality_record applies - confirm intended.
level_1_by_period = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        # Use a separate name so the `industry` list iterated above is not overwritten.
        industry_level_1 = industry_table[record['industry']]
        period = (industry_level_1, record['year'], record['month'])
        if period not in level_1_by_period:
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            level_1_match = {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            level_1_by_period[period] = filter_sales(
                list(group_sales_2({}, level_1_match, industry_level_1)),
                current_date,
                last_month,
            )
        all_locations_growth = level_1_by_period[period]
        if len(all_locations_growth) != 2:
            # Both months are needed for a growth value; leave the measure unset.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
industry_df = pd.DataFrame(result)


In [8]:
# Write the four seasonality frames, as computed so far, to one workbook with a
# sheet per frame.
sheets = (
    ('location_type', location_type_df),
    ('product_focus', product_focus_df),
    ('area', area_df),
    ('industry', industry_df),
)
with pd.ExcelWriter('before_seasonalities.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name)

In [9]:
# Growth values at or above this threshold are treated as outliers and replaced
# with NaN.
OUTLIER_GROWTH_THRESHOLD = 3
for frame in (location_type_df, product_focus_df, area_df, industry_df):
    for i in keys:
        # Assign the masked column back to the frame. Calling
        # frame[i].mask(..., inplace=True) is chained assignment through an
        # inplace method, which does not update the frame under pandas
        # Copy-on-Write.
        frame[i] = frame[i].mask(frame[i] >= OUTLIER_GROWTH_THRESHOLD)

In [10]:
# When using these seasonalities, add a weight for each one of them; using them
# directly will over-value the seasonality.
#
# Interpolate only the numeric growth columns. Calling interpolate() on the whole
# frame also touches the object-dtype label column, which pandas has deprecated.
# NOTE(review): rows are ordered entity by entity, so interpolating along the
# index can fill a gap at the start of one entity from the previous entity's
# rows - confirm this is acceptable.
for frame in (location_type_df, product_focus_df, area_df, industry_df):
    frame[keys] = frame[keys].interpolate()
display(location_type_df.describe())
display(product_focus_df.describe())
display(area_df.describe())
display(industry_df.describe())

  location_type_df.interpolate(inplace=True)
  product_focus_df.interpolate(inplace=True)
  area_df.interpolate(inplace=True)
  industry_df.interpolate(inplace=True)


Unnamed: 0,year,month,Weekday_Store_Sales,Weekend_Store_Sales,Weekday_Delivery_Sales,Weekend_Delivery_Sales
count,4320.0,4320.0,4320.0,4320.0,4320.0,4320.0
mean,2019.5,6.5,0.009478,0.002152,0.020715,0.021231
std,2.291553,3.452452,0.222153,0.221119,0.309487,0.313597
min,2016.0,1.0,-0.997829,-0.997263,-1.0,-1.0
25%,2017.75,3.75,-0.069019,-0.076742,-0.133896,-0.136877
50%,2019.5,6.5,-3e-06,-0.003338,0.012486,0.019257
75%,2021.25,9.25,0.071216,0.065683,0.177613,0.161516
max,2023.0,12.0,1.894327,1.974439,1.956985,1.960678


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,14496.0,14496.0,14496.0,14496.0,14496.0,14496.0
mean,2019.5,6.5,0.004908,0.019494,0.000191,0.018056
std,2.291367,3.452172,0.18817,0.282787,0.200286,0.286763
min,2016.0,1.0,-0.933824,-1.0,-0.945364,-1.0
25%,2017.75,3.75,-0.069019,-0.121135,-0.076742,-0.133778
50%,2019.5,6.5,-3e-06,0.009375,-0.00336,0.018779
75%,2021.25,9.25,0.065119,0.177613,0.046632,0.161516
max,2023.0,12.0,1.994253,1.962792,1.988379,1.995567


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,13632.0,13632.0,13632.0,13631.0,13632.0,13631.0
mean,2019.5,6.5,0.00257,0.07721,0.005838,0.072382
std,2.291372,3.452179,0.264575,0.409431,0.271526,0.404238
min,2016.0,1.0,-1.0,-1.0,-1.0,-1.0
25%,2017.75,3.75,-0.094244,-0.109151,-0.095935,-0.109865
50%,2019.5,6.5,-0.006307,0.022557,-0.007317,0.018237
75%,2021.25,9.25,0.065186,0.192913,0.060628,0.189026
max,2023.0,12.0,1.976987,2.218253,1.991667,2.137796


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,3552.0,3552.0,3552.0,3552.0,3552.0,3552.0
mean,2019.5,6.5,-0.021818,0.064976,-0.001684,0.140284
std,2.29161,3.452539,0.280361,0.433806,0.327917,0.490043
min,2016.0,1.0,-0.935641,-0.983338,-0.943174,-0.980715
25%,2017.75,3.75,-0.122589,-0.172934,-0.113091,-0.163652
50%,2019.5,6.5,-0.018572,-0.019539,-0.01453,-0.006838
75%,2021.25,9.25,0.085112,0.198807,0.069404,0.24498
max,2023.0,12.0,1.967758,1.962971,1.994259,1.872891


In [11]:

# Write the four seasonality frames in their current state to one workbook with
# a sheet per frame.
sheets = (
    ('location_type', location_type_df),
    ('product_focus', product_focus_df),
    ('area', area_df),
    ('industry', industry_df),
)
with pd.ExcelWriter('after_seasonalities.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name)
