In [1]:
from db.helpers import gm_sales_collection
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
from helpers.tables import industry_table,area_table
import numpy as np

In [2]:
# Sales measures for which a month-over-month growth is computed in every seasonality table.
keys = [
    'Weekday_Store_Sales',
    'Weekday_Delivery_Sales',
    'Weekend_Store_Sales',
    'Weekend_Delivery_Sales',
]

def calculate_growth(value1, value2):
    """Return the relative change from ``value1`` to ``value2``.

    The result is ``(value2 - value1) / value1``. ``None`` is returned when
    ``value1`` equals zero, because the ratio is undefined in that case.
    """
    if value1 != 0:
        return (value2 - value1) / value1
    return None

def group_sales(group_id,match):
    """Aggregate the sales collection per (group_id, year, month).

    Args:
        group_id: Extra fields merged into the ``$group`` ``_id`` next to
            ``year`` (``$Sales_Year``) and ``month`` (``$Sales_Month``).
        match: Filter conditions for the ``$match`` stage; the condition
            ``Level_1_Area == "Kuwait"`` is always added.

    Returns:
        The result of ``gm_sales_collection.aggregate``: one document per
        group holding the summed sales measures and ``numberOfOutlets`` (the
        number of documents that fell into the group), sorted by year and
        then month in ascending order.
    """
    sales_fields = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )

    match_stage = {'$match': {**match, "Level_1_Area": "Kuwait"}}

    group_spec = {'_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"}}
    for field in sales_fields:
        group_spec[field] = {'$sum': '$' + field}
    group_spec["numberOfOutlets"] = {"$sum": 1}

    sort_stage = {"$sort": {"_id.year": 1, "_id.month": 1}}

    return gm_sales_collection.aggregate([match_stage, {'$group': group_spec}, sort_stage])

def generate_seasonality_record(base,data):
    """Build one seasonality record with the month-over-month growth per sales measure.

    Args:
        base: Identifying fields copied into the record as-is.
        data: Aggregated rows for the two months being compared;
            ``data[0]`` is the reference month and ``data[1]`` the month the
            growth is measured for. Each row must hold the four sales
            measures and ``numberOfOutlets``.

    Returns:
        A new dict made of ``base`` plus one entry per sales measure. The
        entry is the growth of the per-outlet average when it lies strictly
        between -1 and 2, and ``None`` otherwise (wrong number of rows,
        undefined growth, or growth outside that range).
    """
    result = {**base}
    keys = ['Weekday_Store_Sales','Weekday_Delivery_Sales','Weekend_Store_Sales','Weekend_Delivery_Sales']
    has_both_months = len(data) == 2
    for key in keys:
        # Always create the entry so every record exposes the same keys in the same order.
        result[key] = None
        if not has_both_months:
            continue
        growth = calculate_growth(
            data[0][key] / data[0]['numberOfOutlets'],
            data[1][key] / data[1]['numberOfOutlets'],
        )
        # Compare against None explicitly: a growth of exactly 0.0 is a valid value
        # and must not be discarded by a truthiness test.
        if growth is not None and -1 < growth < 2:
            result[key] = growth
    return result


def getDates(start_date:datetime=datetime(2016, 1, 1),end_date:datetime=datetime(2023, 12, 1)):
    """Yield dates one month apart, from ``start_date`` up to but excluding ``end_date``.

    Each value is obtained by adding ``relativedelta(months=1)`` to the
    previously yielded one.
    """
    one_month = relativedelta(months=1)
    current = start_date
    while current < end_date:
        yield current
        current = current + one_month

def filter_sales(data:list,date_1:datetime,date_2:datetime):
    """Return the records whose ``_id`` year and month match ``date_1`` or ``date_2``.

    The relative order of the records in ``data`` is preserved.
    """
    def _same_month(record_id, date):
        # True when the record's year/month pair equals the one of ``date``.
        return record_id['year'] == date.year and record_id['month'] == date.month

    kept = []
    for record in data:
        if _same_month(record['_id'], date_1) or _same_month(record['_id'], date_2):
            kept.append(record)
    return kept

In [3]:
# remove for loop for years and months and use getDates 

generate Location Type Seasonality

In [4]:
# Month-over-month seasonality per location type.
# NOTE(review): unlike the other seasonality cells, this distinct() call is not restricted to
# Level_1_Area == "Kuwait", although group_sales() always is — confirm this is intended.
location_types = gm_sales_collection.distinct("Location_Type",{"Location_Type":{"$ne":0}})
_id = {'Location_type': '$Location_Type'}
result = []
for i in location_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can match more than the two wanted (year, month) pairs;
        # filter_sales() keeps only the rows of `date` and `last_month`.
        data = filter_sales(
            list(group_sales(_id, {
                "Location_Type": i,
                "Sales_Month": {"$in": [date.month, last_month.month]},
                "Sales_Year": {"$in": [date.year, last_month.year]},
            })),
            date,
            last_month,
        )
        result.append(generate_seasonality_record({"location_type": i, "year": date.year, "month": date.month}, data))

# Fill the gaps with the growth computed over all location types for the same month.
for record in result:
    all_locations_growth = None  # fetched lazily, at most once per record
    for key in keys:
        if record.get(key) is not None:
            continue
        if all_locations_growth is None:
            # The fallback query does not depend on `key`, so run it once and reuse it.
            current_date = datetime(record['year'], record['month'], 1)
            last_month = current_date - relativedelta(months=1)
            all_locations_growth = filter_sales(
                list(group_sales({}, {
                    "Sales_Month": {"$in": [current_date.month, last_month.month]},
                    "Sales_Year": {"$in": [current_date.year, last_month.year]},
                })),
                current_date,
                last_month,
            )
        if len(all_locations_growth) != 2:
            # Both months are required; no key of this record can be filled.
            break
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month, second_month)
location_type_df = pd.DataFrame(result)

generate Products Seasonality

In [5]:
# Month-over-month seasonality per product focus.
products_types = gm_sales_collection.distinct("Product_Focus",{"Level_1_Area":"Kuwait","Product_Focus":{"$ne":0}})
_id = {'Product_Focus': '$Product_Focus'}
result = []
for i in products_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can match more than the two wanted (year, month) pairs;
        # filter_sales() keeps only the rows of `date` and `last_month`.
        data = filter_sales(
            list(group_sales(_id, {
                "Product_Focus": i,
                "Sales_Month": {"$in": [date.month, last_month.month]},
                "Sales_Year": {"$in": [date.year, last_month.year]},
            })),
            date,
            last_month,
        )
        result.append(generate_seasonality_record({"product_focus": i, "year": date.year, "month": date.month}, data))

# Fill the gaps with the growth computed over all product focuses for the same month.
for record in result:
    all_locations_growth = None  # fetched lazily, at most once per record
    for key in keys:
        if record.get(key) is not None:
            continue
        if all_locations_growth is None:
            # The fallback query does not depend on `key`, so run it once and reuse it.
            current_date = datetime(record['year'], record['month'], 1)
            last_month = current_date - relativedelta(months=1)
            all_locations_growth = filter_sales(
                list(group_sales({}, {
                    "Sales_Month": {"$in": [current_date.month, last_month.month]},
                    "Sales_Year": {"$in": [current_date.year, last_month.year]},
                })),
                current_date,
                last_month,
            )
        if len(all_locations_growth) != 2:
            # Both months are required; no key of this record can be filled.
            break
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month, second_month)
product_focus_df = pd.DataFrame(result)

generate Area Seasonality

In [6]:
# Month-over-month seasonality per level-3 area.
areas = gm_sales_collection.distinct("Level_3_Area",{"Level_1_Area":"Kuwait"})
_id = {'Level_3_Area': '$Level_3_Area'}
result = []
for i in areas:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can match more than the two wanted (year, month) pairs;
        # filter_sales() keeps only the rows of `date` and `last_month`.
        data = filter_sales(
            list(group_sales(_id, {
                "Level_3_Area": i,
                "Sales_Month": {"$in": [date.month, last_month.month]},
                "Sales_Year": {"$in": [date.year, last_month.year]},
            })),
            date,
            last_month,
        )
        result.append(generate_seasonality_record({"area": i, "year": date.year, "month": date.month}, data))

# check growth for level 2 area
for record in result:
    all_locations_growth = None  # fetched lazily, at most once per record
    for key in keys:
        if record.get(key) is not None:
            continue
        if all_locations_growth is None:
            # The fallback query does not depend on `key`, so run it once and reuse it.
            area_level_2 = area_table[record['area']]
            current_date = datetime(record['year'], record['month'], 1)
            last_month = current_date - relativedelta(months=1)
            all_locations_growth = filter_sales(
                list(group_sales({'Level_2_Area': "$Level_2_Area"}, {
                    "Level_2_Area": area_level_2,
                    "Sales_Month": {"$in": [current_date.month, last_month.month]},
                    "Sales_Year": {"$in": [current_date.year, last_month.year]},
                })),
                current_date,
                last_month,
            )
        if len(all_locations_growth) != 2:
            # Both months are required; no key of this record can be filled.
            break
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month, second_month)
area_df = pd.DataFrame(result)

generate Industry Type Seasonality

In [7]:
# Distinct level-2 industries present in the Kuwait sales data (the value 0 is excluded).
industry = gm_sales_collection.distinct("Industry_Level_2",{"Level_1_Area":"Kuwait","Industry_Level_2":{"$ne":0}})
# The "$" prefix makes this a reference to the document field; without it the
# group key is the literal string 'Industry_Level_2'.
_id = {'Industry_Level_2': '$Industry_Level_2'}
result = []

def group_sales_2(group_id,match,industry):
    """Aggregate sales per (group_id, year, month) for brands of one level-1 industry.

    Args:
        group_id: Extra fields merged into the ``$group`` ``_id`` next to
            ``year`` (``$Sales_Year``) and ``month`` (``$Sales_Month``).
        match: Filter conditions for the first ``$match`` stage; the
            condition ``Level_1_Area == "Kuwait"`` is always added.
        industry: Value the joined ``Brands`` document must have in
            ``Industry_Level_1``.

    Returns:
        The result of ``gm_sales_collection.aggregate``: one document per
        group holding the summed sales measures and ``numberOfOutlets`` (the
        number of documents that fell into the group), sorted by year and
        then month in ascending order.
    """
    sales_fields = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )

    match_stage = {'$match': {**match, "Level_1_Area": "Kuwait"}}

    # Join each sales document with its brand, keeping only brands of the requested industry.
    brand_lookup_stage = {
        "$lookup": {
            "from": "Brands",
            "localField": "Brand",
            "foreignField": "Brand_Name_English",
            "as": "brand",
            "pipeline": [
                {"$match": {"Industry_Level_1": industry}},
            ],
        }
    }
    # Drop the sales documents for which the lookup found no brand.
    has_brand_stage = {"$match": {"brand.0": {"$exists": True}}}

    group_spec = {'_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"}}
    for field in sales_fields:
        group_spec[field] = {'$sum': '$' + field}
    group_spec["numberOfOutlets"] = {"$sum": 1}

    sort_stage = {"$sort": {"_id.year": 1, "_id.month": 1}}

    stages = [match_stage, brand_lookup_stage, has_brand_stage, {'$group': group_spec}, sort_stage]
    return gm_sales_collection.aggregate(stages)



# Month-over-month seasonality per level-2 industry.
for i in industry:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # The two $in filters can match more than the two wanted (year, month) pairs;
        # filter_sales() keeps only the rows of `date` and `last_month`.
        data = filter_sales(
            list(group_sales(_id, {
                "Industry_Level_2": i,
                "Sales_Month": {"$in": [date.month, last_month.month]},
                "Sales_Year": {"$in": [date.year, last_month.year]},
            })),
            date,
            last_month,
        )
        result.append(generate_seasonality_record({"industry": i, "year": date.year, "month": date.month}, data))

# Fill the gaps with the growth of the industry looked up in industry_table.
for record in result:
    all_locations_growth = None  # fetched lazily, at most once per record
    for key in keys:
        if record.get(key) is not None:
            continue
        if all_locations_growth is None:
            # The fallback query does not depend on `key`, so run it once and reuse it.
            # A dedicated name is used so the `industry` list is not overwritten.
            industry_level_1 = industry_table[record['industry']]
            current_date = datetime(record['year'], record['month'], 1)
            last_month = current_date - relativedelta(months=1)
            all_locations_growth = filter_sales(
                list(group_sales_2({}, {
                    "Sales_Month": {"$in": [current_date.month, last_month.month]},
                    "Sales_Year": {"$in": [current_date.year, last_month.year]},
                }, industry_level_1)),
                current_date,
                last_month,
            )
        if len(all_locations_growth) != 2:
            # Both months are required; no key of this record can be filled.
            break
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        # Check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month, second_month)
industry_df = pd.DataFrame(result)

In [8]:
# Blank out growth values of 3 or more in every seasonality table.
# The masked column is assigned back explicitly: an in-place mask() on a column
# selection (df[col].mask(..., inplace=True)) is a chained update that does not
# modify the parent DataFrame under pandas Copy-on-Write.
for i in keys:
    for seasonality_df in (location_type_df, product_focus_df, area_df, industry_df):
        seasonality_df[i] = seasonality_df[i].mask(seasonality_df[i] >= 3, None)

In [10]:
# When using these seasonalities, add a weight to each one of them; using them directly
# will overvalue the seasonality.
display(
    location_type_df.describe(),
    product_focus_df.describe(),
    area_df.describe(),
    industry_df.describe(),
)

Unnamed: 0,year,month,Weekday_Store_Sales,Weekend_Store_Sales,Weekday_Delivery_Sales,Weekend_Delivery_Sales
count,4275.0,4275.0,4095.0,4095.0,4051.0,4015.0
mean,2019.463158,6.442105,0.023977,0.022706,0.070477,0.046157
std,2.27512,3.423898,0.314734,0.302067,0.471354,0.403711
min,2016.0,1.0,-0.997829,-0.997263,-0.988846,-0.988804
25%,2017.0,3.0,-0.120403,-0.109265,-0.219762,-0.204199
50%,2019.0,6.0,0.001377,-0.003759,-0.001096,0.020177
75%,2021.0,9.0,0.118894,0.115743,0.283443,0.269379
max,2023.0,12.0,1.894327,1.974439,2.486547,1.940132


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,14345.0,14345.0,13741.0,13592.0,13741.0,13455.0
mean,2019.463158,6.442105,0.020612,0.077502,0.021778,0.048819
std,2.274933,3.423616,0.28416,0.468043,0.282703,0.393132
min,2016.0,1.0,-0.984848,-0.998495,-0.982924,-0.997647
25%,2017.0,3.0,-0.114604,-0.202934,-0.105784,-0.192936
50%,2019.0,6.0,0.001705,-0.001096,-0.004558,0.021704
75%,2021.0,9.0,0.118894,0.318183,0.097627,0.269379
max,2023.0,12.0,1.994253,2.486547,1.988379,1.961039


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,13490.0,13490.0,12824.0,12179.0,12824.0,12233.0
mean,2019.463158,6.442105,0.020857,0.088492,0.026798,0.088944
std,2.274938,3.423624,0.373564,0.577229,0.376727,0.569301
min,2016.0,1.0,-1.0,-1.0,-1.0,-1.0
25%,2017.0,3.0,-0.157644,-0.219255,-0.152078,-0.230952
50%,2019.0,6.0,-0.012215,0.010852,-0.011409,0.023322
75%,2021.0,9.0,0.134001,0.28792,0.135761,0.293117
max,2023.0,12.0,2.408675,2.977512,1.959597,2.941109


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,3515.0,3515.0,3004.0,2081.0,3013.0,1995.0
mean,2019.463158,6.442105,0.011215,0.084444,0.017417,-0.007417
std,2.275177,3.423984,0.459099,0.743269,0.452691,0.578555
min,2016.0,1.0,-1.0,-1.0,-1.0,-1.0
25%,2017.0,3.0,-0.203879,-0.261239,-0.203919,-0.274372
50%,2019.0,6.0,-0.013213,-0.017771,-0.012515,-0.038805
75%,2021.0,9.0,0.171137,0.275053,0.17303,0.218762
max,2023.0,12.0,2.948898,2.956619,2.239008,2.910215


In [9]:
# Write every seasonality table to its own sheet of a single workbook.
sheets = {
    'location_type': location_type_df,
    'product_focus': product_focus_df,
    'area': area_df,
    'industry': industry_df,
}
with pd.ExcelWriter('seasonalities.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)
