In [1]:
from db.helpers import gm_sales_collection
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
from helpers.tables import industry_table,area_table
import numpy as np

In [2]:
# Names of the four sales measures that the aggregation pipelines sum per month
# and for which a month-over-month growth value is stored in every record.
keys = ['Weekday_Store_Sales','Weekday_Delivery_Sales','Weekend_Store_Sales','Weekend_Delivery_Sales']

def calculate_growth(value1, value2):
    """Return the relative change from ``value1`` to ``value2``.

    The result is ``(value2 - value1) / value1``. When ``value1`` equals zero
    the ratio is undefined and ``None`` is returned instead.
    """
    if value1 != 0:
        return (value2 - value1) / value1
    return None

def group_sales(group_id,match):
    """Aggregate Kuwait sales per year and month.

    ``match`` is merged with ``{"Level_1_Area": "Kuwait"}`` to form the
    ``$match`` filter, and ``group_id`` is merged with the ``Sales_Year`` and
    ``Sales_Month`` fields to form the ``$group`` key. Every group carries the
    sum of each of the four sales measures plus ``numberOfOutlets``, which
    counts the documents that fell into the group. Groups are sorted by year
    and then by month, both ascending.

    Returns whatever ``gm_sales_collection.aggregate`` returns for the pipeline.
    """
    sales_fields = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )

    match_stage = {'$match': {**match, "Level_1_Area": "Kuwait"}}

    group_spec = {'_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"}}
    for field in sales_fields:
        group_spec[field] = {'$sum': '$' + field}
    group_spec["numberOfOutlets"] = {"$sum": 1}

    sort_stage = {"$sort": {"_id.year": 1, "_id.month": 1}}

    return gm_sales_collection.aggregate([match_stage, {'$group': group_spec}, sort_stage])

def generate_seasonality_record(base,data):
    """Build one seasonality record holding per-outlet growth for each sales key.

    Args:
        base: Mapping whose entries are copied into the returned record.
        data: Aggregated rows, each holding the four sales sums and
            ``numberOfOutlets``. Exactly two rows are expected: ``data[0]`` is
            the baseline and ``data[1]`` is the row compared against it.

    Returns:
        A new dict with the entries of ``base`` plus, for every sales key:
        ``None`` when ``data`` does not hold exactly two rows; the growth value
        when it lies strictly between -1 and 2; and no entry at all when the
        growth is undefined (zero baseline) or outside that range.
    """
    result = {**base}
    keys = ['Weekday_Store_Sales','Weekday_Delivery_Sales','Weekend_Store_Sales','Weekend_Delivery_Sales']

    # Without exactly two rows there is nothing to compare for any key.
    if len(data) != 2:
        result.update(dict.fromkeys(keys))
        return result

    baseline, current = data[0], data[1]
    for key in keys:
        growth = calculate_growth(
            baseline[key] / baseline['numberOfOutlets'],
            current[key] / current['numberOfOutlets'],
        )
        # Compare with None explicitly: a growth of exactly 0.0 is a valid
        # result and must not be dropped just because it is falsy.
        if growth is not None and -1 < growth < 2:
            result[key] = growth
    return result


def getDates(start_date:datetime=datetime(2016, 1, 1),end_date:datetime=datetime(2023, 12, 1)):
    """Yield dates one month apart, from ``start_date`` up to but excluding ``end_date``."""
    one_month = relativedelta(months=1)
    current = start_date
    while True:
        if not current < end_date:
            return
        yield current
        current = current + one_month

def filter_sales(data:list,date_1:datetime,date_2:datetime):
    """Return the records whose ``_id`` year and month equal those of ``date_1`` or ``date_2``.

    The input order is preserved and ``data`` itself is not modified.
    """
    def falls_in(record_id, month_start):
        # The year is checked first; the month is only read when the year matches.
        return record_id['year'] == month_start.year and record_id['month'] == month_start.month

    selected = []
    for record in data:
        if falls_in(record['_id'], date_1) or falls_in(record['_id'], date_2):
            selected.append(record)
    return selected

In [3]:
# remove for loop for years and months and use getDates 

generate Location Type Seasonality

In [4]:
# Month-over-month sales growth for every location type and month.
# NOTE(review): unlike the product-focus and industry lookups in this notebook,
# this distinct() is not restricted to Level_1_Area "Kuwait" - confirm that is intended.
location_types = gm_sales_collection.distinct("Location_Type",{"Location_Type":{"$ne":0}})
_id = {'Location_type': '$Location_Type'}
result = []
for location_type in location_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # For January the two $in filters also match January of the previous
        # year and December of the current year; filter_sales keeps only the
        # two months that are actually compared.
        monthly_sales = group_sales(_id, {
            "Location_Type": location_type,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        })
        data = filter_sales(list(monthly_sales), date, last_month)
        result.append(generate_seasonality_record(
            {"location_type": location_type, "year": date.year, "month": date.month},
            data,
        ))

# Fill the gaps (key missing or None) with the growth over all Kuwait sales for
# the same pair of months. The fallback rows depend only on the month, so they
# are fetched once per (year, month) instead of once per missing value.
overall_sales_by_month = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        period = (record['year'], record['month'])
        if period not in overall_sales_by_month:
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            overall_sales = group_sales({}, {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            })
            overall_sales_by_month[period] = filter_sales(list(overall_sales), current_date, last_month)
        all_locations_growth = overall_sales_by_month[period]
        if len(all_locations_growth) != 2:
            # Both months are needed for a comparison; leave the gap as it is.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # TODO (carried over): check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
location_type_df = pd.DataFrame(result)

generate Products Seasonality

In [5]:
# Month-over-month sales growth for every product focus and month.
products_types = gm_sales_collection.distinct("Product_Focus",{"Level_1_Area":"Kuwait","Product_Focus":{"$ne":0}})
_id = {'Product_Focus': '$Product_Focus'}
result = []
for product_focus in products_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # For January the two $in filters also match January of the previous
        # year and December of the current year; filter_sales keeps only the
        # two months that are actually compared.
        monthly_sales = group_sales(_id, {
            "Product_Focus": product_focus,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        })
        data = filter_sales(list(monthly_sales), date, last_month)
        result.append(generate_seasonality_record(
            {"product_focus": product_focus, "year": date.year, "month": date.month},
            data,
        ))

# Fill the gaps (key missing or None) with the growth over all Kuwait sales for
# the same pair of months. The fallback rows depend only on the month, so they
# are fetched once per (year, month) instead of once per missing value.
overall_sales_by_month = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        period = (record['year'], record['month'])
        if period not in overall_sales_by_month:
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            overall_sales = group_sales({}, {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            })
            overall_sales_by_month[period] = filter_sales(list(overall_sales), current_date, last_month)
        all_locations_growth = overall_sales_by_month[period]
        if len(all_locations_growth) != 2:
            # Both months are needed for a comparison; leave the gap as it is.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # TODO (carried over): check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
product_focus_df = pd.DataFrame(result)

generate Area Seasonality

In [6]:
# Month-over-month sales growth for every level-3 area and month.
areas = gm_sales_collection.distinct("Level_3_Area",{"Level_1_Area":"Kuwait"})
_id = {'Level_3_Area': '$Level_3_Area'}
result = []
for area in areas:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # For January the two $in filters also match January of the previous
        # year and December of the current year; filter_sales keeps only the
        # two months that are actually compared.
        monthly_sales = group_sales(_id, {
            "Level_3_Area": area,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        })
        data = filter_sales(list(monthly_sales), date, last_month)
        result.append(generate_seasonality_record(
            {"area": area, "year": date.year, "month": date.month},
            data,
        ))

# Fill the gaps (key missing or None) with the growth of the level-2 area that
# area_table maps the record's area to.
for record in result:
    # The fallback rows are the same for all four keys of a record, so they are
    # fetched lazily and at most once per record.
    level_2_sales = None
    for key in keys:
        if record.get(key) is not None:
            continue
        if level_2_sales is None:
            area_level_2 = area_table[record['area']]
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            level_2_rows = group_sales({'Level_2_Area':"$Level_2_Area"}, {
                "Level_2_Area": area_level_2,
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            })
            level_2_sales = filter_sales(list(level_2_rows), current_date, last_month)
        all_locations_growth = level_2_sales
        if len(all_locations_growth) != 2:
            # Both months are needed for a comparison; leave the gap as it is.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # TODO (carried over): check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
area_df = pd.DataFrame(result)

generate Industry Type Seasonality

In [7]:
# Level-2 industries that appear in Kuwait sales; the value 0 is excluded.
industry = gm_sales_collection.distinct("Industry_Level_2",{"Level_1_Area":"Kuwait","Industry_Level_2":{"$ne":0}})
# The leading "$" makes the group key refer to the document field. Without it
# MongoDB treats the string as a literal, unlike the group keys of the other
# seasonality cells.
_id = {'Industry_Level_2': '$Industry_Level_2'}
result = []

def group_sales_2(group_id,match,industry):
    """Aggregate Kuwait sales per year and month for brands of one level-1 industry.

    ``match`` is merged with ``{"Level_1_Area": "Kuwait"}`` to form the first
    ``$match`` filter. Each sale is then joined to the ``Brands`` collection
    (``Brand`` against ``Brand_Name_English``), keeping only joined brands whose
    ``Industry_Level_1`` equals ``industry``; sales without such a brand are
    dropped. The remaining sales are grouped by ``group_id`` plus year and
    month, summing the four sales measures and counting the grouped documents
    as ``numberOfOutlets``. Groups are sorted by year and then by month.

    Returns whatever ``gm_sales_collection.aggregate`` returns for the pipeline.
    """
    sales_fields = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )

    brand_lookup = {
        "from": "Brands",
        "localField": "Brand",
        "foreignField": "Brand_Name_English",
        "as": "brand",
        "pipeline": [
            {"$match": {"Industry_Level_1": industry}},
        ],
    }

    group_spec = {'_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"}}
    for field in sales_fields:
        group_spec[field] = {'$sum': '$' + field}
    group_spec["numberOfOutlets"] = {"$sum": 1}

    stages = [
        {'$match': {**match, "Level_1_Area": "Kuwait"}},
        {"$lookup": brand_lookup},
        # Keep only the sales for which the lookup found at least one brand.
        {"$match": {"brand.0": {"$exists": True}}},
        {'$group': group_spec},
        {"$sort": {"_id.year": 1, "_id.month": 1}},
    ]
    return gm_sales_collection.aggregate(stages)



# Month-over-month sales growth for every level-2 industry and month.
for industry_level_2 in industry:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # For January the two $in filters also match January of the previous
        # year and December of the current year; filter_sales keeps only the
        # two months that are actually compared.
        monthly_sales = group_sales(_id, {
            "Industry_Level_2": industry_level_2,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        })
        data = filter_sales(list(monthly_sales), date, last_month)
        result.append(generate_seasonality_record(
            {"industry": industry_level_2, "year": date.year, "month": date.month},
            data,
        ))

# Fill the gaps (key missing or None) with the growth of the level-1 industry
# that industry_table maps the record's industry to.
for record in result:
    # The fallback rows are the same for all four keys of a record, so the
    # lookup-based query runs lazily and at most once per record.
    level_1_sales = None
    for key in keys:
        if record.get(key) is not None:
            continue
        if level_1_sales is None:
            # Named industry_level_1 so the `industry` list iterated above is
            # not overwritten.
            industry_level_1 = industry_table[record['industry']]
            current_date = datetime(record['year'],record['month'],1)
            last_month = current_date - relativedelta(months=1)
            level_1_rows = group_sales_2({}, {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }, industry_level_1)
            level_1_sales = filter_sales(list(level_1_rows), current_date, last_month)
        all_locations_growth = level_1_sales
        if len(all_locations_growth) != 2:
            # Both months are needed for a comparison; leave the gap as it is.
            continue
        first_month = all_locations_growth[0][key]/all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key]/all_locations_growth[1]['numberOfOutlets']
        # TODO (carried over): check next month, add all_locations_growth to next month growth
        record[key] = calculate_growth(first_month,second_month)
industry_df = pd.DataFrame(result)

In [8]:
# Replace growth values of 3 or more with missing values in every seasonality
# table. The masked column is assigned back explicitly: calling
# mask(..., inplace=True) on `df[col]` is a chained in-place update, which does
# not modify the DataFrame when pandas Copy-on-Write is active.
GROWTH_UPPER_LIMIT = 3
for i in keys:
    for seasonality_df in (location_type_df, product_focus_df, area_df, industry_df):
        seasonality_df[i] = seasonality_df[i].mask(seasonality_df[i] >= GROWTH_UPPER_LIMIT)

In [9]:
# When using these seasonalities, add a weight to each of them; using them
# directly will over-value the seasonality.
#
# Only the four sales columns are interpolated. The frames also hold the label
# column of each table, and the recorded output of the original whole-frame
# interpolate(inplace=True) calls shows a warning on every one of them.
# NOTE(review): each frame stacks the rows of all its groups, so a gap can be
# bridged with values from a neighbouring group - confirm that is acceptable.
for seasonality_df in (location_type_df, product_focus_df, area_df, industry_df):
    seasonality_df[keys] = seasonality_df[keys].interpolate()
display(location_type_df.describe())
display(product_focus_df.describe())
display(area_df.describe())
display(industry_df.describe())

  location_type_df.interpolate(inplace=True)
  product_focus_df.interpolate(inplace=True)
  area_df.interpolate(inplace=True)
  industry_df.interpolate(inplace=True)


Unnamed: 0,year,month,Weekday_Store_Sales,Weekend_Store_Sales,Weekday_Delivery_Sales,Weekend_Delivery_Sales
count,4275.0,4275.0,4275.0,4275.0,4275.0,4275.0
mean,2019.463158,6.442105,0.015926,0.015377,0.06605,0.038238
std,2.27512,3.423898,0.310981,0.298855,0.465213,0.399089
min,2016.0,1.0,-0.997829,-0.997263,-0.988846,-0.988804
25%,2017.0,3.0,-0.144029,-0.136765,-0.219762,-0.204199
50%,2019.0,6.0,-0.001117,-0.007667,0.030572,0.021704
75%,2021.0,9.0,0.11726,0.097627,0.271573,0.245904
max,2023.0,12.0,1.894327,1.974439,2.486547,1.940132


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,14345.0,14345.0,14345.0,14345.0,14345.0,14345.0
mean,2019.463158,6.442105,0.011878,0.07305,0.013663,0.040464
std,2.274933,3.423616,0.281504,0.461868,0.279688,0.388961
min,2016.0,1.0,-0.984848,-0.998495,-0.982924,-0.997647
25%,2017.0,3.0,-0.123329,-0.202934,-0.118578,-0.204199
50%,2019.0,6.0,-0.001117,0.030572,-0.0132,0.03682
75%,2021.0,9.0,0.108911,0.271573,0.093751,0.245904
max,2023.0,12.0,1.994253,2.486547,1.988379,1.961039


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,13490.0,13490.0,13490.0,13489.0,13490.0,13489.0
mean,2019.463158,6.442105,0.004561,0.075535,0.010982,0.082101
std,2.274938,3.423624,0.374982,0.579841,0.377727,0.573692
min,2016.0,1.0,-1.0,-1.0,-1.0,-1.0
25%,2017.0,3.0,-0.175009,-0.25,-0.162487,-0.245221
50%,2019.0,6.0,-0.01897,0.008267,-0.017592,0.022936
75%,2021.0,9.0,0.122203,0.285812,0.12123,0.298906
max,2023.0,12.0,2.408675,2.977512,1.959597,2.941109


Unnamed: 0,year,month,Weekday_Store_Sales,Weekday_Delivery_Sales,Weekend_Store_Sales,Weekend_Delivery_Sales
count,3515.0,3515.0,3515.0,3515.0,3515.0,3515.0
mean,2019.463158,6.442105,0.006958,-0.075711,0.001212,-0.119759
std,2.275177,3.423984,0.469825,0.6647,0.443196,0.551785
min,2016.0,1.0,-1.0,-1.0,-1.0,-1.0
25%,2017.0,3.0,-0.253598,-0.513125,-0.237185,-0.506829
50%,2019.0,6.0,-0.024072,-0.08299,-0.025819,-0.115439
75%,2021.0,9.0,0.171837,0.1315,0.171058,0.130686
max,2023.0,12.0,2.948898,2.956619,2.239008,2.910215


In [10]:

# Write every seasonality table to its own sheet of one workbook.
sheets = {
    'location_type': location_type_df,
    'product_focus': product_focus_df,
    'area': area_df,
    'industry': industry_df,
}
with pd.ExcelWriter('seasonalities.xlsx', engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)
