In [8]:
from db.helpers import new_sales_collection 
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
from helpers.tables import industry_table,area_table
import numpy as np

In [9]:
# Sales fields for which a growth value is computed in the cells below.
keys = [
    'Weekday_Store_Sales',
    'Weekday_Delivery_Sales',
    'Weekend_Store_Sales',
    'Weekend_Delivery_Sales',
]

def calculate_growth(value1, value2):
    """Return the relative change from ``value1`` to ``value2``.

    Computed as ``(value2 - value1) / value1``. When ``value1`` equals zero
    the ratio is undefined and ``None`` is returned instead.
    """
    if value1 != 0:
        return (value2 - value1) / value1
    return None

def group_sales(group_id,match):
    """Aggregate Kuwait sales per group and calendar month, newest month first.

    Args:
        group_id: Extra fields for the ``$group`` ``_id`` (field name ->
            ``"$field"`` expression); ``year`` and ``month`` are always added.
        match: Extra ``$match`` conditions merged with the fixed ones.

    Fixed ``$match`` conditions: ``Level_1_Area`` is ``"Kuwait"``, ``Source``
    is not ``"Algorithm"``, and at least one of the four sales fields is not
    null.

    Each output row carries the sum of every sales field plus
    ``numberOfOutlets``, which is the number of documents in the group.

    Returns:
        Whatever ``new_sales_collection.aggregate`` returns for the pipeline.
    """
    # Field order mirrors the original $or clause and $group accumulator list.
    presence_fields = (
        "Weekday_Store_Sales",
        "Weekend_Store_Sales",
        "Weekday_Delivery_Sales",
        "Weekend_Delivery_Sales",
    )
    summed_fields = (
        'Weekday_Store_Sales',
        'Weekday_Delivery_Sales',
        'Weekend_Store_Sales',
        'Weekend_Delivery_Sales',
    )

    match_stage = {
        '$match': {
            **match,
            "Level_1_Area": "Kuwait",
            "$or": [{field: {"$ne": None}} for field in presence_fields],
            "Source": {'$ne': 'Algorithm'},
        }
    }

    group_spec = {'_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"}}
    for field in summed_fields:
        group_spec[field] = {'$sum': '$' + field}
    group_spec["numberOfOutlets"] = {"$sum": 1}

    sort_stage = {"$sort": {"_id.year": -1, "_id.month": -1}}

    return new_sales_collection.aggregate([match_stage, {'$group': group_spec}, sort_stage])

def generate_seasonality_record(base,data):
    """Build one seasonality row from two monthly aggregate rows.

    Args:
        base: Identifying fields copied into the result (not mutated).
        data: Aggregate rows, each holding the four sales sums and
            ``numberOfOutlets``. Exactly two rows are required.

    Returns:
        A new dict with ``base`` plus, per sales key:
        * ``None`` when ``data`` does not contain exactly two rows;
        * the growth of the per-outlet average from ``data[0]`` to ``data[1]``
          when it lies strictly between -1 and 2;
        * no entry at all when the growth is undefined (zero baseline) or
          outside that range, so callers can detect it with ``key not in``.
    """
    result = {**base}
    keys = ['Weekday_Store_Sales','Weekday_Delivery_Sales','Weekend_Store_Sales','Weekend_Delivery_Sales']

    # The row count does not depend on the key, so decide once up front.
    if len(data) != 2:
        for key in keys:
            result[key] = None
        return result

    first, second = data[0], data[1]
    for key in keys:
        growth = calculate_growth(
            first[key] / first['numberOfOutlets'],
            second[key] / second['numberOfOutlets'],
        )
        # Compare against None explicitly: a growth of exactly 0.0 is a valid
        # result and must not be discarded as if it were missing.
        if growth is not None and -1 < growth < 2:
            result[key] = growth
    return result


def getDates(start_date:datetime=datetime(2016, 1, 1),end_date:datetime=datetime(2023, 12, 1)):
    """Yield dates from ``end_date`` back to ``start_date`` in one-month steps.

    The sequence is descending and includes both ends when they are a whole
    number of months apart. Nothing is yielded if ``end_date`` is earlier
    than ``start_date``.
    """
    current = end_date
    while True:
        if not current >= start_date:
            return
        yield current
        current = current + relativedelta(months=-1)

def filter_sales(data:list,date_1:datetime,date_2:datetime):
    """Keep only the rows whose ``_id`` year/month equals one of two dates.

    Args:
        data: Rows with an ``_id`` mapping containing ``year`` and ``month``.
        date_1: First date to match on (year and month only).
        date_2: Second date to match on (year and month only).

    Returns:
        A new list with the matching rows, in their original order.
    """
    def _same_month(row, when):
        return row['_id']['year'] == when.year and row['_id']['month'] == when.month

    kept = []
    for row in data:
        if _same_month(row, date_1) or _same_month(row, date_2):
            kept.append(row)
    return kept

In [10]:
# remove for loop for years and months and use getDates 

generate Location Type Seasonality

In [11]:
# Seasonality per location type: one row per (location type, month), with
# missing values filled from the all-locations figure for the same month.
location_types = new_sales_collection.distinct("Location_Type",{"Location_Type":{"$ne":0}})
_id = {'Location_Type': '$Location_Type'}
result = []
for location_type in location_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # Both months are fetched in one query. Across a year boundary the two
        # $in filters also admit unwanted (year, month) pairs; filter_sales
        # drops them.
        type_match = {
            "Location_Type": location_type,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, type_match)), date, last_month)
        result.append(generate_seasonality_record(
            {"Location_Type": location_type, "Sales_Year": date.year, "Sales_Month": date.month},
            data,
        ))

# The all-locations rows depend only on the month, not on the key or the
# location type, so each month is queried at most once and then reused.
# Assumes the collection is not modified while this cell runs.
all_locations_by_month = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        period = (record['Sales_Year'], record['Sales_Month'])
        if period not in all_locations_by_month:
            current_date = datetime(record['Sales_Year'], record['Sales_Month'], 1)
            last_month = current_date - relativedelta(months=1)
            month_match = {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            all_locations_by_month[period] = filter_sales(
                list(group_sales({}, month_match)), current_date, last_month
            )
        all_locations_growth = all_locations_by_month[period]
        if len(all_locations_growth) != 2:
            # No complete month pair available: leave the value missing.
            continue
        # group_sales sorts newest first, so index 0 is the record's own month
        # and index 1 the month before it.
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        growth = calculate_growth(first_month, second_month)
        # Check next month, add all_locations_growth to next month growth
        record[key] = growth
location_type_df = pd.DataFrame(result)

2023-12-01 00:00:00
2023-11-01 00:00:00
2023-10-01 00:00:00
2023-09-01 00:00:00
2023-08-01 00:00:00
2023-07-01 00:00:00
2023-06-01 00:00:00
2023-05-01 00:00:00
2023-04-01 00:00:00
2023-03-01 00:00:00
2023-02-01 00:00:00
2023-01-01 00:00:00
2022-12-01 00:00:00
2022-11-01 00:00:00
2022-10-01 00:00:00
2022-09-01 00:00:00
2022-08-01 00:00:00
2022-07-01 00:00:00
2022-06-01 00:00:00
2022-05-01 00:00:00
2022-04-01 00:00:00
2022-03-01 00:00:00
2022-02-01 00:00:00
2022-01-01 00:00:00
2021-12-01 00:00:00
2021-11-01 00:00:00
2021-10-01 00:00:00
2021-09-01 00:00:00
2021-08-01 00:00:00
2021-07-01 00:00:00
2021-06-01 00:00:00
2021-05-01 00:00:00
2021-04-01 00:00:00
2021-03-01 00:00:00
2021-02-01 00:00:00
2021-01-01 00:00:00
2020-12-01 00:00:00
2020-11-01 00:00:00
2020-10-01 00:00:00
2020-09-01 00:00:00
2020-08-01 00:00:00
2020-07-01 00:00:00
2020-06-01 00:00:00
2020-05-01 00:00:00
2020-04-01 00:00:00
2020-03-01 00:00:00
2020-02-01 00:00:00
2020-01-01 00:00:00
2019-12-01 00:00:00
2019-11-01 00:00:00


generate Products Seasonality

In [None]:
# Seasonality per product focus: one row per (product focus, month), with
# missing values filled from the all-products figure for the same month.
products_types = new_sales_collection.distinct("Product_Focus",{"Level_1_Area":"Kuwait","Product_Focus":{"$ne":0}})
_id = {'Product_Focus': '$Product_Focus'}
result = []
for product_focus in products_types:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # Both months are fetched in one query; filter_sales removes the extra
        # (year, month) pairs the two $in filters admit across a year boundary.
        product_match = {
            "Product_Focus": product_focus,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, product_match)), date, last_month)
        result.append(generate_seasonality_record(
            {"Product_Focus": product_focus, "Sales_Year": date.year, "Sales_Month": date.month},
            data,
        ))

# The fallback rows depend only on the month, so each month is queried at most
# once and then reused for every key and every product focus.
# Assumes the collection is not modified while this cell runs.
all_products_by_month = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        period = (record['Sales_Year'], record['Sales_Month'])
        if period not in all_products_by_month:
            current_date = datetime(record['Sales_Year'], record['Sales_Month'], 1)
            last_month = current_date - relativedelta(months=1)
            month_match = {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            all_products_by_month[period] = filter_sales(
                list(group_sales({}, month_match)), current_date, last_month
            )
        all_locations_growth = all_products_by_month[period]
        if len(all_locations_growth) != 2:
            # No complete month pair available: leave the value missing.
            continue
        # group_sales sorts newest first, so index 0 is the record's own month
        # and index 1 the month before it.
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        growth = calculate_growth(first_month, second_month)
        # Check next month, add all_locations_growth to next month growth
        record[key] = growth
product_focus_df = pd.DataFrame(result)

generate Area Seasonality

In [None]:
# Seasonality per Level 3 area: one row per (area, month), with missing values
# filled from the enclosing Level 2 area for the same month.
areas = new_sales_collection.distinct("Level_3_Area",{"Level_1_Area":"Kuwait"})
_id = {'Level_3_Area': '$Level_3_Area'}
result = []
for area in areas:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # Both months are fetched in one query; filter_sales removes the extra
        # (year, month) pairs the two $in filters admit across a year boundary.
        area_match = {
            "Level_3_Area": area,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, area_match)), date, last_month)
        result.append(generate_seasonality_record(
            {"Level_3_Area": area, "Sales_Year": date.year, "Sales_Month": date.month},
            data,
        ))

# check growth for level 2 area
# The fallback rows depend only on the Level 2 area and the month, so each
# combination is queried at most once and then reused.
# Assumes the collection is not modified while this cell runs.
level_2_by_month = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        area_level_2 = area_table[record['Level_3_Area']]
        cache_key = (area_level_2, record['Sales_Year'], record['Sales_Month'])
        if cache_key not in level_2_by_month:
            current_date = datetime(record['Sales_Year'], record['Sales_Month'], 1)
            last_month = current_date - relativedelta(months=1)
            level_2_match = {
                "Level_2_Area": area_level_2,
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            level_2_by_month[cache_key] = filter_sales(
                list(group_sales({'Level_2_Area': "$Level_2_Area"}, level_2_match)),
                current_date,
                last_month,
            )
        all_locations_growth = level_2_by_month[cache_key]
        if len(all_locations_growth) != 2:
            # No complete month pair available: leave the value missing.
            continue
        # group_sales sorts newest first, so index 0 is the record's own month
        # and index 1 the month before it.
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        growth = calculate_growth(first_month, second_month)
        # Check next month, add all_locations_growth to next month growth
        record[key] = growth
area_df = pd.DataFrame(result)

generate Industry Type Seasonality

In [None]:
industry = new_sales_collection.distinct("Industry_Level_2",{"Level_1_Area":"Kuwait","Industry_Level_2":{"$ne":0}})
# The '$' prefix makes this a field reference, as in the other seasonality
# cells. Without it every group gets the literal string 'Industry_Level_2'.
_id = {'Industry_Level_2': '$Industry_Level_2'}
result = []

def group_sales_2(group_id,match,industry):
    """Aggregate Kuwait sales per month for brands of one Level 1 industry.

    Args:
        group_id: Extra fields for the ``$group`` ``_id`` (field name ->
            ``"$field"`` expression); ``year`` and ``month`` are always added.
        match: Extra ``$match`` conditions merged with the fixed ones
            (``Level_1_Area`` is ``"Kuwait"``, ``Monthly_Sales`` is neither
            null nor 0).
        industry: Value that a joined ``Brands`` document's
            ``Industry_Level_1`` must equal for the sale to be kept.

    Returns:
        Whatever ``new_sales_collection.aggregate`` returns. Rows are sorted
        newest month first, the same order ``group_sales`` uses, so callers
        that index rows 0 and 1 compute growth in the same direction for
        both helpers.

    Raises:
        Exception: ``"group_sales_2 error"`` if the aggregation fails; the
            pipeline is printed first and the original error is chained as
            ``__cause__``.
    """
    pipeline = [
        {'$match': {**match, "Level_1_Area": "Kuwait", "Monthly_Sales": {"$nin": [None, 0]}}},
        # Join each sale to its brand, keeping only brands of the requested industry.
        {"$lookup": {
            "from": "Brands",
            "localField": "Brand",
            "foreignField": "Brand_Name_English",
            "as": "brand",
            "pipeline": [
                {
                    "$match": {
                        "Industry_Level_1": industry,
                    },
                },
            ],
        }},
        # Drop sales whose brand did not match, i.e. the joined array is empty.
        {"$match": {"brand.0": {"$exists": True}}},
        {
            '$group': {
                '_id': {**group_id, "year": "$Sales_Year", "month": "$Sales_Month"},
                'Weekday_Store_Sales': {
                    '$sum': '$Weekday_Store_Sales'
                },
                'Weekday_Delivery_Sales': {
                    '$sum': '$Weekday_Delivery_Sales'
                },
                'Weekend_Store_Sales': {
                    '$sum': '$Weekend_Store_Sales'
                },
                'Weekend_Delivery_Sales': {
                    '$sum': '$Weekend_Delivery_Sales'
                },
                "numberOfOutlets": {"$sum": 1}
            }
        },
        # Newest first, matching group_sales. The ascending order used before
        # made this fallback compute growth in the opposite direction.
        {
            "$sort": {
                "_id.year": -1,
                "_id.month": -1
            }
        },
    ]
    try:
        return new_sales_collection.aggregate(pipeline)
    except Exception as error:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit still propagate; chain the cause to keep the real error.
        print(pipeline)
        raise Exception("group_sales_2 error") from error




# Seasonality per Level 2 industry: one row per (industry, month).
for industry_level_2 in industry:
    for date in getDates():
        last_month = date - relativedelta(months=1)
        # Both months are fetched in one query; filter_sales removes the extra
        # (year, month) pairs the two $in filters admit across a year boundary.
        industry_match = {
            "Industry_Level_2": industry_level_2,
            "Sales_Month": {"$in": [date.month, last_month.month]},
            "Sales_Year": {"$in": [date.year, last_month.year]},
        }
        data = filter_sales(list(group_sales(_id, industry_match)), date, last_month)
        result.append(generate_seasonality_record(
            {"Industry_Level_2": industry_level_2, "Sales_Year": date.year, "Sales_Month": date.month},
            data,
        ))

# Fill missing values from the parent Level 1 industry. The fallback rows
# depend only on that industry and the month, so each combination is queried
# at most once. Assumes the collection is not modified while this cell runs.
level_1_by_month = {}
for record in result:
    for key in keys:
        if record.get(key) is not None:
            continue
        # A separate name keeps the `industry` list iterated above intact.
        industry_level_1 = industry_table[record['Industry_Level_2']]
        cache_key = (industry_level_1, record['Sales_Year'], record['Sales_Month'])
        if cache_key not in level_1_by_month:
            current_date = datetime(record['Sales_Year'], record['Sales_Month'], 1)
            last_month = current_date - relativedelta(months=1)
            month_match = {
                "Sales_Month": {"$in": [current_date.month, last_month.month]},
                "Sales_Year": {"$in": [current_date.year, last_month.year]},
            }
            level_1_by_month[cache_key] = filter_sales(
                list(group_sales_2({}, month_match, industry_level_1)),
                current_date,
                last_month,
            )
        all_locations_growth = level_1_by_month[cache_key]
        if len(all_locations_growth) != 2:
            # No complete month pair available: leave the value missing.
            continue
        # Rows are used in the order group_sales_2 returned them.
        first_month = all_locations_growth[0][key] / all_locations_growth[0]['numberOfOutlets']
        second_month = all_locations_growth[1][key] / all_locations_growth[1]['numberOfOutlets']
        growth = calculate_growth(first_month, second_month)
        # Check next month, add all_locations_growth to next month growth
        record[key] = growth
industry_df = pd.DataFrame(result)


In [None]:
# Write every seasonality table to its own sheet of a single workbook.
with pd.ExcelWriter('seasonalities_reverse.xlsx', engine='xlsxwriter') as writer:
    sheets = (
        ('location_type', location_type_df),
        ('product_focus', product_focus_df),
        ('area', area_df),
        ('industry', industry_df),
    )
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name)