In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import os
from typing import Optional
import os
import pickle

# Load Data

In [None]:
# Absolute, machine-specific location of the warehouse parquet file read in the next cell.
# NOTE(review): hard-coded local path — consider deriving it from a configurable data directory.
df_warehouse_path = r'C:\Users\yotam\SDatta\fashion\strategy_benchmark\source_simulation\31_12_2023\df_all_store_VZ01.parquet'

In [None]:
# Read the warehouse parquet file and normalise SKU ids to strings.
df_warehouse = pd.read_parquet(df_warehouse_path)
df_warehouse["sku"]=df_warehouse["sku"].astype(str)
# Keep only the rows whose stock is positive (this is a per-row filter, not a per-SKU maximum).
df_warehouse = df_warehouse[df_warehouse["stock"]>0]
# Drop two specific SKUs. NOTE(review): the reason for excluding them is not recorded here.
df_warehouse = df_warehouse[~df_warehouse["sku"].isin(["100080385000001","100080385000002"])] 
# df_warehouse = df_warehouse[df_warehouse["sku"]=="100528128000008"]

In [None]:
# df_warehouse[df_warehouse["sku"]=="100633202000009"]

In [None]:
folder_of_stock_sales = r'C:\Users\yotam\SDatta\fashion\strategy_benchmark\source_simulation\31_12_2023\datasets_5'
# Read every file once and concatenate a single time: calling pd.concat inside the
# loop re-copies the accumulated frame for each file (quadratic in the number of files).
stock_sales_frames = [
    pd.read_parquet(os.path.join(folder_of_stock_sales, file))
    for file in os.listdir(folder_of_stock_sales)
]
# An empty folder yields an empty frame, exactly as the incremental version did.
df_stock_sales = pd.concat(stock_sales_frames) if stock_sales_frames else pd.DataFrame()
# Normalise the identifiers to strings so they match the warehouse frame and the dict keys used later.
df_stock_sales["sku"]=df_stock_sales["sku"].astype(str)
df_stock_sales["store"]=df_stock_sales["store"].astype(str)
df_stock_sales = df_stock_sales.drop(columns=["item","sku_store"])
# Keep only the SKUs that survived the warehouse filtering.
df_stock_sales = df_stock_sales[df_stock_sales["sku"].isin(df_warehouse["sku"].unique().tolist())]
# df_stock_sales = df_stock_sales[df_stock_sales["sku"]=="100528128000008"]

In [None]:
# Rename the warehouse quantity column to "warehouse_stock" (the store frame also has a "stock" column).
df_warehouse = df_warehouse.rename(columns={"stock": "warehouse_stock"})
# NOTE: plain assignment creates aliases, not copies — in-place changes made through
# the *_temp names are also visible through df_stock_sales / df_warehouse.
df_stock_sales_temp = df_stock_sales
df_warehouse_temp = df_warehouse

# Assumption:

##### 1. The delivery lead time is 2 weeks (later it will vary dynamically between stores, for example from 1 day to 14 days)
##### 2. The prior sales distribution of the stores is taken from the year before the experiment (for example, 2019 if the experiment is in 2020)
##### 3. All 126 stores are active in the warehouse optimization
##### 4. The stores' time series do not all have the same length: some start earlier and some later (this depends on the store's shipping time from the warehouse and on when the decision was made to start shipping to the store)
##### 5. Discount over time:

        5.1 After 20 weeks the discount is 0.8-0.85 of the original price (the discount is the worst case for a store, because the store then sells the product at a loss)

##### 6. Removing worst stores over time:

        6.1 6 weeks after a store gets its first delivery, if the store is in the bottom 10% of all stores, we remove it from the warehouse optimization
        6.2 8 weeks after a store gets its first delivery, if the store is in the bottom 20% of all stores, we remove it from the warehouse optimization
        6.3 12 weeks after a store gets its first delivery, if the store is in the bottom 30% of all stores, we remove it from the warehouse optimization
        6.4 14 weeks after a store gets its first delivery, if the store is in the bottom 40% of all stores, we remove it from the warehouse optimization

##### 7. There is no reordering into the warehouse: the stock we start with is all we have
##### 8. If the warehouse stock of a SKU is 0, we can move stock of that SKU from another store to the store that needs it, but this is not optimal
##### 10. The first allocation of stock to a store is given by Palmers (as 50% of the SKU stock they believe the store can sell)



In [None]:
# Parse the date columns. Because the *_temp names were bound by plain assignment,
# these column assignments modify the same frames that df_stock_sales / df_warehouse refer to.
df_stock_sales_temp["date"] = pd.to_datetime(df_stock_sales_temp["date"])
df_warehouse_temp["date"] = pd.to_datetime(df_warehouse_temp["date"])
# sort_values returns new frames, so from here on the *_temp names refer to separate, sorted objects.
df_stock_sales_temp = df_stock_sales_temp.sort_values(by=["date", "store"])
df_warehouse_temp = df_warehouse_temp.sort_values(by=["date"])

# Load dict of deliveries from warehouse and arrivals to stores

In [None]:

# Absolute, machine-specific paths of the two schedule files.
# NOTE(review): hard-coded local paths — consider a configurable data directory.
dict_arrivals_store_deliveries_path = r"C:\Users\yotam\SDatta\fashion\strategy_benchmark\source_simulation\31_12_2023\date_to_store_deliveries_dict.json"
dict_deliveries_from_wharehouse_dict_path = r"C:\Users\yotam\SDatta\fashion\strategy_benchmark\source_simulation\31_12_2023\deliveries_from_wharehouse_dict.json"

# Both files are used below as {date: [store, ...]} mappings (they are iterated with .items()
# and the values are appended to).
with open(dict_arrivals_store_deliveries_path) as json_file:
    dict_arrivals_store_deliveries = json.load(json_file)

with open(dict_deliveries_from_wharehouse_dict_path) as json_file:
    dict_deliveries_from_wharehouse_dict = json.load(json_file)

In [None]:
# Schedule patch table: {store_problem: store_same}. The cells below add the key store to
# every date list that already contains the value store.
# NOTE(review): presumably the key stores are missing from the schedule files and share a
# delivery route with the value stores — confirm. The keys/values are ints, so the patch only
# matches if the JSON lists hold ints as well.
fix_dict_arrivals_stors ={84:173,
 95:47,
 91:225,
 90:180,
 73:181,
 74:181,
 99:106,
 79:160,
 81:186,
 85:104,
 88:104,
 8:162,
 96:43,
 76:10,
 89:57,82:106,7:173,69:26}

In [None]:
# Add each "problem" store to every arrival date on which its reference store is served.
# The membership guard keeps the cell idempotent: without it, re-running the cell (or a store
# that is already listed) would put the same store id into a day's list more than once.
for date,stores in dict_arrivals_store_deliveries.items():
    for store_problem,store_same in fix_dict_arrivals_stors.items():
        if store_same in stores and store_problem not in stores:
            stores.append(store_problem)

In [None]:
# Add each "problem" store to every warehouse dispatch date on which its reference store is served.
# The membership guard keeps the cell idempotent: without it, re-running the cell (or a store
# that is already listed) would put the same store id into a day's list more than once, and the
# replenishment strategy would then process that store twice on the same day.
for date,stores in dict_deliveries_from_wharehouse_dict.items():
    for store_problem,store_same in fix_dict_arrivals_stors.items():
        if store_same in stores and store_problem not in stores:
            stores.append(store_problem)

In [None]:
# Collect every store id that appears in at least one arrival list dated in 2020.
unique_stores_in_2020 = set()
for date,stores in dict_arrivals_store_deliveries.items():
    # The dict keys are date strings; only the calendar year is needed here.
    extract_year = pd.to_datetime(date).year
    if extract_year == 2020:
        # show all the unique stores that arrive stock in 2020
        unique_stores_in_2020.update(stores)
                
        
        

# prepare dict of sales and dict of stocks

In [None]:
def initialize_stocks(df_stock_sales, df_warehouse, stores, skus):
    """
    Build the opening stock table for every store and for the warehouse ("VZ01").
    Args:
    --------
    df_stock_sales: DataFrame
        needs the columns 'store', 'sku' and 'stock'; rows are read in their existing order
    df_warehouse: DataFrame
        needs the columns 'sku' and 'warehouse_stock'
    stores: list
        store ids to initialise
    skus: list
        sku ids to initialise
    -------
    return: dict_stocks
        dict_stocks[store][sku] = first strictly positive stock value found for the pair (0 if none)
        dict_stocks["VZ01"][sku] = max warehouse stock of the sku minus the stores' opening stock, floored at 0
    """
    dict_stocks = {store: {sku: 0 for sku in skus} for store in list(stores) + ["VZ01"]}
    # One vectorised pass instead of a Python lambda per group: pairs without any positive
    # row simply keep the default 0 set above.
    positive_rows = df_stock_sales[df_stock_sales['stock'] > 0]
    first_positive_stock = positive_rows.groupby(['store', 'sku'])['stock'].first()
    for (store, sku), stock in first_positive_stock.items():
        dict_stocks[store][sku] = stock
    dict_sum_sku_stock = {sku: sum(dict_stocks[store][sku] for store in stores) for sku in skus}
    # Peak warehouse stock per sku, computed once rather than by re-filtering the frame per sku.
    warehouse_peak = df_warehouse.groupby('sku')['warehouse_stock'].max()
    for sku in skus:
        peak = warehouse_peak.get(sku, 0)
        if pd.isna(peak):
            # A sku without usable warehouse rows starts with an empty warehouse instead of NaN.
            peak = 0
        dict_stocks["VZ01"][sku] = max(peak - dict_sum_sku_stock[sku], 0)  # Prevent negative stock
    return dict_stocks


In [None]:
def create_dict_sales(df_stock_sales):
    """
    Turn the sales rows into a nested lookup.
    Args:
    --------
    df_stock_sales: DataFrame
        needs the columns 'store', 'date' (datetime), 'sku' and 'sales'
    -------
    return: dict_sales
        dict_sales[store][date] = [(sku, amount), ...] with date formatted as 'YYYY-MM-DD';
        rows whose sales are 0 are ignored and amounts are summed per (store, date, sku)
    """
    non_zero_sales = df_stock_sales.loc[df_stock_sales['sales'] != 0]
    daily_totals = non_zero_sales.groupby(['store', 'date', 'sku'])['sales'].sum().reset_index()
    date_labels = daily_totals['date'].dt.strftime('%Y-%m-%d')
    dict_sales = {}
    records = zip(daily_totals['store'], date_labels, daily_totals['sku'], daily_totals['sales'])
    for store, date, sku, amount in records:
        # Create the store and date levels on first use, then append the sale.
        dict_sales.setdefault(store, {}).setdefault(date, []).append((sku, amount))
    return dict_sales

In [None]:
# The simulation universe: every sku and every store present in the filtered stock/sales frame.
skus_simulation = df_stock_sales_temp["sku"].unique().tolist()
stores_simulation = df_stock_sales_temp["store"].unique().tolist()

In [None]:
# Opening stock per store and sku, plus the warehouse entry under the key "VZ01".
dict_stocks = initialize_stocks( df_stock_sales_temp,df_warehouse_temp, stores_simulation, skus_simulation)
# Displayed value: total opening stock over all stores, warehouse excluded.
sum([dict_stocks[store][sku] for store in dict_stocks for sku in dict_stocks[store] if store != "VZ01"])

In [None]:
# Sanity check: print each sku followed by its opening warehouse ("VZ01") stock.
for store in dict_stocks:
    for sku in dict_stocks[store]:
        if store == "VZ01":
            print(sku)
            print(dict_stocks[store][sku])

In [None]:
# Nested sales lookup: dict_sales[store][date] = [(sku, amount), ...].
dict_sales = create_dict_sales(df_stock_sales_temp)
# print(dict_stocks)

# Get the Palmers start and end dates for each store and SKU

In [None]:
def filter_from_first_non_zero(group):
    """
    Return the rows of `group` from the first row whose 'stock_palmers' is non-zero onwards.

    The cut is positional, so it does not depend on the index labels being unique or ordered.
    When every value is zero (or the group is empty) an empty frame with the same columns is
    returned, which is what callers test for with `.empty`.
    """
    non_zero = group['stock_palmers'].ne(0).to_numpy()
    if not non_zero.any():
        return group.iloc[0:0]
    # argmax on a boolean array gives the position of the first True.
    return group.iloc[int(non_zero.argmax()):]

In [None]:
def filter_to_last_non_zero(group):
    """
    Keep the rows of `group` dated no later than one day after the last non-zero 'stock_palmers' row.

    The 'date' column of the returned frame holds strings. The input frame is left untouched:
    the dates are parsed into a separate Series instead of being written back into `group`.
    A group without any non-zero stock yields an empty frame.
    """
    dates = pd.to_datetime(group['date'], format='%Y-%m-%d')
    last_non_zero_date = dates[group['stock_palmers'].ne(0)].max()
    day_after_last_non_zero = last_non_zero_date + pd.Timedelta(days=1)
    # With no non-zero row the bound is NaT and the comparison is False everywhere.
    keep = dates <= day_after_last_non_zero
    trimmed = group.loc[keep].copy()
    trimmed['date'] = dates.loc[keep].astype(str)
    return trimmed

In [None]:
def filter_from_first_non_zero_warehouse(group):
    """
    Return the rows of `group` from the first row whose 'warehouse_stock' is non-zero onwards.

    The cut is positional, so it does not depend on the index labels being unique or ordered.
    When every value is zero (or the group is empty) an empty frame with the same columns is
    returned, which is what callers test for with `.empty`.
    """
    non_zero = group['warehouse_stock'].ne(0).to_numpy()
    if not non_zero.any():
        return group.iloc[0:0]
    # argmax on a boolean array gives the position of the first True.
    return group.iloc[int(non_zero.argmax()):]

In [None]:
def filter_to_last_non_zero_wh(group):
    """
    Keep the rows of `group` dated no later than one day after the last non-zero 'warehouse_stock' row.

    The 'date' column of the returned frame holds strings. The input frame is left untouched:
    the dates are parsed into a separate Series instead of being written back into `group`.
    A group without any non-zero stock yields an empty frame.
    """
    dates = pd.to_datetime(group['date'], format='%Y-%m-%d')
    last_non_zero_date = dates[group['warehouse_stock'].ne(0)].max()
    day_after_last_non_zero = last_non_zero_date + pd.Timedelta(days=1)
    # With no non-zero row the bound is NaT and the comparison is False everywhere.
    keep = dates <= day_after_last_non_zero
    trimmed = group.loc[keep].copy()
    trimmed['date'] = dates.loc[keep].astype(str)
    return trimmed

In [None]:
def creat_dict_start_dates(df_palmers,df_warehouse):
    """
    Find the date on which every (sku, store) pair, and every sku in the warehouse, first has stock.
    Args:
    --------
    df_palmers: DataFrame
        needs the columns 'store', 'sku', 'date' and 'stock'
    df_warehouse: DataFrame
        needs the columns 'sku', 'date' and 'warehouse_stock'
    -------
    return: dict_start_dates
        dict_start_dates[date] = [(sku, store),...]  (the warehouse is reported as store "VZ01")

    Note:
        1. Neither input frame is modified
        2. Pairs whose stock is always 0 get no entry
    """
    def _first_non_zero_date(frame, column):
        # Smallest date among the rows from the first non-zero `column` value onwards,
        # or None when the column is zero throughout.
        non_zero = frame[column].ne(0).to_numpy()
        if not non_zero.any():
            return None
        return frame['date'].iloc[int(non_zero.argmax()):].min()

    dict_start_dates = {}
    # Work on a derived frame so the caller's 'date' column keeps its dtype.
    palmers = (
        df_palmers[["stock", "store", "sku", "date"]]
        .rename(columns={"stock": "stock_palmers"})
        .assign(date=lambda frame: frame["date"].astype(str))
    )
    for (store, sku), group in palmers.groupby(['store', 'sku']):
        start_date = _first_non_zero_date(group, 'stock_palmers')
        if start_date is not None:
            dict_start_dates.setdefault(start_date, []).append((sku, store))
    # Group by the scalar key 'sku': grouping by the list ['sku'] yields 1-tuples as group
    # names on recent pandas versions, which would store ('<sku>',) instead of '<sku>'.
    for sku, group in df_warehouse.groupby('sku'):
        start_date = _first_non_zero_date(group, 'warehouse_stock')
        if start_date is not None:
            start_key = pd.to_datetime(start_date).strftime('%Y-%m-%d')
            dict_start_dates.setdefault(start_key, []).append((sku, "VZ01"))

    return dict_start_dates

In [None]:
# dict_start_dates[date] = [(sku, store), ...]; warehouse entries use the store id "VZ01".
dict_start_dates = creat_dict_start_dates(df_stock_sales_temp,df_warehouse_temp)

In [None]:
# Display the whole mapping for a visual check. NOTE(review): this renders every entry.
dict_start_dates

In [None]:
def creat_dict_end_dates(df_palmers):
    """
    Find, for every sku, the day after its last recorded sale.
    Args:
    --------
    df_palmers: DataFrame
        needs the columns 'sku', 'date' and 'sales'
    -------
    return: dict_end_dates
        dic_end_dates[date] = [sku1, sku2, ...] with date formatted as 'YYYY-MM-DD'

    Note:
        1. The 'date' column of df_palmers is converted to datetime in place; this side effect
           is kept because code outside this function may rely on the resulting dtype
        2. Skus without any non-zero sale get no entry
    """
    dict_end_dates = {}

    # Convert dates to datetime objects for proper handling
    df_palmers["date"] = pd.to_datetime(df_palmers["date"])

    # Group by the scalar key 'sku': grouping by the list ['sku'] yields 1-tuples as group
    # names on recent pandas versions, which would store ('<sku>',) instead of '<sku>'.
    sold_rows = df_palmers[df_palmers['sales'].ne(0)]
    last_sale_per_sku = sold_rows.groupby('sku')['date'].max()
    for sku, last_sale_date in last_sale_per_sku.items():
        if pd.notna(last_sale_date):
            # The end date is the day after the last sale date
            end_date = (last_sale_date + pd.Timedelta(days=1)).strftime('%Y-%m-%d')
            dict_end_dates.setdefault(end_date, []).append(sku)

    return dict_end_dates


In [None]:
# dict_end_dates[date] = [sku, ...]: the day after each sku's last recorded sale.
dict_end_dates = creat_dict_end_dates(df_stock_sales_temp)

In [None]:
# Display the whole mapping for a visual check. NOTE(review): this renders every entry.
dict_end_dates

In [None]:
# Number of distinct end dates (keys), not the number of skus.
len(dict_end_dates)

In [None]:
# Sanity check: print the sku of every start-date entry that belongs to the warehouse ("VZ01").
for dates in dict_start_dates:
    for sku,store in dict_start_dates[dates]:
        if store == "VZ01":
            print(sku)

# The simulation

In [None]:
def initialize_all_the_dicts(stores_simulation:list,skus_simulation:list, start_dates: dict, dict_stocks: dict) -> tuple[dict, dict, dict, dict, dict, list, list]:
    """
    This function initialize all the dicts : AshlonStock, MissedSales, ActiveStores, current_stocks
    Args: 
    --------
    stores_simulation : list
        list of stores to simulate
    skus_simulation : list
        list of skus to simulate
    start_dates: dict
        start_dates[date] = [(sku, store),...]  (accepted for interface compatibility; not read here)
    dict_stocks: dict
        dict_stocks[store][sku] = amount
        
    -------
    return:  AshlonStock, MissedSales, ActiveStores, current_stock, accumulated_stocks, accumulated_AshlonStock, accumulated_ActiveStores

    Note:
        1. AshlonStock[store] and current_stock[store] start as empty dicts
        2. MissedSales[store][sku] starts at 0 and ActiveStores[sku][store] starts at 1
        3. ActiveStores additionally gets an empty dict under every store id
        4. accumulated_stocks is an independent copy of dict_stocks
    """
    AshlonStock = {}
    MissedSales = {} 
    ActiveStores = {}
    current_stock = {}
    # Copy the per-store dicts as well: dict.copy() alone would share them with dict_stocks,
    # so every update (or delete) on accumulated_stocks[store][sku] would also hit the live stock.
    accumulated_stocks = {store: dict(stock_by_sku) for store, stock_by_sku in dict_stocks.items()}
    accumulated_AshlonStock = []
    accumulated_ActiveStores = []
    for sku in skus_simulation:
        ActiveStores[sku] = {} 
    for store in stores_simulation:
        store = str(store)
        AshlonStock[store] = {}
        MissedSales[store] = {}
        ActiveStores[store] = {}
        current_stock[store] = {}
        for sku in skus_simulation:
            MissedSales[store][sku] = 0
            ActiveStores[sku][store] = 1
    return  AshlonStock, MissedSales, ActiveStores, current_stock, accumulated_stocks, accumulated_AshlonStock, accumulated_ActiveStores

def initialize_kpi_structures(stores_simulation: list ,skus_simulation: list) -> (dict,dict,dict,dict,dict,dict):
    """
    Create the empty KPI containers used during the simulation.
    Args: 
    --------
    stores_simulation : list
        list of stores to simulate
    skus_simulation : list
        list of skus to simulate
        
    -------
    return:  d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose

    Note:
        1. d_wo_inv[sku][store] and d_wo_inv_wo_wh[sku][store] start at 0
        2. Ex_i_s_r, avg_integral_diff and Ex_total_days_wo_inv hold {'len': 0, 'sum': 0} per [sku][store]
        3. loose[sku] starts at 0
    """
    def _zero_counters():
        # Plain day counters per sku and store.
        return {sku: {store: 0 for store in stores_simulation} for sku in skus_simulation}

    def _running_means():
        # A fresh accumulator dict per sku and store, so no two entries share state.
        return {
            sku: {store: {'len': 0, 'sum': 0} for store in stores_simulation}
            for sku in skus_simulation
        }

    d_wo_inv = _zero_counters()
    d_wo_inv_wo_wh = _zero_counters()
    Ex_i_s_r = _running_means()
    avg_integral_diff = _running_means()
    Ex_total_days_wo_inv = _running_means()
    loose = dict.fromkeys(skus_simulation, 0)

    return d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose

In [None]:
def receive_stock(dict_stocks: dict, AshlonStock: dict, current_stores_arrivals_stock: list, date: str) -> (dict,dict):
    """
    Book today's arriving deliveries into the stores' stock.
    description:
    1. every delivery scheduled for `date` is added to dict_stocks of its store
    2. the processed date is removed from AshlonStock of that store
    3. stores that are unknown to dict_stocks, or have nothing scheduled for `date`, are skipped
     Args: 
    --------
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    AshlonStock: dict
        AshlonStock[store][date] = [(sku, amount), ...]
    current_stores_arrivals_stock: list
        list of stores that arrive stock today (ids are converted to str)
    date: str
        date of the simulation
    -------
    return: dict_stocks, AshlonStock
    """
    for arriving_store in current_stores_arrivals_stock:
        store_id = str(arriving_store)
        if store_id not in dict_stocks:
            continue
        pending = AshlonStock[store_id]
        if date not in pending:
            continue
        shelf = dict_stocks[store_id]
        for sku, amount in pending[date]:
            # A sku the store has never held starts from zero.
            shelf[sku] = shelf.get(sku, 0) + amount
        del pending[date]
    return dict_stocks, AshlonStock

In [None]:
def update_stocks_by_sales(dict_stocks: dict,dict_sales: dict,MissedSales: dict,date: str) -> (dict,dict):
    """
    Apply the sales of `date` to the stores' stock and record what could not be served:
        1. a sale that the stock covers is subtracted from the stock
        2. otherwise the uncovered part is added to MissedSales and the stock drops to 0
        3. stores or skus that are unknown to dict_stocks are skipped
    Args:
    --------
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    dict_sales: dict
        dict_sales[store][date] = [(sku, amount), ...]
    MissedSales: dict
        MissedSales[store][sku] = amount
    date: str
        date of the simulation
    -------
    return: dict_stocks, MissedSales
    """
    for selling_store in dict_sales:
        store_id = str(selling_store)
        if store_id not in dict_stocks:
            continue
        sales_by_date = dict_sales[store_id]
        if date not in sales_by_date:
            continue
        shelf = dict_stocks[store_id]
        for sku, amount in sales_by_date[date]:
            if sku not in shelf:
                continue
            on_hand = shelf[sku]
            if on_hand >= amount:
                shelf[sku] = on_hand - amount
                continue
            # Not enough stock: sell what is there and log the remainder as missed.
            MissedSales[store_id][sku] += amount - on_hand
            shelf[sku] = 0

    return dict_stocks, MissedSales

In [None]:
def update_active_stores(ActiveStores: dict,dict_stocks: dict) -> dict:
    """
    Placeholder for the store-activation rule: hands ActiveStores back unchanged.

    For now every store is treated as active. A later version is meant to decide,
    by a timeline interval rule, which stores stay active.
    Args:
    --------
    ActiveStores: dict
        activity flags (1/0); returned as the very same object
    dict_stocks: dict
        dict_stocks[store][sku] = amount (currently not read)
    -------
    return: ActiveStores
    """
    return ActiveStores

In [None]:

def extract_last_sale_for_sku(dict_sales: dict,store: str,sku: str,current_date: str) -> Optional[int]:
    """
    This function extract the amount of the most recent sale of a sku in a store before current_date
    Args:
    --------
    dict_sales: dict
        dict_sales[store][date] = [(sku, amount), ...]
    store: str
        store id
    sku: str
        sku id
    current_date: str
        date of the simulation ('YYYY-MM-DD'); only strictly earlier dates are considered
    -------
    return: amount of the latest earlier sale, or None when the store has no sales at all
            or never sold the sku before current_date
    """
    sales_by_date = dict_sales.get(store, {})
    # ISO 'YYYY-MM-DD' strings sort chronologically, so walking them in descending order
    # reaches the latest sale first instead of whichever date was inserted first.
    for date in sorted(sales_by_date, reverse=True):
        if current_date <= date:
            continue
        for sale_sku, amount in sales_by_date[date]:
            if sale_sku == sku:
                return amount
    return None

In [None]:
def update_AshlonStock_waerhouse(potential_stock_order_from_warehouse:int,dict_stocks:dict,AshlonStock:dict, accumulated_stocks: dict,sku:str,store:str,date:str, lead_time_days: int = 2):
    """
    This function update the AshlonStock and dict_stocks by the following description:
    1. check if the potential_stock_order_from_warehouse is positive and the stock in the warehouse is enough for the order
    2. update the stock in the warehouse
    3. update the AshlonStock for the future date (lead_time_days days from the current date)
    4. add the shipped amount to accumulated_stocks of the store
    An order that is not positive, or is larger than the warehouse stock, changes nothing.
    Args:
    --------
    potential_stock_order_from_warehouse: int
        amount of the last sale - the stock of the store
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    AshlonStock: dict
        AshlonStock[store][date] = [(sku, amount), ...]
    accumulated_stocks: dict
        accumulated_stocks[store][sku] = amount
    sku: str
        sku id
    store: str
        store id
    date: str
        date of the simulation
    lead_time_days: int
        days between dispatch from the warehouse and arrival in the store (Default: 2)
    -------
    return: dict_stocks, AshlonStock, accumulated_stocks
    """
    order_size = potential_stock_order_from_warehouse
    if 0 < order_size <= dict_stocks["VZ01"][sku]:
        store = str(store)
        dict_stocks["VZ01"][sku] -= order_size
        arrival_date = pd.to_datetime(date) + pd.Timedelta(days=lead_time_days)
        arrival_key = arrival_date.strftime('%Y-%m-%d')
        # Rebuild the list rather than appending in place, as before.
        AshlonStock[store][arrival_key] = AshlonStock[store].get(arrival_key, []) + [(sku, order_size)]
        accumulated_stocks[store][sku] += order_size
    return dict_stocks,AshlonStock,accumulated_stocks

In [None]:
def update_current_stock_with_new_sku(current_stock: dict,start_dates: dict, date: str, dict_stocks: dict, Ex_total_days_wo_inv: dict, Ex_i_s_r: dict, avg_integral_diff: dict) -> tuple[dict, dict, dict, dict]:
    """
    Register every (sku, store) pair that starts on `date`.

    For each pair the stock value is copied from dict_stocks into current_stock and the
    three running-mean KPI entries of the pair are reset to {'len': 0, 'sum': 0}.
    Args:
    --------
    current_stock: dict
        current_stock[store][sku] = amount
    start_dates: dict
        start_dates[date] = [(sku, store),...]  (`date` must be one of its keys)
    date: str
        date of the simulation
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    Ex_total_days_wo_inv, Ex_i_s_r, avg_integral_diff: dict
        kpi[sku][store] = {'len': ..., 'sum': ...}
    -------
    return: current_stock, Ex_total_days_wo_inv, Ex_i_s_r, avg_integral_diff
    """
    kpi_trackers = (Ex_total_days_wo_inv, Ex_i_s_r, avg_integral_diff)
    for sku, store in start_dates[date]:
        current_stock[store][sku] = dict_stocks[store][sku]
        for tracker in kpi_trackers:
            # Each tracker gets its own accumulator dict.
            tracker[sku][store] = {'len': 0, 'sum': 0}
    return current_stock, Ex_total_days_wo_inv, Ex_i_s_r, avg_integral_diff

In [None]:
def update_current_stock_with_kill_sku(current_stock: dict, end_dates: dict, date: str, loose: dict, dict_stocks: dict, ashelon_stock: dict) -> tuple[dict, dict, dict]:
    """
    This function update current_stock with kill sku
    Args:
    --------
    current_stock: dict
        current_stock[store][sku] = amount
    end_dates: dict
        end_dates[date] = [(sku),...]  (`date` must be one of its keys)
    date: str
        date of the simulation
    loose: dict
        loose[sku] = percent of loose
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    ashelon_stock: dict
        ashelon_stock[store][date] = [(sku, amount), ...]
    -------
    return: current_stock, loose, ashelon_stock

    Raises:
        ValueError: the dict_stocks total of a killed sku over the stores of current_stock is 0
    
    Note:
        1. loose is the percent of the stock that we have not sold
        2. deliveries of a killed sku that are due on `date` or within the next 2 days are
           counted as unsold and removed; deliveries of other skus on those dates are kept
        3. NOTE(review): in-transit amounts are added to the numerator only, so the ratio
           can exceed 1 — confirm whether the denominator should include them
    """
    date_datetime = pd.to_datetime(date)
    # The look-ahead window is the same for every sku and store, so build it once.
    in_transit_dates = [
        day.strftime('%Y-%m-%d')
        for day in pd.date_range(date_datetime, date_datetime + pd.Timedelta(days=2), freq="D")
    ]
    for sku in end_dates[date]:
        tempo_sku_total_stock = 0
        for store in current_stock:
            if sku in current_stock[store]:
                loose[sku] += current_stock[store][sku]
                del current_stock[store][sku]
            if sku in dict_stocks.get(store, {}):
                tempo_sku_total_stock += dict_stocks[store][sku]
            pending_by_date = ashelon_stock.get(store, {})
            for date_str in in_transit_dates:
                if date_str not in pending_by_date:
                    continue
                # Each entry is a (sku, amount) pair, so the sku has to be matched inside the
                # pairs; a bare `sku in list_of_pairs` test never matches.
                remaining = []
                for delivery_sku, amount in pending_by_date[date_str]:
                    if delivery_sku == sku:
                        loose[sku] += amount
                    else:
                        remaining.append((delivery_sku, amount))
                if remaining:
                    pending_by_date[date_str] = remaining
                else:
                    del pending_by_date[date_str]
        if tempo_sku_total_stock == 0:
            raise ValueError(f"initial stock for sku {sku} is 0")
        loose[sku] = loose[sku] / tempo_sku_total_stock
    return current_stock, loose, ashelon_stock

def update_kpi_wo_inv(d_wo_inv: dict, d_wo_inv_wo_wh: dict, current_stock: dict, Ex_total_days_wo_inv: dict) -> tuple[dict, dict, dict]:
    """
    This function update kpi dicts : d_wo_inv, d_wo_inv_wo_wh, Ex_total_days_wo_inv
    For every (store, sku) in current_stock the observation count ('len') grows by 1.
    When the stock is 0, the out-of-stock counter grows by 1 as well and
    d_wo_inv / len is added to 'sum'; the warehouse ("VZ01") is left out of d_wo_inv_wo_wh.
    Nothing is printed.
    Args:
    --------
    d_wo_inv: dict
        d_wo_inv[sku][store] = amount
    d_wo_inv_wo_wh: dict
        d_wo_inv_wo_wh[sku][store] = amount
    current_stock: dict
        current_stock[store][sku] = amount
    Ex_total_days_wo_inv: dict
        Ex_total_days_wo_inv[sku][store] = {'len': ..., 'sum': ...}

    -------
    return: d_wo_inv, d_wo_inv_wo_wh, Ex_total_days_wo_inv
    """
    for store, stock_by_sku in current_stock.items():
        for sku, on_hand in stock_by_sku.items():
            if on_hand == 0:
                d_wo_inv[sku][store] += 1
                running = Ex_total_days_wo_inv[sku][store]
                running['len'] += 1
                running['sum'] += d_wo_inv[sku][store] / running['len']
                if store != "VZ01":
                    d_wo_inv_wo_wh[sku][store] += 1
            else:
                Ex_total_days_wo_inv[sku][store]['len'] += 1
    return d_wo_inv, d_wo_inv_wo_wh, Ex_total_days_wo_inv

def update_info_for_kpi(accumulated_stocks: dict,current_stock: dict, Ex_i_s_r: dict, avg_integral_diff: dict, margin_ratio: int = 3) -> tuple[dict, dict]:
    """
    This function update kpi dicts : Ex_i_s_r, avg_integral_diff
    Args:
    --------
    accumulated_stocks: dict
        accumulated_stocks[store][sku] = amount
    current_stock: dict
        current_stock[store][sku] = amount
    Ex_i_s_r: dict
        Ex_i_s_r[sku][store] = {'len': ..., 'sum': ...}
    avg_integral_diff: dict
        avg_integral_diff[sku][store] = {'len': ..., 'sum': ...}
    margin_ratio: int
        value added to avg_integral_diff when a store holds none of the sku (Default: 3)

    -------
    return: Ex_i_s_r, avg_integral_diff

    Raises:
        ValueError: the accumulated stock of a warehouse sku over all stores is 0
    """
    for store in current_stock:
        if store == "VZ01":
            continue
        total_stock, total_sales = 0, 0
        for sku in current_stock[store]:
            # NOTE(review): the totals keep accumulating across the skus of a store, so the
            # ratio stored for a sku also reflects the skus visited before it — confirm intent.
            total_stock += accumulated_stocks[store][sku]
            total_sales += accumulated_stocks[store][sku] - current_stock[store][sku]
            Ex_i_s_r[sku][store]['len'] += 1
            Ex_i_s_r[sku][store]['sum'] += 0 if total_stock == 0 else total_sales / total_stock
            avg_integral_diff[sku][store]['len'] += 1
            avg_integral_diff[sku][store]['sum'] += current_stock[store][sku] - 1 if current_stock[store][sku] > 0 else margin_ratio
    # The warehouse figures do not depend on the store being visited, so they are updated
    # once per call instead of once per store.
    for sku in current_stock.get('VZ01', {}):
        total_stock = np.sum([accumulated_stocks[store][sku] for store in current_stock])
        total_sales = total_stock - current_stock['VZ01'][sku]
        if total_stock == 0:
            raise ValueError(f"total initialized stock in all stores for sku {sku} is 0") 
        Ex_i_s_r[sku]['VZ01']['len'] += 1
        Ex_i_s_r[sku]['VZ01']['sum'] += total_sales / total_stock
    return Ex_i_s_r, avg_integral_diff

In [None]:
def kill_and_save_results(accumulated_stocks: dict, d_wo_inv: dict, d_wo_inv_wo_wh: dict, Ex_i_s_r: dict, avg_integral_diff: dict, Ex_total_days_wo_inv: dict, loose: dict, date: str, end_dates: dict, MissedSales: dict, base_path: str = r'C:\Users\yotam\SDatta\fashion\strategy_benchmark\source_simulation\07_01_2024', simulation_name: str = 'run1', lamda: float = 0.1) -> tuple[dict, dict, dict, dict, dict, dict, dict, dict]:
    """
    Write the final KPIs of every sku that ends on `date` to disk and drop those skus from the
    bookkeeping dicts. When `date` is not a key of end_dates nothing is written or changed.

    For each ending sku and each store of accumulated_stocks that holds the sku, one pickle file
    `<base_path>/<simulation_name>/<sku>_<store>.pkl` is written, after which the pair is deleted
    from accumulated_stocks, the KPI dicts and MissedSales; the sku is then deleted from loose.
    Args:
    --------
    accumulated_stocks: dict
        accumulated_stocks[store][sku] = amount
    d_wo_inv: dict
        d_wo_inv[sku][store] = amount
    d_wo_inv_wo_wh: dict
        d_wo_inv_wo_wh[sku][store] = amount
    Ex_i_s_r: dict
        Ex_i_s_r[sku][store] = {'len': ..., 'sum': ...}
    avg_integral_diff: dict
        avg_integral_diff[sku][store] = {'len': ..., 'sum': ...}
    Ex_total_days_wo_inv: dict
        Ex_total_days_wo_inv[sku][store] = {'len': ..., 'sum': ...}
    loose: dict
        loose[sku] = percent of loose
    date: str
        date of the simulation
    end_dates: dict
        end_dates[date] = [(sku),...]
    MissedSales: dict
        MissedSales[store][sku] = amount
    base_path: str
        directory under which the simulation folder is created
    simulation_name: str
        name of the folder that receives the pickle files
    lamda: float
        rate used in the exponential term of the 'lose' value
    -------
    return: accumulated_stocks, d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose, MissedSales
    """
    if date not in end_dates:
        return accumulated_stocks, d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose, MissedSales
    final_kpi_res = {}
    simulation_dir = os.path.join(base_path, simulation_name)
    if not os.path.exists(simulation_dir):
        os.makedirs(simulation_dir)
    for sku in end_dates[date]:
        for store in accumulated_stocks:
            if sku in accumulated_stocks[store]:
                # NOTE(review): operator precedence makes this a 2-tuple whose SECOND element is
                # `loose[sku] if store == "VZ01" else None`; the first element is computed for
                # every store. It also reads d_wo_inv[sku]["VZ01"], which the `del` below removes
                # once the "VZ01" store has been processed — so "VZ01" must be visited last.
                lose =d_wo_inv[sku]["VZ01"]*(np.exp(lamda*loose[sku])-1), loose[sku] if store == "VZ01" else None
                # Running means: sum / len, or None when nothing was recorded.
                avg_integral_diff_sum_divde_avg_integral_diff=  avg_integral_diff[sku][store]["sum"]/avg_integral_diff[sku][store]["len"] if avg_integral_diff[sku][store]["len"] != 0 else None
                Ex_total_days_wo_inv_sum_divde_Ex_total_days_wo_inv = Ex_total_days_wo_inv[sku][store]["sum"]/Ex_total_days_wo_inv[sku][store]["len"] if Ex_total_days_wo_inv[sku][store]["len"] != 0 else None
                Ex_i_s_r_sum_divde_Ex_i_s_r = Ex_i_s_r[sku][store]["sum"]/Ex_i_s_r[sku][store]["len"] if Ex_i_s_r[sku][store]["len"] != 0 else None
                # NOTE(review): the braces build a *set* of formatted strings (no keys), so the
                # pickled object is unordered — confirm a dict or list was not intended.
                final_kpi_res[f'{sku}_{store}'] = {f'days without stock in {sku, store}: {d_wo_inv[sku][store]}',
                                 f'days without stock in {sku, store} without warehouse: {d_wo_inv_wo_wh[sku][store]}',
                                    f'expected value inventory sales ratio in {sku, store}: {Ex_i_s_r_sum_divde_Ex_i_s_r}',
                                    f'average integral difference in {sku, store}: {avg_integral_diff_sum_divde_avg_integral_diff}',
                                    f'expected value total days without inventory in {sku, store}: {Ex_total_days_wo_inv_sum_divde_Ex_total_days_wo_inv}',
                                    f'loose, lose ratio in {sku} {lose}',
                                    f'missed sales in {sku, store}: {MissedSales[store][sku]}',
                                    f'accumulated stock in {sku, store}: {accumulated_stocks[store][sku]}'}
                
                file_path = os.path.join(simulation_dir, f'{sku}_{store}.pkl')
                with open(file_path, 'wb') as f:
                    pickle.dump(final_kpi_res[f'{sku}_{store}'], f)
                # Drop the finished pair everywhere; any missing key raises KeyError here.
                del accumulated_stocks[store][sku], d_wo_inv[sku][store], d_wo_inv_wo_wh[sku][store], Ex_i_s_r[sku][store], avg_integral_diff[sku][store], Ex_total_days_wo_inv[sku][store], MissedSales[store][sku]   
        del loose[sku]
    return accumulated_stocks, d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose, MissedSales

In [None]:
def apply_strategy_naive_bayes(dict_stocks: dict, AshlonStock: dict, ActiveStores: dict, current_stores_replenished: list, dict_sales: dict, accumulated_stocks: dict, date: str,store_traveling_time_dict: dict = None) -> tuple[dict, dict, dict]:
    """
    This function apply strategy and update dict_stocks, AshlonStock, accumulated_stocks.
    The strategy (named "naive bayes" in this notebook) is:
    1. for every store that can be replenished from the warehouse
    2. for every sku in the store that is active
    3. look up the amount returned by extract_last_sale_for_sku for the sku
    4. if there is none, skip the sku
    5. add 1 to that amount
    6. caculate the potential_stock_order_from_warehouse = (amount + 1) - the stock of the store
    7. pass the order to update_AshlonStock_waerhouse, which ships it only when it is positive
       and the warehouse stock covers it

    Args:
    --------
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    AshlonStock: dict
        AshlonStock[store][date] = [(sku, amount), ...]
    ActiveStores: dict
        ActiveStores[sku][store] = 1/0
    current_stores_replenished: list
        list of stores that can be replenished from the warehouse (ids are converted to str)
    dict_sales: dict
        dict_sales[store][date] = [(sku, amount), ...]
    accumulated_stocks: dict
        accumulated_stocks[store][sku] = amount
    date: str
        date of the simulation
    store_traveling_time_dict: dict
        store_traveling_time_dict[store] = traveling_time (currently not used)
    -------
    return: dict_stocks, AshlonStock, accumulated_stocks
    """
    for store in current_stores_replenished:
        store = str(store)
        if store not in dict_stocks:
            continue
        for sku in dict_stocks[store].keys():
            if ActiveStores[sku][store] == 0:
                continue
            # Look the sale up once; the lookup walks the store's sales history.
            last_sale = extract_last_sale_for_sku(dict_sales, store, sku, date)
            if last_sale is None:
                continue
            potential_stock_order_from_warehouse = last_sale + 1 - dict_stocks[store][sku]
            dict_stocks,AshlonStock,accumulated_stocks = update_AshlonStock_waerhouse(potential_stock_order_from_warehouse,dict_stocks,AshlonStock,accumulated_stocks,sku,store,date)
    return dict_stocks, AshlonStock, accumulated_stocks

In [None]:
def main_simulation(
    dict_deliveries_from_warehouse: dict,
    dict_arrivals_store_deliveries: dict,
    stores_simulation: list,
    skus_simulation: list,
    dict_sales: dict,
    dict_stocks: dict,
    start_dates: dict,
    end_dates: dict,
    strategy_names: str = "naive_bayes",
) -> None:
    """
    Run the day-by-day replenishment simulation.

    The simulated period goes from the earliest key of ``start_dates`` to the
    latest key of ``end_dates`` (inclusive, one iteration per calendar day).
    For every day the following steps are executed, in this order:

    1. introduce SKUs whose start date is today (``update_current_stock_with_new_sku``)
    2. kill SKUs whose end date is today (``update_current_stock_with_kill_sku``)
    3. receive stock in the stores that have an arrival today (``receive_stock``)
    4. update the "days without inventory" KPIs (``update_kpi_wo_inv``)
    5. apply today's sales to the stock and record missed sales (``update_stocks_by_sales``)
    6. refresh the active-store flags (``update_active_stores``)
    7. for stores that can be replenished today, apply the strategy
       (``apply_strategy_naive_bayes``)
    8. update the remaining KPI accumulators (``update_info_for_kpi``)
    9. kill finished SKUs and save their results (``kill_and_save_results``)

    Args:
    --------
    dict_deliveries_from_warehouse: dict
        dict_deliveries_from_warehouse[date] = [store1,store2,...]
    dict_arrivals_store_deliveries : dict
        dict_arrivals_store_deliveries[date] = [store1,store2,...]
    stores_simulation : list
        list of stores to simulate
    skus_simulation : list
        list of skus to simulate
    dict_sales : dict
        dict_sales[store][date] = (sku, amount)
    dict_stocks: dict
        dict_stocks[store][sku] = amount
    start_dates: dict
        start_dates[date] = [(sku, store),...]
    end_dates: dict
        end_dates[date] = [(sku),...]
    strategy_names: str
        name of the strategy to apply (Default: "naive_bayes").
        NOTE: currently not used by this function; the naive-bayes strategy
        is always applied.

    -------
    return: None

    Raises:
    --------
    ValueError
        if ``start_dates`` or ``end_dates`` is empty, because the simulated
        period cannot be determined.

    Note:
        1. All constants are in the strategy
        2. The simulation is by days (every queue is a day)
        3. Date keys of all the input dicts are looked up with the
           '%Y-%m-%d' string of the simulated day.
        4. AshlonStock[store][date] = (sku, amount)
        5. MissedSales[store][date] = (sku, amount)
        6. The exact layout of ActiveStores is defined by
           ``initialize_all_the_dicts`` (values are 1/0) - TODO confirm whether
           it is keyed by store or by sku then store.
    """
    # Validate up-front so an empty input fails with a clear message instead of
    # "min() arg is an empty sequence" after all the state was already built.
    if not start_dates:
        raise ValueError("start_dates is empty: cannot determine the first day of the simulation")
    if not end_dates:
        raise ValueError("end_dates is empty: cannot determine the last day of the simulation")

    # The two accumulated_* values prefixed with "_" are returned by the
    # initializer but are not needed anywhere in this function.
    (
        AshlonStock,
        MissedSales,
        ActiveStores,
        current_stock,
        accumulated_stocks,
        _accumulated_AshlonStock,
        _accumulated_ActiveStores,
    ) = initialize_all_the_dicts(stores_simulation, skus_simulation, start_dates, dict_stocks)
    d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose = initialize_kpi_structures(stores_simulation, skus_simulation)

    # Keys are parsed only to find the boundaries of the simulated period;
    # the dicts themselves are still looked up with the '%Y-%m-%d' string.
    first_day = min(pd.to_datetime(key) for key in start_dates).strftime('%Y-%m-%d')
    last_day = max(pd.to_datetime(key) for key in end_dates).strftime('%Y-%m-%d')

    for date in pd.date_range(first_day, last_day, freq="D"):
        date_str = date.strftime('%Y-%m-%d')
        print(f"Processing date {date_str}")
        # New SKUs enter the simulation before anything else happens today.
        if date_str in start_dates:
            current_stock, Ex_total_days_wo_inv, Ex_i_s_r, avg_integral_diff = update_current_stock_with_new_sku(current_stock, start_dates, date_str, dict_stocks, Ex_total_days_wo_inv,  Ex_i_s_r, avg_integral_diff )
        if date_str in end_dates:
            current_stock, loose,AshlonStock = update_current_stock_with_kill_sku(current_stock,end_dates,date_str, loose, dict_stocks, AshlonStock)
        # Stock that was shipped earlier arrives at the stores.
        if date_str in dict_arrivals_store_deliveries:
            current_stores_arrivals_stock = dict_arrivals_store_deliveries[date_str]
            current_stock, AshlonStock = receive_stock(current_stock,AshlonStock,current_stores_arrivals_stock,date_str)
        d_wo_inv, d_wo_inv_wo_wh, Ex_total_days_wo_inv = update_kpi_wo_inv(d_wo_inv, d_wo_inv_wo_wh, current_stock, Ex_total_days_wo_inv)
        current_stock, MissedSales = update_stocks_by_sales(current_stock,dict_sales,MissedSales,date_str)
        ActiveStores = update_active_stores(ActiveStores,current_stock)
        # Only stores with a warehouse delivery slot today can place orders.
        if date_str in dict_deliveries_from_warehouse:
            current_stores_replenished = dict_deliveries_from_warehouse[date_str]
            current_stock, AshlonStock, accumulated_stocks = apply_strategy_naive_bayes(current_stock,AshlonStock,ActiveStores,current_stores_replenished,dict_sales, accumulated_stocks,date_str)
        Ex_i_s_r, avg_integral_diff = update_info_for_kpi(accumulated_stocks,current_stock,Ex_i_s_r,avg_integral_diff)
        accumulated_stocks, d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose, MissedSales  = kill_and_save_results(accumulated_stocks, d_wo_inv, d_wo_inv_wo_wh, Ex_i_s_r, avg_integral_diff, Ex_total_days_wo_inv, loose, date_str, end_dates, MissedSales)

In [None]:
# Run the simulation for all the stores plus the warehouse itself ("VZ01").
main_simulation(
    dict_deliveries_from_warehouse=dict_deliveries_from_wharehouse_dict,
    dict_arrivals_store_deliveries=dict_arrivals_store_deliveries,
    stores_simulation=list(stores_simulation) + ["VZ01"],
    skus_simulation=skus_simulation,
    dict_sales=dict_sales,
    dict_stocks=dict_stocks,
    start_dates=dict_start_dates,
    end_dates=dict_end_dates,
)

In [None]:
# Inspect the per-SKU stock mapping of store "100" (dict_stocks[store][sku] = amount).
dict_stocks["100"]

In [None]:
# Inspect the per-SKU stock mapping of store "4133" (dict_stocks[store][sku] = amount).
dict_stocks["4133"]

In [None]:
# def save_results( accumulated_AshlonStock: list,error_metrics:pd.DataFrame , base_output_dir: str=r"C:\Users\yotam\SDatta\fashion\simulation_res\simulation_results_benchmark") -> None:
#     """
#     This function save results to csv
#     Args:
#     --------
#     dict_stocks: dict
#         dict_stocks[store][sku] = amount
#     AshlonStock: dict
#         AshlonStock[store][date] = (sku, amount)
#     MissedSales: dict
#         MissedSales[store][date] = (sku, amount)
#     ActiveStores: dict
#         ActiveStores[store] = 1/0
#     -------
#     return: None
#     """
#     experiment_number = 1
#     output_dir = os.path.join(base_output_dir, f"experiment_{experiment_number}")
#     while os.path.exists(output_dir):
#         experiment_number += 1
#         output_dir = os.path.join(base_output_dir, f"experiment_{experiment_number}")
# 
#     os.makedirs(output_dir, exist_ok=True)
#     df_AshlonStock = pd.DataFrame(accumulated_AshlonStock)
#     
#     df_AshlonStock.to_csv(os.path.join(output_dir, "AshlonStock.csv"), index=False)
#     error_metrics.to_csv(os.path.join(output_dir, "error_metrics.csv"), index=False)
#     print(f"Saved final simulation results in {output_dir}")

In [None]:
# def apply_strategy(dict_stocks: dict, AshlonStock: dict, ActiveStores: dict, current_stores_replenished: list, dict_sales: dict, date: str) -> (dict, dict):
#     """
#     Apply strategy and update dict_stocks, AshlonStock
#     """
#     pass

In [None]:
# def update_accumulated(accumulated_stocks, accumulated_AshlonStock, accumulated_MissedSales, dict_stocks, AshlonStock, MissedSales, date):
#     pd_date = pd.to_datetime(date)
#     accumulated_stocks.extend([
#         {'store': store, 'sku': sku, 'date': date, 'stock': int(amount)}
#         for store in dict_stocks 
#         for sku, amount in dict_stocks[store].items()
#     ])
# 
#     
#     accumulated_AshlonStock.extend([{**{'store': store, 'date': date}, **{'stock': amount}} for store in AshlonStock for  amount in AshlonStock[store].items()])
#     
#     accumulated_MissedSales.extend([
#         {'store': store, 'sku': sku, 'date': date, 'amount_miss_sales': int(amount)}
#         for store in MissedSales 
#         for sku, amount in MissedSales[store].items()
#     ])
#     return accumulated_stocks, accumulated_AshlonStock, accumulated_MissedSales


In [None]:
# def filter_from_first_non_zero(group):
#     first_non_zero_index = group[group['stock_palmers'].ne(0)].index.min()
#     return group.loc[first_non_zero_index:]

In [None]:
# def filter_to_last_non_zero(group):
#     group['date'] = pd.to_datetime(group['date'], format='%Y-%m-%d')
#     last_non_zero_date = group[group['stock_palmers'].ne(0)]['date'].max()
#     day_after_last_non_zero = last_non_zero_date + pd.Timedelta(days=1)
#     group = group[group['date'] <= day_after_last_non_zero]
#     group["date"] = group["date"].astype(str)
#     return group

In [None]:
# def process_groups_without_apply(df_palmers):
#     unique_groups = df_palmers.groupby(['store', 'sku'])
#     result_frames = []
#     for name, group in unique_groups:
#         filtered_group = filter_to_last_non_zero(group)
#         result_frames.append(filtered_group)
#     return pd.concat(result_frames)

In [None]:
# def filter_dates_inner_with_palmers(accumulated_stocks:list,df_palmers:pd.DataFrame,strategy_names:str,accumulated_MissedSales:list) -> pd.DataFrame:
#     df_stocks = pd.DataFrame(accumulated_stocks)
#     df_stocks = df_stocks.rename(columns={"stock": f"stock_{strategy_names}"})
#     df_miss_sales = pd.DataFrame(accumulated_MissedSales)
#     df_palmers["date"] = df_palmers["date"].astype(str)
#     df_palmers = df_palmers.rename(columns={"stock": "stock_palmers"})
#     df_palmers = df_palmers[["stock_palmers","store","sku","date","sales"]]
#     df_palmers_start_from_no_zero = df_palmers.groupby(['store', 'sku'], group_keys=False).apply(filter_from_first_non_zero)
#     df_palmers_end_from_no_zero = process_groups_without_apply(df_palmers_start_from_no_zero)
#     df_stocks_inner = df_stocks.merge(df_palmers_end_from_no_zero,how="right",on=["store","sku","date"])
#     df_compare = df_miss_sales.merge(df_stocks_inner,how="right",on=["store","sku","date"])
#     return df_compare

In [None]:
# def compute_error_metrics(df_all_clean_after_merge:str,strategy_names:str) -> pd.DataFrame:
#     
#     stock_cum_sum = df_all_clean_after_merge.set_index(["date"]).groupby(["sku", "store"]).apply(
#         lambda x: np.round((x[f"stock_palmers"].cumsum()-x[f"stock_{strategy_names}"].cumsum())/x[f"stock_palmers"].cumsum(),2)
#     ).to_frame("stock_saving_cumsum_vs_palmers").reset_index().fillna(0)
#     stock_cum_sum["stock_saving_cumsum_vs_palmers"] = np.where(stock_cum_sum["stock_saving_cumsum_vs_palmers"]== -np.inf, 0, stock_cum_sum["stock_saving_cumsum_vs_palmers"])
#     df_all_clean_after_merge = df_all_clean_after_merge.merge(stock_cum_sum, on=["sku", "store", "date"], how="left")
#     return df_all_clean_after_merge

In [None]:
# def find_last_sale_dates_global(dict_sales:dict) -> dict:
#     last_sale_dates = {}
#     for store, sales_data in dict_sales.items():
#         for date, sales in sales_data.items():
#             for sku, _ in sales:
#                 if sku not in last_sale_dates or date > last_sale_dates[sku] and _ > 0:
#                     last_sale_dates[sku] = date
#     return last_sale_dates


In [None]:
# def find_first_sale_dates_global(dict_sales:dict) -> dict:
#     first_sale_dates = {}
#     for store, sales_data in dict_sales.items():
#         for date, sales in sales_data.items():
#             for sku, _ in sales:
#                 if sku not in first_sale_dates or date < first_sale_dates[sku] and _ > 0:
#                     first_sale_dates[sku] = date
#     return first_sale_dates

# Develop AVG models strategy
* Delay parameter: up to 7 days
* Window size: up to a month
* Strategy: compute the average (AVG) over the window size, up to the delay ceiling + 1
* "opt" means trying the whole parameter range per id and saving the best params