## Order size distributions
This script computes order size distributions from historical demand data collected from
demand transactions in MMI.

In [14]:
import pandas as pd
import numpy as np


In [15]:
# Workbook listing the items to analyse; the codes are read from the
# "Interesting items" column of "Sheet1".
investigated_items_path = "/Volumes/GoogleDrive/.shortcut-targets-by-id/10oYqI9u7nCLK0q7xF2CvGGIQVokusjaI/Exjobb/7. Data collection/investigated_items.xlsx"
items = pd.read_excel(io=investigated_items_path, sheet_name="Sheet1")

# Preview the first three rows as the cell output.
items.head(3)

Unnamed: 0,Interesting items,Unnamed: 1
0,1030-61460,
1,11033998,
2,11033999,


In [16]:
# Root folder of the data collection; every input and output location below hangs off it.
data_collection_dir = "/Volumes/GoogleDrive/.shortcut-targets-by-id/10oYqI9u7nCLK0q7xF2CvGGIQVokusjaI/Exjobb/7. Data collection"
excel_path = f"{data_collection_dir}/item_demand_size_distributions.xlsx"
demand_history_dir = f"{data_collection_dir}/item_demand_history_csv_files"
input_dir = f"{data_collection_dir}/item_input_csv_files"
csv_dir = f"{data_collection_dir}/item_demand_size_distributions_csv_files"


def _demand_size_frequencies(demanded_qty):
    """Return the relative frequency of every positive order size.

    Parameters
    ----------
    demanded_qty : pd.Series
        Raw "Demanded qty." values of one dealer. Entries may be numbers or
        strings; strings may contain "," (e.g. "2,000").

    Returns
    -------
    pd.Series
        Float series with a 0-based positional index where position ``i`` holds
        the share of orders of size ``i + 1``, up to the largest observed size.
        Empty when there is no order with a positive quantity.

    Raises
    ------
    ValueError
        If an entry cannot be interpreted as a number (including missing values).
    """
    # NOTE(review): "," is treated as a thousands separator ("2,000" -> 2000).
    # Confirm that the demand history export never uses it as a decimal comma.
    cleaned = demanded_qty.astype(str).str.replace(",", "", regex=False)
    quantities = pd.to_numeric(cleaned).astype(int)

    # Only strictly positive sizes are part of the distribution. Zero-quantity
    # rows have no slot in the output (position 0 is size 1) and would otherwise
    # make the frequencies sum to less than 1.
    positive = quantities[quantities > 0]
    if positive.empty:
        return pd.Series(dtype=float)

    shares = positive.value_counts(normalize=True)
    all_sizes = pd.RangeIndex(1, int(positive.max()) + 1)
    # Sizes that never occurred get frequency 0; the size labels are then dropped
    # so that position 0 corresponds to size 1, and so on.
    return pd.Series(shares.reindex(all_sizes, fill_value=0.0).to_numpy(), dtype=float)


# The context manager saves and closes the workbook even if an item fails
# part-way through (ExcelWriter.save() no longer exists in pandas >= 2.0).
with pd.ExcelWriter(excel_path) as writer:
    for item_code in items["Interesting items"]:
        demand_history_path = f"{demand_history_dir}/dh_{item_code}.csv"
        demand_history_df = pd.read_csv(demand_history_path)

        input_path = f"{input_dir}/item_{item_code}_input.csv"
        input_df = pd.read_csv(input_path)

        # One frequency column per dealer, keyed by dealer name.
        dealer_distributions = {}

        for name in input_df["Name"]:
            # "Johannesburg" is excluded from the distributions.
            if name == "Johannesburg":
                continue

            # Picking out the demands of the dealer at hand.
            demand_history_dealer_df = demand_history_df[demand_history_df["Warehouse"] == name]

            demand_size_arr = _demand_size_frequencies(demand_history_dealer_df["Demanded qty."])
            if demand_size_arr.empty:
                # No positive demand means there is no distribution to report.
                print(f"No positive demand for item {item_code}, dealer {name}; skipping.")
                continue

            # Sanity check: the frequencies must form a probability distribution.
            assert abs(demand_size_arr.sum() - 1) <= 10**-4, f"Sum of {item_code}, {name} is: {demand_size_arr.sum()}, not 1"

            dealer_distributions[name] = demand_size_arr

        # Columns are aligned on position; dealers whose largest order is smaller
        # than another dealer's get frequency 0 for the sizes they never ordered.
        demand_size_dist_df = pd.DataFrame(dealer_distributions).fillna(0)

        # Saving to csv
        csv_path = f"{csv_dir}/item_{item_code}_d_size_dist.csv"
        demand_size_dist_df.to_csv(csv_path)

        # Writing to excel
        excel_sheet_name = f"item_{item_code}_d_size_dist"
        demand_size_dist_df.to_excel(writer, sheet_name=excel_sheet_name)
    

In [72]:
## Extra troubleshooting code-snippet.
# Reads the demand history of a single item and reports every "Demanded qty."
# entry that int() rejects, together with its row position.

item_code = 15073006
#item_code = 14688861
demand_history_dir = "/Volumes/GoogleDrive/.shortcut-targets-by-id/10oYqI9u7nCLK0q7xF2CvGGIQVokusjaI/Exjobb/7. Data collection/item_demand_history_csv_files"
demand_history_path = f"{demand_history_dir}/dh_{item_code}.csv"
demand_history_df = pd.read_csv(demand_history_path)

demanded_quantities = demand_history_df["Demanded qty."].tolist()
for i, d in enumerate(demanded_quantities):
    # Uncomment to inspect every value and its type:
    # print(f"num: {d}, type: {type(d)}")
    try:
        x = int(d)
    except ValueError:
        print(f"This value is not possible to print: no {i}, value: {d}")
    

This value is not possible to print: no 7850, value: 2,000
