In [1]:
# Importing all the dependencies
import pandas as pd
import numpy as np

from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Border, Side, Font
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
from openpyxl.styles.alignment import Alignment
import os

In [2]:
# Input locations: the demand workbook is resolved relative to the working
# directory, the storage workbook lives under ./client_data.
cwd = os.getcwd()
storage_filepath = os.path.join(cwd, "client_data", "Freight.xlsx")
demand_filepath = "28-04-2024_Demand_Automation.xlsx"

# The storage sheet is read with its first three rows skipped; the demand
# workbook is read from its first sheet with default settings.
storage_df = pd.read_excel(
    storage_filepath,
    sheet_name='I. Template - WH Storage RawDat',
    skiprows=3,
)
demand_df = pd.read_excel(demand_filepath)

In [3]:
# Keep only the demand rows whose Ret_Flag equals 1 and collect their SKU codes.
retained_rows = demand_df['Ret_Flag'].eq(1)
demand_df = demand_df.loc[retained_rows]
skus = demand_df['FGSKUCode'].unique()

# Warehouses to cover, and the cost used for a warehouse when the storage
# sheet has no rate for a given SKU.
default_costs = {
    'NFCM': 0.625,
    'KFCM': 1.109375,
    'GFCM': 0.507837838
}
warehouses = ['NFCM', 'GFCM', 'KFCM']

# One row per (SKU, warehouse) pair: the full grid the output must contain.
sku_warehouse_index = pd.MultiIndex.from_product(
    [skus, warehouses],
    names=['FGSKUCode', 'Warehouse'],
)
combinations = sku_warehouse_index.to_frame(index=False)

# Give the storage sheet explicit column names (positional, nine columns).
storage_df.columns = [
    'Unnamed', 'FG Code', 'FG_Desc', 'UOM', 'WH Code',
    '#UOM per pallet', 'Storage Cost/Pallet (USD)', 'Storage Cost/Carton',
    'retain_flag',
]
storage_df.head()

Unnamed: 0,Unnamed,FG Code,FG_Desc,UOM,WH Code,#UOM per pallet,Storage Cost/Pallet (USD),Storage Cost/Carton,retain_flag
0,,4001031128,TENDER CHICKEN BREAST(2*1*5000GM)KWT,CARTON,NFCM,48.0,30.0,0.625,
1,,4001100155,ARABIC SPICE BEEF BURGER15*1KG,CARTON,NFCM,35.0,30.0,0.857143,
2,,4001100156,JUMBO BBQ BEEF BURGER 10PCS (15*10*100GM),CARTON,NFCM,35.0,30.0,0.857143,
3,,4001111270,Chicken Trimming 18 Kg,CARTON,NFCM,64.285714,30.0,0.466667,
4,,4001199005,HARDEES BEEF BURGER 1.78OZ,CARTON,NFCM,48.0,30.0,0.625,


In [4]:
# Left-join the storage rates onto the full SKU x warehouse grid so that
# pairs without a rate are kept (with NaN costs), then rename the columns
# to their output names and set retain_flag to 1 on every row.
result_df = (
    combinations
    .merge(
        storage_df,
        how='left',
        left_on=['FGSKUCode', 'Warehouse'],
        right_on=['FG Code', 'WH Code'],
    )
    .rename(columns={
        'Warehouse': 'WH Cod',
        'Storage Cost/Carton': 'CostperUOM',
        'UOM': 'UM',
    })
    .assign(retain_flag=1)
)
result_df.head()

Unnamed: 0,FGSKUCode,WH Cod,Unnamed,FG Code,FG_Desc,UM,WH Code,#UOM per pallet,Storage Cost/Pallet (USD),CostperUOM,retain_flag
0,4001351001,NFCM,,4001351000.0,Beef Hot Dog 450G X 20,CARTON,NFCM,88.0,30.0,0.340909,1
1,4001351001,GFCM,,4001351000.0,Beef Hot Dog 450G X 20,CARTON,GFCM,68.444444,18.79,0.274529,1
2,4001351001,KFCM,,4001351000.0,Beef Hot Dog 450G X 20,CARTON,KFCM,91.0,53.25,0.585165,1
3,4001351022,NFCM,,4001351000.0,CRAVES ANGUS BEEF HOTDOG 450GM (20*5*90GM),CARTON,NFCM,48.0,30.0,0.625,1
4,4001351022,GFCM,,4001351000.0,CRAVES ANGUS BEEF HOTDOG 450GM (20*5*90GM),CARTON,GFCM,37.333333,18.79,0.503304,1


In [5]:
# Where the merge found no cost, fall back to the default cost of the row's
# warehouse. Rows whose warehouse has no entry in default_costs stay NaN.
fallback_cost = result_df['WH Cod'].map(default_costs)
result_df['CostperUOM'] = result_df['CostperUOM'].fillna(fallback_cost)

In [6]:
# Preview the first rows of result_df (bare last expression -> rich display).
result_df.head()

Unnamed: 0,FGSKUCode,WH Cod,Unnamed,FG Code,FG_Desc,UM,WH Code,#UOM per pallet,Storage Cost/Pallet (USD),CostperUOM,retain_flag
0,4001351001,NFCM,,4001351000.0,Beef Hot Dog 450G X 20,CARTON,NFCM,88.0,30.0,0.340909,1
1,4001351001,GFCM,,4001351000.0,Beef Hot Dog 450G X 20,CARTON,GFCM,68.444444,18.79,0.274529,1
2,4001351001,KFCM,,4001351000.0,Beef Hot Dog 450G X 20,CARTON,KFCM,91.0,53.25,0.585165,1
3,4001351022,NFCM,,4001351000.0,CRAVES ANGUS BEEF HOTDOG 450GM (20*5*90GM),CARTON,NFCM,48.0,30.0,0.625,1
4,4001351022,GFCM,,4001351000.0,CRAVES ANGUS BEEF HOTDOG 450GM (20*5*90GM),CARTON,GFCM,37.333333,18.79,0.503304,1


In [7]:
# Keep only the columns that belong in the exported table, in output order.
output_columns = [
    'FGSKUCode',
    'UM',
    'WH Cod',
    'Storage Cost/Pallet (USD)',
    'CostperUOM',
    'retain_flag',
]
result_df = result_df.loc[:, output_columns]

In [8]:
# Sanity check: result_df must hold exactly one row per (SKU, warehouse) pair.
# The expected count is derived from the warehouse list instead of a literal 3
# so the check stays correct if warehouses are added or removed.
expected_rows = len(skus) * len(warehouses)
print(expected_rows)
print(len(result_df))

# Fail loudly instead of relying on someone comparing the two printed numbers.
assert len(result_df) == expected_rows, (
    "result_df has {} rows, expected {} (one per SKU/warehouse pair)".format(
        len(result_df), expected_rows
    )
)

1653
1653


In [9]:
# Column-level QC profile of the final table: one row per column.
final_wh_fg = result_df

# describe() reports numeric columns only by default, so the statistics
# below are NaN for the text columns once aligned on the column names.
numeric_profile = final_wh_fg.describe(percentiles=[0.25, 0.5, 0.75]).T

# describe() statistic -> label used in the QC table, in output order.
stat_labels = {
    'mean': 'Mean',
    'std': 'Std',
    'min': 'Minimum Value',
    '25%': 'Percentile Distribution (25%)',
    '50%': 'Percentile Distribution (50%)',
    '75%': 'Percentile Distribution (75%)',
    'max': 'Maximum Value',
}

# Counts come first, followed by the descriptive statistics.
qc_columns = {
    'Count': final_wh_fg.count(),
    'Distinct Values': final_wh_fg.nunique(),
    'Null Count': final_wh_fg.isnull().sum(),
}
qc_columns.update(
    {label: numeric_profile[stat] for stat, label in stat_labels.items()}
)

result_df1 = pd.DataFrame(qc_columns)

result_df1

Unnamed: 0,Count,Distinct Values,Null Count,Mean,Std,Minimum Value,Percentile Distribution (25%),Percentile Distribution (50%),Percentile Distribution (75%),Maximum Value
CostperUOM,1653,105,0,0.7840069,0.4067678,0.07607143,0.5078378,0.625,1.109375,3.423214
FGSKUCode,1653,551,0,4003547000.0,43017500.0,4001031000.0,4001362000.0,4001372000.0,4001972000.0,5011999000.0
Storage Cost/Pallet (USD),1329,3,324,34.01333,14.35701,18.79,18.79,30.0,53.25,53.25
UM,1329,2,324,,,,,,,
WH Cod,1653,3,0,,,,,,,
retain_flag,1653,1,0,1.0,0.0,1.0,1.0,1.0,1.0,1.0


# Formatting the Excel output

In [10]:
# Main block: write the final table and its QC profile to one workbook,
# one sheet per DataFrame, then format every sheet.


def _format_worksheet(worksheet):
    """Apply the report layout to one openpyxl worksheet, in place.

    The sheet is expected to hold a single table starting at A1 (as written
    by ``DataFrame.to_excel``). After formatting, the table starts at B2,
    every table cell has a thin border, the header row is light blue,
    column widths follow the longest rendered value, and an auto-filter
    covers the table.
    """
    worksheet.sheet_view.showGridLines = False  # Remove gridlines

    # Add an empty spacer row and column so the table starts at B2.
    worksheet.insert_rows(1)
    worksheet.insert_cols(1)

    # Thin border around every table cell (header and data).
    thin = Side(style='thin')
    table_border = Border(left=thin, right=thin, top=thin, bottom=thin)
    for row in worksheet.iter_rows(min_row=2, min_col=2, max_col=worksheet.max_column):
        for cell in row:
            cell.border = table_border

    # Light blue fill for the header row; one fill object is shared.
    header_fill = PatternFill(start_color="ADD8E6", end_color="ADD8E6", fill_type="solid")
    for header_row in worksheet.iter_rows(min_row=2, max_row=2, min_col=2):
        for cell in header_row:
            cell.fill = header_fill

    # Keep the first cells of the spacer row border-free. An empty Border()
    # is used rather than None, which is not a Border value.
    for cell in worksheet['A1:C1'][0]:
        cell.border = Border()

    # Autofit columns. Widths are measured on the string form of each value
    # so numeric cells count too; empty cells are ignored.
    for column_cells in worksheet.columns:
        longest = max(
            (len(str(cell.value)) for cell in column_cells if cell.value is not None),
            default=0,
        )
        column_letter = column_cells[0].column_letter
        worksheet.column_dimensions[column_letter].width = (longest + 2) * 1.2

    # Auto-filter over the table only (B2 to the last used cell).
    last_cell = f"{get_column_letter(worksheet.max_column)}{worksheet.max_row}"
    worksheet.auto_filter.ref = f"B2:{last_cell}"


# Sheet name -> DataFrame to write on that sheet.
dataframes = {
    'WH-FG storage': final_wh_fg,
    # The QC table keeps the profiled column names in its index. Sheets are
    # written with index=False, so move the index into a regular column,
    # otherwise the QC rows would be unlabelled in the workbook.
    'QC_output': result_df1.rename_axis('Column').reset_index(),
}

# Specify the path to save your Excel file
file_path = r'29-04-2024 WH-FG storage.xlsx'

# ExcelWriter saves and closes the workbook when the with-block exits, so the
# formatting must happen inside the block and no separate save is needed.
with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
    for sheet_name, dataframe in dataframes.items():
        dataframe.to_excel(writer, sheet_name=sheet_name, index=False)

    workbook = writer.book
    for sheet_name in dataframes:
        _format_worksheet(workbook[sheet_name])

print("All DataFrames are written to {}".format(file_path))


All DataFrames are written to 29-04-2024 WH-FG storage.xlsx
