# Main Code for Demand 

In [3]:
import pandas as pd
import os
from openpyxl import Workbook
from openpyxl.styles import Font, NamedStyle, Border, Side, PatternFill

In [4]:
# Input Demand Raw Data file
cwd = os.getcwd()
file_path = os.path.join(cwd, "client_data", "Demand.xlsx")

In [5]:
#Verifying file path
print(file_path)

/workspaces/Factory-Production-June/client_data/Demand.xlsx


In [6]:
df_raw=pd.read_excel(file_path, skiprows=2)
df_with_ret_flag = df_raw.copy()
df_with_ret_flag= df_with_ret_flag.iloc[:, :-5]
df_with_ret_flag['Ret_Flag'] = df_with_ret_flag['Unified code'].apply(lambda x: 0 if x == 'No code' else 1)
filtered_df = df_with_ret_flag[~df_with_ret_flag['Origin'].isin(['Other', 'nan','NaN'])]

In [7]:
filtered_df['Country'] = filtered_df['Geography'] + '_' + filtered_df['Market']

In [8]:
df_melted = filtered_df.melt(id_vars=['Geography','Market','Market system','Channel','Channel - Customer','Origin','Country','Unified code','Code','Description','UM','Ret_Flag'], var_name='Months', value_name='Demand')
#df_melted.to_excel("test2.xlsx")

In [9]:
df_demand2 = df_melted.groupby(['Country', 'Origin','Unified code','UM', 'Ret_Flag','Months'])['Demand'].sum().reset_index()
df_demand2.to_excel("Actual_Production_w_Factory.xlsx")

In [10]:
df_demand = df_melted.groupby(['Country', 'Unified code','UM', 'Ret_Flag','Months'])['Demand'].sum().reset_index()
#df_demand.to_excel("test3.xlsx")

In [11]:
##Quality Check - If this fails the data preparation is incorrect
# Total demand before grouping
total_demand_before = df_melted['Demand'].sum()
print("Total demand before grouping:", total_demand_before)

# Total demand after grouping
total_demand_after = df_demand['Demand'].sum()
print("Total demand after grouping:", total_demand_after)

Total demand before grouping: 11278430.4444088
Total demand after grouping: 11278430.4444088


In [12]:
# Update retention flag
df_demand['Ret_Flag'] = df_demand.apply(lambda x: 0 if x['Demand'] < 1 else x['Ret_Flag'], axis=1)
# Display the DataFrame with updated 'Ret_Flag' column
column_order = ['Unified code', 'Country', 'Months','UM', 'Demand', 'Ret_Flag']
# Reorder columns in df_demand
df_demand = df_demand[column_order]
df_demand.columns=['FGSKUCode', 'Country', 'Month','UM', 'Demand', 'Ret_Flag']
df_demand['Month'] = pd.to_datetime(df_demand['Month']).dt.month

# QC

In [13]:
# Get the number of distinct values for each column
distinct_counts = df_demand.nunique()

# Get the number of null values for each column
null_counts = df_demand.isnull().sum()
counts=df_demand.count()
# Get the count, mean, std, min, and percentile distribution for each column
summary_stats = df_demand.describe(percentiles=[0.25, 0.5, 0.75]).transpose()

# Combine all the results into a single DataFrame
result_df = pd.DataFrame({
    'Count':counts,
    'Distinct Values': distinct_counts,
    'Null Count': null_counts,
    'Mean': summary_stats['mean'],
    'Std': summary_stats['std'],
    'Minimum Value': summary_stats['min'],
    'Percentile Distribution (25%)': summary_stats['25%'],
    'Percentile Distribution (50%)': summary_stats['50%'],
    'Percentile Distribution (75%)': summary_stats['75%'],
    'Maximum Value': summary_stats['max']
})



result_df

Unnamed: 0,Count,Distinct Values,Null Count,Mean,Std,Minimum Value,Percentile Distribution (25%),Percentile Distribution (50%),Percentile Distribution (75%),Maximum Value
Country,14820,22,0,,,,,,,
Demand,14820,7792,0,761.0277,3223.06,-117.42,0.0,65.49663,450.0,82707.11
FGSKUCode,14820,551,0,4002448000.0,28744890.0,4001031000.0,4001370000.0,4001371000.0,4001972000.0,5011999000.0
Month,14820,12,0,6.5,3.452169,1.0,3.75,6.5,9.25,12.0
Ret_Flag,14820,2,0,0.6943995,0.4606769,0.0,0.0,1.0,1.0,1.0
UM,14820,3,0,,,,,,,


# FG SKU COUNTS

In [14]:
df_raw_check1=df_raw['Unified code'].nunique()
df_demand_check1=df_demand['FGSKUCode'].nunique()
df_month_demand_ret_1 = df_demand[df_demand['Ret_Flag'] == 1]
df_month_demand_ret_1=df_month_demand_ret_1['FGSKUCode'].nunique()

# VOLUME(CARTONS)

In [15]:
df_raw_carton=df_raw[df_raw['UM'] == 'CARTON']
total_volume = df_raw_carton.iloc[:, 11:23].sum().sum()

df_demand_carton=df_demand[df_demand['UM'] == 'CARTON']
df_demand_carton_check2=df_demand_carton['Demand'].sum().sum()

df_demand_carton_ret_1 = df_demand_carton[df_demand_carton['Ret_Flag'] == 1]
df_demand_carton_ret_1=df_demand_carton_ret_1['Demand'].sum().sum()


# VOLUME(KG)

In [16]:
df_raw_carton=df_raw[df_raw['UM'] == 'KILO GRAM']
total_volume_kg = df_raw_carton.iloc[:, 11:23].sum().sum()

df_demand_kg=df_demand[df_demand['UM'] == 'KILO GRAM']
df_demand_kg_check2=df_demand_kg['Demand'].sum().sum()

df_demand_kg_ret_1 = df_demand_kg[df_demand_kg['Ret_Flag'] == 1]
df_demand_kg_ret_1=df_demand_kg_ret_1['Demand'].sum().sum()

In [17]:
#Storing result in a single df
result_df_1 = pd.DataFrame({
    'Initial Value(Raw data)': [df_raw_check1,total_volume,total_volume_kg],
    'Manufactured Value': [df_demand_check1,df_demand_carton_check2,df_demand_kg_check2],
    'Solver Input (Ret_Flag=1)': [df_month_demand_ret_1,df_demand_carton_ret_1,df_demand_kg_ret_1]
}, index=['FGSKU','Volume (cartons)','Volume (kgs)'])

# Display the result 
print(result_df_1.to_string(float_format='{:,.2f}'.format))

                  Initial Value(Raw data)  Manufactured Value  Solver Input (Ret_Flag=1)
FGSKU                              551.00              551.00                     551.00
Volume (cartons)            10,207,064.01       11,077,708.85              11,077,837.49
Volume (kgs)                   136,189.07          158,211.31                 158,211.31


In [18]:
#for saving output in a new excel
dfs = [df_demand, result_df, result_df_1]
# Create a new Excel workbook
wb = Workbook()

# Remove default sheet created by openpyxl
default_sheet = wb['Sheet']
wb.remove(default_sheet)

# Create a named style for bold headings
bold_style = NamedStyle(name='bold_style')
bold_style.font = Font(bold=True, color='FFFFFF')  # Set font color to white

# Define the desired sheet names
sheet_names = ['Demand_output', 'QC_output']

# Define blue fill
blue_fill = PatternFill(start_color="4169E1", end_color="4169E1", fill_type="solid")

# Loop through each DataFrame and adding it to a new sheet in the workbook
for df, sheet_name in zip(dfs, sheet_names):
    # Creating a new sheet
    ws = wb.create_sheet(title=sheet_name)

    # Writing DataFrame to the sheet, starting from cell A1
    
    if sheet_name == 'Demand_output':
        header_row = [str(col) for col in df.columns]
        ws.append(header_row)
        for idx, row in df.iterrows():
            ws.append(row.tolist())
        
        for cell in ws[1]:
            cell.style = bold_style
            cell.fill = blue_fill
            
        for row in ws.iter_rows(min_row=1, max_row=ws.max_row, max_col=ws.max_column):
            for cell in row:
                cell.border = Border(left=Side(style='thin'),
                                     right=Side(style='thin'),
                                     top=Side(style='thin'),
                                     bottom=Side(style='thin'))
        ws.sheet_view.showGridLines = False
        for col in ws.columns:
            max_length = 0
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except:
                    pass
            adjusted_width = (max_length + 2) * 1.2
            ws.column_dimensions[col[0].column_letter].width = adjusted_width
    elif sheet_name == 'QC_output':
        df = result_df
        header_row = ['Column Name'] + [str(col) for col in df.columns]
        ws.append(header_row)
        for idx, row in df.iterrows():
            ws.append([idx]+row.tolist())
        for cell in ws[1]:
            cell.style = bold_style
            cell.fill = blue_fill
            
        for row in ws.iter_rows(min_row=1, max_row=ws.max_row, max_col=ws.max_column):
            for cell in row:
                cell.border = Border(left=Side(style='thin'),
                                     right=Side(style='thin'),
                                     top=Side(style='thin'),
                                     bottom=Side(style='thin'))
        for col in ws.columns:
            max_length = 0
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except:
                    pass
            adjusted_width = (max_length + 2) * 1.2
            ws.column_dimensions[col[0].column_letter].width = adjusted_width

        for _ in range(5):
            ws.append([])
            
        df = result_df_1
        header_row = ['Data'] + [str(col) for col in df.columns]
        ws.append(header_row)
        for idx, row in df.iterrows():
            ws.append([idx]+row.tolist())
            
        for idx, cell in enumerate(ws[13], start=1):
                if idx <= 4:
                    cell.style = bold_style
                    cell.fill = blue_fill
                    
        for row in ws.iter_rows(min_row=13, max_row=16, max_col=4):
            for cell in row:
                cell.border = Border(left=Side(style='thin'),
                                     right=Side(style='thin'),
                                     top=Side(style='thin'),
                                     bottom=Side(style='thin'))
        ws.sheet_view.showGridLines = False
        for col in ws.columns:
            max_length = 0
            for cell in col:
                try:
                    if len(str(cell.value)) > max_length:
                        max_length = len(cell.value)
                except:
                    pass
            adjusted_width = (max_length + 2) * 1.2
            ws.column_dimensions[col[0].column_letter].width = adjusted_width

In [19]:
# Save the workbook by changing the name as required
wb.save('03-06-2024_Demand_Automation.xlsx')

In [20]:
df_demand.shape

(14820, 6)