In [628]:
import pandas as pd
import numpy as np
import xlsxwriter
import os

# Open the workbook once; the same handle is reused when the individual sheets are parsed
excel_file = pd.ExcelFile('Template v2.xlsx')

# Worksheet names found in the workbook
sheet_names = list(excel_file.sheet_names)

# Show them so a missing or renamed sheet is noticed early
print(f'Available sheets in the file are- {sheet_names}')

Available sheets in the file are- ['Base Capacity', 'Support', 'Zone Capacity', 'Support Capacity', 'BI Inputs', 'Sheet1', 'Format', 'Mapping']


In [629]:
# Parse each worksheet used by this notebook into its own dataframe.
# The targets on the left are matched positionally with the sheet names on the right.
(Mapping_df, BaseCapacity_df, Support_Capacity_df,
 Support_df, Bi_Inputs_df, Zone_Capacity_df) = (
    pd.read_excel(excel_file, sheet_name=sheet)
    for sheet in ("Mapping", "Base Capacity", "Support Capacity",
                  "Support", "BI Inputs", "Zone Capacity")
)

In [630]:
# Month columns, listed July through June
numerical_col = 'Jul Aug Sep Oct Nov Dec Jan Feb Mar Apr May Jun'.split()

In [631]:
# Mapping sheet workings: one small lookup table per attribute.
# drop_duplicates keeps the first row found for each key, so every lookup holds
# at most one row per key.

# OU -> Facility Name
OU_Facility_df = Mapping_df.loc[:, ['OU', 'Facility Name']].drop_duplicates(subset='OU')

# OU -> City
OU_City_df = Mapping_df.loc[:, ['OU', 'City']].drop_duplicates(subset='OU')

# OU -> BI Geo (only used to validate the BI Geo that arrives on the BI Inputs sheet)
OU_BIGeo_df = Mapping_df.loc[:, ['OU', 'BI Geo']].drop_duplicates(subset='OU')

# BI Geo -> Geo (de-duplication is mandatory here)
BIGeo_Geo_df = Mapping_df.loc[:, ['BI Geo', 'Geo']].drop_duplicates(subset='BI Geo')

# BI Geo -> Geo Type (de-duplication is mandatory here)
BIGeo_GeoType_df = Mapping_df.loc[:, ['BI Geo', 'Geo Type']].drop_duplicates(subset='BI Geo')

# OU -> Status and Custom OU
# New working: a cross check is still required
OU_Status_CustomOU_df = (
    Mapping_df.loc[:, ['OU', 'Status', 'Custom OU']].drop_duplicates(subset='OU')
)


In [632]:
# BI Inputs sheet workings

# Map the export's raw headers to the friendly names used in the rest of the notebook
bi_header_map = {
    "LE[Scenario]": "FCST",
    "LE[Client]": "Customer",
    "LE[Horizontal]": "Horizontal",
    "LE[Stage]": "Stage",
    "LE[Vertical]": "Vertical",
    "LE[SD_Geo]": "BI Geo",
    "LE[OU_DESCR]": "OU",
    "LE[Project_DESCR]": "Program Name",
    "LE[Account]": "Account",
    "LE[Year]": "FY",
}
# Monthly totals arrive as "[Sum<Mon>]"; rename each one to the bare month name
bi_header_map.update({f"[Sum{month}]": month for month in numerical_col})
Bi_Inputs_df = Bi_Inputs_df.rename(columns=bi_header_map)

# Descriptive columns to retain, followed by the twelve month columns
columns_to_keep = [
    "FCST", "Customer", "Horizontal", "Stage", "Vertical",
    "BI Geo", "OU", "Program Name", "FY", "Account",
    *numerical_col,
]
Bi_Inputs_df = Bi_Inputs_df.loc[:, columns_to_keep]

# Keep only the rows whose Account is "Seats for Allocation_Adj_Store"
is_seat_allocation = Bi_Inputs_df['Account'].eq("Seats for Allocation_Adj_Store")
Bi_Inputs_df = Bi_Inputs_df[is_seat_allocation]


# Translate a 'Stage' value into the 'Seat Type' label used in the report
def determine_seat_type(stage):
    """Return the seat-type label for a pipeline stage.

    'Existing' maps to 'Production', 'Stage 5' to 'BD Stage 5', and
    'Stage 3' / 'Stage 4' to 'BD Stage 3 & 4'. Any other value, including a
    missing one, maps to 'BD Stage 2 & Below'.
    """
    stage_buckets = (
        ('Production', ('Existing',)),
        ('BD Stage 5', ('Stage 5',)),
        ('BD Stage 3 & 4', ('Stage 3', 'Stage 4')),
    )
    for seat_type, stages in stage_buckets:
        if stage in stages:
            return seat_type
    return 'BD Stage 2 & Below'
    
# Derive 'Seat Type' from 'Stage'.
# Work on an explicit copy first: Bi_Inputs_df can be a boolean-filtered slice at this
# point, and adding columns to a slice triggers pandas' SettingWithCopyWarning.
Bi_Inputs_df = Bi_Inputs_df.copy()
Bi_Inputs_df['Seat Type'] = Bi_Inputs_df['Stage'].apply(determine_seat_type)

# Keep only the first 4 characters of the 'OU' column
Bi_Inputs_df['OU'] = Bi_Inputs_df['OU'].str.slice(0, 4)

# Remove the first 2 characters from the 'Customer' column
Bi_Inputs_df['Customer'] = Bi_Inputs_df['Customer'].str.slice(2)

# Remove the last 2 characters from the 'Vertical' column
Bi_Inputs_df['Vertical'] = Bi_Inputs_df['Vertical'].str.slice(0, -2)

# Remove the last 2 characters from the 'Horizontal' column
Bi_Inputs_df['Horizontal'] = Bi_Inputs_df['Horizontal'].str.slice(0, -2)


def _job_code(program_name):
    """Return the job-code prefix of a program name.

    Names that start with a digit yield their first 5 characters; all other
    names yield their first 18 characters. Non-string values (for example a
    missing Program Name) are returned unchanged instead of raising TypeError.
    """
    if not isinstance(program_name, str):
        return program_name
    return program_name[:5] if program_name[:1].isdigit() else program_name[:18]


# Create a new column 'Job Code' from the program name
Bi_Inputs_df['Job Code'] = Bi_Inputs_df['Program Name'].apply(_job_code)

# New working: cross check is required after Mapping data is updated.
# Bring in Status / Custom OU and swap in the Custom OU wherever Status is 'N'.
# np.where is used instead of a row-wise apply so an empty frame is handled too.
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_Status_CustomOU_df, on='OU', how='left')
Bi_Inputs_df['OU'] = np.where(
    Bi_Inputs_df['Status'] == 'N', Bi_Inputs_df['Custom OU'], Bi_Inputs_df['OU']
)

# Drop the 'Custom OU' column
Bi_Inputs_df = Bi_Inputs_df.drop(columns=['Custom OU'])

# Look up Facility Name, City and the mapped BI Geo by (possibly swapped) OU.
# 'BI Geo' exists on both sides of the last merge, so it comes back as
# 'BI Geo_x' (from the BI inputs) and 'BI Geo_y' (from the mapping).
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_Facility_df, on='OU', how='left')
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_City_df, on='OU', how='left')
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_BIGeo_df, on='OU', how='left')

# The mapped BI Geo wins wherever the mapping provides one. When the OU has no
# mapping row the value from the BI inputs is kept; previously it was
# overwritten with NaN because NaN never compares equal to anything.
Bi_Inputs_df['BI Geo_x'] = Bi_Inputs_df['BI Geo_y'].fillna(Bi_Inputs_df['BI Geo_x'])

# Drop the helper column and restore the plain 'BI Geo' name
Bi_Inputs_df = Bi_Inputs_df.drop(columns=['BI Geo_y'])
Bi_Inputs_df = Bi_Inputs_df.rename(columns={'BI Geo_x': 'BI Geo'})

# Missing month values count as zero; store every month column as float
Bi_Inputs_df[numerical_col] = Bi_Inputs_df[numerical_col].fillna(0).astype(float)





In [633]:
# New working to get all row items

# Every (OU, BI Geo) pair that appears on any of the four input tables
key_cols = ["OU", "BI Geo"]
all_rows = pd.concat([
    BaseCapacity_df[key_cols],
    Support_Capacity_df[key_cols],
    Support_df[key_cols],
    Bi_Inputs_df[key_cols],
]).drop_duplicates()
all_keys = all_rows.set_index(key_cols).index

# Reindex both capacity tables onto the full key set; rows that are new get 0 in every column
BaseCapacity_df = BaseCapacity_df.set_index(key_cols).reindex(all_keys, fill_value=0).reset_index()
Support_Capacity_df = Support_Capacity_df.set_index(key_cols).reindex(all_keys, fill_value=0).reset_index()

# Rows added by the reindex carry FY == 0. Replace that placeholder with the most
# common non-zero FY of the SAME table. Each table gets its own value: the earlier
# code reused one variable, so Base Capacity was filled with Support Capacity's FY.
base_fy = BaseCapacity_df.loc[BaseCapacity_df["FY"] != 0, "FY"].mode()[0]
support_fy = Support_Capacity_df.loc[Support_Capacity_df["FY"] != 0, "FY"].mode()[0]
# Name kept so that any later cell reading it still sees the value it had before
most_common_value = support_fy

BaseCapacity_df["FY"] = BaseCapacity_df["FY"].mask(BaseCapacity_df["FY"] == 0, base_fy)
Support_Capacity_df["FY"] = Support_Capacity_df["FY"].mask(Support_Capacity_df["FY"] == 0, support_fy)

# Facility Name is 0 on the added rows, so drop it and look it up again by OU for every row
BaseCapacity_df = BaseCapacity_df.drop(columns=["Facility Name"])
Support_Capacity_df = Support_Capacity_df.drop(columns=["Facility Name"])
BaseCapacity_df = pd.merge(BaseCapacity_df, OU_Facility_df, on='OU', how='left')
Support_Capacity_df = pd.merge(Support_Capacity_df, OU_Facility_df, on='OU', how='left')

# Label the rows of each table
BaseCapacity_df["Seat Type"] = "Base Capacity"
Support_Capacity_df["Seat Type"] = "Support Capacity"

In [634]:
# Scenario label: the "FCST" value on the row labelled 0 of the BI inputs
Scenario = Bi_Inputs_df.at[0, "FCST"]

Scenario

'FY25-Dec Forecast'

In [635]:
# Starting points for the site-level totals; each name is filtered by stage in a later cell
Existing_Total_df = BD_3andAbove_Total_df = BD_2andBelow_Total_df = Bi_Inputs_df

# Starting points for the customer-level totals
Client_Existing_Total_df = Client_BD_3toExisting_Total_df = Client_AllStage_Total_df = Bi_Inputs_df

In [636]:
# Stage labels grouped into the three buckets used when filtering the BI inputs
StageLevelExisting = ['Existing']
StageLevel3to5 = [f'Stage {level}' for level in (3, 4, 5)]
StageLevel2toBelow = [f'Stage {level}' for level in (1, 2)] + ['Stage_Other']

In [637]:
# Site-level buckets: keep only the rows whose Stage belongs to each bucket
Existing_Total_df = Existing_Total_df.loc[Existing_Total_df['Stage'].isin(StageLevelExisting)]
BD_3andAbove_Total_df = BD_3andAbove_Total_df.loc[BD_3andAbove_Total_df['Stage'].isin(StageLevel3to5)]
BD_2andBelow_Total_df = BD_2andBelow_Total_df.loc[BD_2andBelow_Total_df['Stage'].isin(StageLevel2toBelow)]

# Customer-level buckets: these are cumulative (each one contains the previous one)
stages_3_to_existing = ['Existing', 'Stage 5', 'Stage 4', 'Stage 3']
stages_all = ['Existing', 'Stage 5', 'Stage 4', 'Stage 3', 'Stage 2', 'Stage 1', 'Stage_Other']

Client_Existing_Total_df = Client_Existing_Total_df.loc[
    Client_Existing_Total_df['Stage'].isin(StageLevelExisting)
]
Client_BD_3toExisting_Total_df = Client_BD_3toExisting_Total_df.loc[
    Client_BD_3toExisting_Total_df['Stage'].isin(stages_3_to_existing)
]
Client_AllStage_Total_df = Client_AllStage_Total_df.loc[
    Client_AllStage_Total_df['Stage'].isin(stages_all)
]

In [638]:
# Collapse every bucket to a single row per key by summing the month columns

# Site-level key
site_keys = ["FY", "FCST", 'BI Geo', "Facility Name", 'OU', "City"]
Existing_Total_df = Existing_Total_df.groupby(site_keys)[numerical_col].sum().reset_index().fillna(0)
BD_3andAbove_Total_df = BD_3andAbove_Total_df.groupby(site_keys)[numerical_col].sum().reset_index().fillna(0)
BD_2andBelow_Total_df = BD_2andBelow_Total_df.groupby(site_keys)[numerical_col].sum().reset_index().fillna(0)

# Customer-level key: the site key plus the customer
client_keys = site_keys + ["Customer"]
Client_Existing_Total_df = Client_Existing_Total_df.groupby(client_keys)[numerical_col].sum().reset_index().fillna(0)
Client_BD_3toExisting_Total_df = Client_BD_3toExisting_Total_df.groupby(client_keys)[numerical_col].sum().reset_index().fillna(0)
Client_AllStage_Total_df = Client_AllStage_Total_df.groupby(client_keys)[numerical_col].sum().reset_index().fillna(0)



In [639]:
# Zone Capacity working: look up City by OU and stamp every row with the scenario label
Zone_Capacity_df = (
    Zone_Capacity_df
    .merge(OU_City_df, on='OU', how='left')
    .assign(FCST=Scenario)
)


In [640]:
# New_Client_Existing_Total: per key and month, the larger of the zone capacity
# and the existing-stage client total

index_cols = ["FY", "FCST", "BI Geo", "Facility Name", "OU", "City", "Customer"]
numerical_col = ['Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# Key both frames on the same columns
Zone_Capacity_df = Zone_Capacity_df.set_index(index_cols)
Client_Existing_Total_df = Client_Existing_Total_df.set_index(index_cols)

# Every key that occurs in either frame
all_indexes = Zone_Capacity_df.index.union(Client_Existing_Total_df.index)

# Put both frames on that shared key set, keeping only the month columns;
# rows that did not exist before are filled with 0
Zone_Capacity_df = Zone_Capacity_df.reindex(all_indexes, columns=numerical_col, fill_value=0)
Client_Existing_Total_df = Client_Existing_Total_df.reindex(all_indexes, columns=numerical_col, fill_value=0)

# Element-wise maximum of the two aligned value blocks, with the keys turned back into columns
larger_of_both = np.maximum(Zone_Capacity_df.values, Client_Existing_Total_df.values)
New_Client_Existing_Total_df = pd.DataFrame(
    larger_of_both, index=all_indexes, columns=numerical_col
).reset_index()

# Turn the keys of the two inputs back into ordinary columns as well
Client_Existing_Total_df = Client_Existing_Total_df.reset_index()
Zone_Capacity_df = Zone_Capacity_df.reset_index()


In [641]:
# New_Client_BD_3toExisting_Total: per key and month, the larger of the zone capacity
# and the client total for stages 3 through Existing

index_cols = ["FY", "FCST", "BI Geo", "Facility Name", "OU", "City", "Customer"]
numerical_col = ['Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# Key both frames on the same columns
Zone_Capacity_df = Zone_Capacity_df.set_index(index_cols)
Client_BD_3toExisting_Total_df = Client_BD_3toExisting_Total_df.set_index(index_cols)

# Every key that occurs in either frame
all_indexes = Zone_Capacity_df.index.union(Client_BD_3toExisting_Total_df.index)

# Put both frames on that shared key set, keeping only the month columns;
# rows that did not exist before are filled with 0
Zone_Capacity_df = Zone_Capacity_df.reindex(all_indexes, columns=numerical_col, fill_value=0)
Client_BD_3toExisting_Total_df = Client_BD_3toExisting_Total_df.reindex(
    all_indexes, columns=numerical_col, fill_value=0
)

# Element-wise maximum of the two aligned value blocks, with the keys turned back into columns
larger_of_both = np.maximum(Zone_Capacity_df.values, Client_BD_3toExisting_Total_df.values)
New_Client_BD_3toExisting_Total_df = pd.DataFrame(
    larger_of_both, index=all_indexes, columns=numerical_col
).reset_index()

# Turn the keys of the two inputs back into ordinary columns as well
Client_BD_3toExisting_Total_df = Client_BD_3toExisting_Total_df.reset_index()
Zone_Capacity_df = Zone_Capacity_df.reset_index()


In [642]:
# New_Client_AllStage_Total: per key and month, the larger of the zone capacity
# and the client total across all stages

index_cols = ["FY", "FCST", "BI Geo", "Facility Name", "OU", "City", "Customer"]
numerical_col = ['Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# Key both frames on the same columns
Zone_Capacity_df = Zone_Capacity_df.set_index(index_cols)
Client_AllStage_Total_df = Client_AllStage_Total_df.set_index(index_cols)

# Every key that occurs in either frame
all_indexes = Zone_Capacity_df.index.union(Client_AllStage_Total_df.index)

# Put both frames on that shared key set, keeping only the month columns;
# rows that did not exist before are filled with 0
Zone_Capacity_df = Zone_Capacity_df.reindex(all_indexes, columns=numerical_col, fill_value=0)
Client_AllStage_Total_df = Client_AllStage_Total_df.reindex(all_indexes, columns=numerical_col, fill_value=0)

# Element-wise maximum of the two aligned value blocks, with the keys turned back into columns
larger_of_both = np.maximum(Zone_Capacity_df.values, Client_AllStage_Total_df.values)
New_Client_AllStage_Total_df = pd.DataFrame(
    larger_of_both, index=all_indexes, columns=numerical_col
).reset_index()

# Turn the keys of the two inputs back into ordinary columns as well
Client_AllStage_Total_df = Client_AllStage_Total_df.reset_index()
Zone_Capacity_df = Zone_Capacity_df.reset_index()


In [643]:
# Roll the customer-level maxima up to site level (the Customer column is summed away)
zone_site_keys = ["FY", "FCST", 'BI Geo', "Facility Name", 'OU', "City"]

New_Client_Existing_Total_df = (
    New_Client_Existing_Total_df.groupby(zone_site_keys)[numerical_col].sum().reset_index().fillna(0)
)
New_Client_BD_3toExisting_Total_df = (
    New_Client_BD_3toExisting_Total_df.groupby(zone_site_keys)[numerical_col].sum().reset_index().fillna(0)
)
New_Client_AllStage_Total_df = (
    New_Client_AllStage_Total_df.groupby(zone_site_keys)[numerical_col].sum().reset_index().fillna(0)
)

In [644]:
# Inputs for the zone trapped-seats calculation: everything summed per site
trapped_keys = ["FY", "FCST", 'BI Geo', "Facility Name", 'OU', "City"]

# Zone capacity per site
Zone_Capacity_Grouped_df = Zone_Capacity_df.groupby(trapped_keys)[numerical_col].sum().reset_index().fillna(0)

# BI inputs per site, Existing stage only
existing_rows = Bi_Inputs_df[Bi_Inputs_df['Stage'].isin(['Existing'])]
BI_Existing_Total_df = existing_rows.groupby(trapped_keys)[numerical_col].sum().reset_index().fillna(0)

# BI inputs per site, Stage 3 through Existing
stage3_up_rows = Bi_Inputs_df[Bi_Inputs_df['Stage'].isin(['Existing', 'Stage 5', 'Stage 4', 'Stage 3'])]
BI_Stage3toExisting_Total_df = stage3_up_rows.groupby(trapped_keys)[numerical_col].sum().reset_index().fillna(0)

# BI inputs per site, every listed stage
all_stage_rows = Bi_Inputs_df[
    Bi_Inputs_df['Stage'].isin(['Existing', 'Stage 5', 'Stage 4', 'Stage 3', 'Stage 2', 'Stage 1', 'Stage_Other'])
]
BI_AllStagetoExisting_Total_df = all_stage_rows.groupby(trapped_keys)[numerical_col].sum().reset_index().fillna(0)




In [645]:
# Trapped seats after SD: the zone capacity that exceeds the existing-stage BI total

# Key both frames on the site columns
index_cols = ['FY', 'FCST', 'BI Geo', 'Facility Name', 'OU', 'City']
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.set_index(index_cols)
BI_Existing_Total_df = BI_Existing_Total_df.set_index(index_cols)

# Shared key set; keys missing from a frame are added with 0 in every column
all_indexes = Zone_Capacity_Grouped_df.index.union(BI_Existing_Total_df.index)
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.reindex(all_indexes, fill_value=0)
BI_Existing_Total_df = BI_Existing_Total_df.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
zone_seats = Zone_Capacity_Grouped_df[numerical_col].fillna(0)
used_seats = BI_Existing_Total_df[numerical_col].fillna(0)

# Where the zone holds more than is used, keep the surplus; otherwise 0.
# np.where returns a plain array, so wrap it back into a keyed frame and
# turn the keys back into columns.
Trapped_Seats_Aft_SD_df = pd.DataFrame(
    np.where(zone_seats > used_seats, zone_seats - used_seats, 0),
    index=all_indexes,
    columns=numerical_col,
).reset_index()

# Turn the keys of the inputs back into ordinary columns
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.reset_index()
BI_Existing_Total_df = BI_Existing_Total_df.reset_index()

In [646]:
# Trapped seats after BD (> stage3): the zone capacity that exceeds the
# Stage 3 through Existing BI total

# Key both frames on the site columns (index_cols comes from an earlier cell)
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.set_index(index_cols)
BI_Stage3toExisting_Total_df = BI_Stage3toExisting_Total_df.set_index(index_cols)

# Shared key set; keys missing from a frame are added with 0 in every column
all_indexes = Zone_Capacity_Grouped_df.index.union(BI_Stage3toExisting_Total_df.index)
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.reindex(all_indexes, fill_value=0)
BI_Stage3toExisting_Total_df = BI_Stage3toExisting_Total_df.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
zone_seats = Zone_Capacity_Grouped_df[numerical_col].fillna(0)
used_seats = BI_Stage3toExisting_Total_df[numerical_col].fillna(0)

# Where the zone holds more than is used, keep the surplus; otherwise 0.
# np.where returns a plain array, so wrap it back into a keyed frame and
# turn the keys back into columns.
Trapped_Seats_Aft_Stage3_df = pd.DataFrame(
    np.where(zone_seats > used_seats, zone_seats - used_seats, 0),
    index=all_indexes,
    columns=numerical_col,
).reset_index()

# Turn the keys of the inputs back into ordinary columns
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.reset_index()
BI_Stage3toExisting_Total_df = BI_Stage3toExisting_Total_df.reset_index()


In [647]:
# Trapped seats after all BD: the zone capacity that exceeds the all-stage BI total

# Key both frames on the site columns
index_cols = ['FY', 'FCST', 'BI Geo', 'Facility Name', 'OU', 'City']
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.set_index(index_cols)
BI_AllStagetoExisting_Total_df = BI_AllStagetoExisting_Total_df.set_index(index_cols)

# Shared key set; keys missing from a frame are added with 0 in every column
all_indexes = Zone_Capacity_Grouped_df.index.union(BI_AllStagetoExisting_Total_df.index)
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.reindex(all_indexes, fill_value=0)
BI_AllStagetoExisting_Total_df = BI_AllStagetoExisting_Total_df.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
zone_seats = Zone_Capacity_Grouped_df[numerical_col].fillna(0)
used_seats = BI_AllStagetoExisting_Total_df[numerical_col].fillna(0)

# Where the zone holds more than is used, keep the surplus; otherwise 0.
# np.where returns a plain array, so wrap it back into a keyed frame and
# turn the keys back into columns.
Trapped_Seats_Aft_AllStage_df = pd.DataFrame(
    np.where(zone_seats > used_seats, zone_seats - used_seats, 0),
    index=all_indexes,
    columns=numerical_col,
).reset_index()

# Turn the keys of the inputs back into ordinary columns
Zone_Capacity_Grouped_df = Zone_Capacity_Grouped_df.reset_index()
BI_AllStagetoExisting_Total_df = BI_AllStagetoExisting_Total_df.reset_index()


In [648]:
# Label each trapped-seats table with the Seat Type it reports under
Trapped_Seats_Aft_SD_df = Trapped_Seats_Aft_SD_df.assign(
    **{"Seat Type": "Trapped Seats Aft. SD"}
)
Trapped_Seats_Aft_Stage3_df = Trapped_Seats_Aft_Stage3_df.assign(
    **{"Seat Type": "Trapped Seats Aft. BD (> stage3)"}
)
Trapped_Seats_Aft_AllStage_df = Trapped_Seats_Aft_AllStage_df.assign(
    **{"Seat Type": "Trapped Seats Aft. all BD"}
)


In [649]:
# Display only: column layout of the trapped-seats (after SD) table
Trapped_Seats_Aft_SD_df.columns

Index(['FY', 'FCST', 'BI Geo', 'Facility Name', 'OU', 'City', 'Jul', 'Aug',
       'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       'Seat Type'],
      dtype='object')

In [650]:
# Display only: first row of the trapped-seats (after SD) table
Trapped_Seats_Aft_SD_df.head(1)

Unnamed: 0,FY,FCST,BI Geo,Facility Name,OU,City,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,Seat Type
0,FY25,FY25-Dec Forecast,Geo Bulgaria,BLG_Burgas - Galleria,BG03,Burgas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Trapped Seats Aft. SD


In [651]:
# Base Capacity sheet workings
capacity_keys = ["FY", 'OU', 'BI Geo', "Facility Name", "Seat Type"]

BaseCapacity_df = (
    BaseCapacity_df
    # Sum the months per key in case the raw sheet lists a site more than once
    .groupby(capacity_keys)[numerical_col].sum().reset_index().fillna(0)
    # Look up City by OU
    .merge(OU_City_df, on='OU', how='left')
    # Stamp every row with the scenario label
    .assign(FCST=Scenario)
)

In [652]:
# Support Capacity sheet workings
support_capacity_keys = ["FY", 'OU', 'BI Geo', "Facility Name", "Seat Type"]

# Sum the months per key in case the raw sheet lists a site more than once, then look up City by OU
Support_Capacity_df = (
    Support_Capacity_df
    .groupby(support_capacity_keys)[numerical_col].sum().reset_index().fillna(0)
    .merge(OU_City_df, on='OU', how='left')
)

# Optional safety net: treat any month value that is still missing as zero
Support_Capacity_df[numerical_col] = Support_Capacity_df[numerical_col].fillna(0)

# Stamp every row with the scenario label
Support_Capacity_df['FCST'] = Scenario


In [653]:
# Support sheet workings

# Discard the rows whose 'Sum' is 0, then look up City by OU
has_seats = Support_df['Sum'].ne(0)
Support_df = Support_df[has_seats].merge(OU_City_df, on='OU', how='left')

# Optional safety net: treat any missing month value as zero
Support_df[numerical_col] = Support_df[numerical_col].fillna(0)

# Stamp every row with the scenario label
Support_df['FCST'] = Scenario


In [654]:
# Free seats: base capacity minus support capacity minus progressively more BI demand

# All five frames are keyed on the same site columns
site_index = ["OU", "Facility Name", "FY", "BI Geo", "City"]
BaseCapacity_df = BaseCapacity_df.set_index(site_index)
Support_Capacity_df = Support_Capacity_df.set_index(site_index)
existing_by_site = Existing_Total_df.set_index(site_index)
bd3_up_by_site = BD_3andAbove_Total_df.set_index(site_index)
bd2_down_by_site = BD_2andBelow_Total_df.set_index(site_index)

# Every key that occurs in any of the five frames
all_indexes = (
    BaseCapacity_df.index
    .union(Support_Capacity_df.index)
    .union(existing_by_site.index)
    .union(bd3_up_by_site.index)
    .union(bd2_down_by_site.index)
)

# Align all frames on that key set; keys missing from a frame are added with 0
BaseCapacity_df = BaseCapacity_df.reindex(all_indexes, fill_value=0)
Support_Capacity_df = Support_Capacity_df.reindex(all_indexes, fill_value=0)
EX_df = existing_by_site.reindex(all_indexes, fill_value=0)
BD_3andAbove_df = bd3_up_by_site.reindex(all_indexes, fill_value=0)
BD_2andBelow_df = bd2_down_by_site.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
base_months = BaseCapacity_df[numerical_col].fillna(0)
support_months = Support_Capacity_df[numerical_col].fillna(0)
existing_months = EX_df[numerical_col].fillna(0)
bd3_up_months = BD_3andAbove_df[numerical_col].fillna(0)
bd2_down_months = BD_2andBelow_df[numerical_col].fillna(0)

# Each step subtracts one more demand bucket from the base capacity
demand_after_sd = support_months + existing_months
demand_after_bd3 = demand_after_sd + bd3_up_months
demand_after_all_bd = demand_after_bd3 + bd2_down_months

# Turn the keys back into columns, then add the Seat Type label and the scenario
FS_afterSD_df = (base_months - demand_after_sd).reset_index().assign(
    **{'Seat Type': "Free Seats Aft. SD", 'FCST': Scenario}
)
FS_afterBD3andAbove_df = (base_months - demand_after_bd3).reset_index().assign(
    **{'Seat Type': "Free Seats Aft. BD (> stage3)", 'FCST': Scenario}
)
FS_afterAllBD_df = (base_months - demand_after_all_bd).reset_index().assign(
    **{'Seat Type': "Free Seats Aft. all BD", 'FCST': Scenario}
)


In [655]:
# New working for Production Capacity
# Key columns for the production-capacity table (note that 'City' is not among them)
index_columns = ["FY", "BI Geo", "Facility Name", "OU",]

# Empty frame that has only the key columns and no rows
Pro_Capacity_df = pd.DataFrame(columns=index_columns)



In [656]:
# Turn the index levels of both capacity tables back into ordinary columns
Support_Capacity_df = Support_Capacity_df.reset_index()
BaseCapacity_df = BaseCapacity_df.reset_index()

In [657]:

# Production capacity = base capacity minus support capacity, per key and month

# Key all three frames on index_columns
BaseCapacity_df = BaseCapacity_df.set_index(index_columns)
Support_Capacity_df = Support_Capacity_df.set_index(index_columns)
Pro_Capacity_df = Pro_Capacity_df.set_index(index_columns)

# Every key that occurs in any of the three frames
all_indexes = BaseCapacity_df.index.union(Support_Capacity_df.index).union(Pro_Capacity_df.index)

# Align on that key set; keys missing from a frame are added with 0
BaseCapacity_df = BaseCapacity_df.reindex(all_indexes, fill_value=0)
Support_Capacity_df = Support_Capacity_df.reindex(all_indexes, fill_value=0)
Pro_Capacity_df = Pro_Capacity_df.reindex(all_indexes, fill_value=0)

# The subtraction replaces the placeholder frame; any remaining NaN counts as 0
base_months = BaseCapacity_df[numerical_col].fillna(0)
support_months = Support_Capacity_df[numerical_col].fillna(0)
Pro_Capacity_df = (base_months - support_months).reset_index()

# Trial only for now: restore the key columns on the inputs too
# (not yet confirmed whether this changes the final output)
BaseCapacity_df = BaseCapacity_df.reset_index()
Support_Capacity_df = Support_Capacity_df.reset_index()

# Label the rows
Pro_Capacity_df['Seat Type'] = "Production Capacity"

# City is not part of the key, so look it up by OU
Pro_Capacity_df = Pro_Capacity_df.merge(OU_City_df, on='OU', how='left')

# Stamp every row with the scenario label
Pro_Capacity_df['FCST'] = Scenario

In [658]:
# New working for Zone Free Seats starts from here

In [659]:
# Free seats (zone) after SD: base capacity minus support capacity minus the
# zone-adjusted existing-stage total

# Key the three frames on the site columns
index_cols = ['OU', 'Facility Name', 'FY', 'BI Geo', 'City']
BaseCapacity_df = BaseCapacity_df.set_index(index_cols)
Support_Capacity_df = Support_Capacity_df.set_index(index_cols)
New_Client_Existing_Total_df = New_Client_Existing_Total_df.set_index(index_cols)

# Every key that occurs in any of the three frames
all_indexes = (
    BaseCapacity_df.index
    .union(Support_Capacity_df.index)
    .union(New_Client_Existing_Total_df.index)
)

# Align on that key set; keys missing from a frame are added with 0
BaseCapacity_df = BaseCapacity_df.reindex(all_indexes, fill_value=0)
Support_Capacity_df = Support_Capacity_df.reindex(all_indexes, fill_value=0)
New_Client_Existing_Total_df = New_Client_Existing_Total_df.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
base_months = BaseCapacity_df[numerical_col].fillna(0)
support_months = Support_Capacity_df[numerical_col].fillna(0)
zone_demand_months = New_Client_Existing_Total_df[numerical_col].fillna(0)

# Subtract, then turn the keys back into columns
Free_Seats_Zone_Aft_SD_df = (base_months - (support_months + zone_demand_months)).reset_index()

# Restore the key columns on the two capacity tables
BaseCapacity_df = BaseCapacity_df.reset_index()
Support_Capacity_df = Support_Capacity_df.reset_index()

In [660]:
# Free seats (zone) after BD (> stage3): base capacity minus support capacity minus
# the zone-adjusted Stage 3 through Existing total

# Key the three frames on the site columns
index_cols = ['OU', 'Facility Name', 'FY', 'BI Geo', 'City']
BaseCapacity_df = BaseCapacity_df.set_index(index_cols)
Support_Capacity_df = Support_Capacity_df.set_index(index_cols)
New_Client_BD_3toExisting_Total_df = New_Client_BD_3toExisting_Total_df.set_index(index_cols)

# Every key that occurs in any of the three frames
all_indexes = (
    BaseCapacity_df.index
    .union(Support_Capacity_df.index)
    .union(New_Client_BD_3toExisting_Total_df.index)
)

# Align on that key set; keys missing from a frame are added with 0
BaseCapacity_df = BaseCapacity_df.reindex(all_indexes, fill_value=0)
Support_Capacity_df = Support_Capacity_df.reindex(all_indexes, fill_value=0)
New_Client_BD_3toExisting_Total_df = New_Client_BD_3toExisting_Total_df.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
base_months = BaseCapacity_df[numerical_col].fillna(0)
support_months = Support_Capacity_df[numerical_col].fillna(0)
zone_demand_months = New_Client_BD_3toExisting_Total_df[numerical_col].fillna(0)

# Subtract, then turn the keys back into columns
Free_Seats_Zone_Aft_BD_Stage3 = (base_months - (support_months + zone_demand_months)).reset_index()

# Restore the key columns on the two capacity tables
BaseCapacity_df = BaseCapacity_df.reset_index()
Support_Capacity_df = Support_Capacity_df.reset_index()

In [661]:
# Free seats (zone) after all BD: base capacity minus support capacity minus
# the zone-adjusted all-stage total

# Key the three frames on the site columns
index_cols = ['OU', 'Facility Name', 'FY', 'BI Geo', 'City']
BaseCapacity_df = BaseCapacity_df.set_index(index_cols)
Support_Capacity_df = Support_Capacity_df.set_index(index_cols)
New_Client_AllStage_Total_df = New_Client_AllStage_Total_df.set_index(index_cols)

# Every key that occurs in any of the three frames
all_indexes = (
    BaseCapacity_df.index
    .union(Support_Capacity_df.index)
    .union(New_Client_AllStage_Total_df.index)
)

# Align on that key set; keys missing from a frame are added with 0
BaseCapacity_df = BaseCapacity_df.reindex(all_indexes, fill_value=0)
Support_Capacity_df = Support_Capacity_df.reindex(all_indexes, fill_value=0)
New_Client_AllStage_Total_df = New_Client_AllStage_Total_df.reindex(all_indexes, fill_value=0)

# Month blocks with any remaining NaN treated as 0
base_months = BaseCapacity_df[numerical_col].fillna(0)
support_months = Support_Capacity_df[numerical_col].fillna(0)
zone_demand_months = New_Client_AllStage_Total_df[numerical_col].fillna(0)

# Subtract, then turn the keys back into columns
Free_Seats_Zone_Aft_All_BD = (base_months - (support_months + zone_demand_months)).reset_index()

# Restore the key columns on the two capacity tables
BaseCapacity_df = BaseCapacity_df.reset_index()
Support_Capacity_df = Support_Capacity_df.reset_index()

In [662]:
# Give each zone free-seats table its Seat Type label and the scenario label
Free_Seats_Zone_Aft_SD_df = Free_Seats_Zone_Aft_SD_df.assign(
    **{'Seat Type': "Free Seats (Zone) Aft. SD", 'FCST': Scenario}
)
Free_Seats_Zone_Aft_BD_Stage3 = Free_Seats_Zone_Aft_BD_Stage3.assign(
    **{'Seat Type': "Free Seats (Zone) Aft. BD (> stage3)", 'FCST': Scenario}
)
Free_Seats_Zone_Aft_All_BD = Free_Seats_Zone_Aft_All_BD.assign(
    **{'Seat Type': "Free Seats(Zone)  Aft. all BD", 'FCST': Scenario}
)

In [663]:
# Display only: first row of the trapped-seats (after SD) table
Trapped_Seats_Aft_SD_df.head(1)

Unnamed: 0,FY,FCST,BI Geo,Facility Name,OU,City,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,Seat Type
0,FY25,FY25-Dec Forecast,Geo Bulgaria,BLG_Burgas - Galleria,BG03,Burgas,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Trapped Seats Aft. SD


In [664]:
# Append all required dataframes to make final GCD

# Work with the twelve month columns only. numerical_col is widened with the
# quarter / half / year columns at the bottom of this cell, so slicing the first
# twelve entries keeps the cell re-runnable (the freshly concatenated frame has
# no Q1..FY FCST columns yet).
month_cols = numerical_col[:12]

Final_df = pd.concat([BaseCapacity_df, Support_Capacity_df, Pro_Capacity_df, Support_df, Bi_Inputs_df,
                      FS_afterSD_df, FS_afterBD3andAbove_df, FS_afterAllBD_df,
                      Trapped_Seats_Aft_SD_df, Trapped_Seats_Aft_Stage3_df, Trapped_Seats_Aft_AllStage_df,
                      Free_Seats_Zone_Aft_SD_df, Free_Seats_Zone_Aft_BD_Stage3, Free_Seats_Zone_Aft_All_BD],
                     ignore_index=True)

# Replace NaN values with 0 in the month columns
Final_df[month_cols] = Final_df[month_cols].fillna(0)

# Replace NaN values in all other columns with " - "
Final_df = Final_df.apply(lambda x: x.fillna(" - ") if x.name not in month_cols else x)

# Store the months as integers (astype(int) truncates toward zero, it does not round)
Final_df[month_cols] = Final_df[month_cols].astype(int)

# Merge on 'BI Geo' to get Geo and Geo Type
Final_df = pd.merge(Final_df, BIGeo_Geo_df, on='BI Geo', how='left')
Final_df = pd.merge(Final_df, BIGeo_GeoType_df, on='BI Geo', how='left')

# 'SD Geo' is "<Geo Type> <Geo>"; 'SD Geo OU' appends "_<OU>" to it
Final_df["SD Geo"] = Final_df["Geo Type"] + " " + Final_df["Geo"]
Final_df["SD Geo OU"] = Final_df["SD Geo"] + "_" + Final_df["OU"]

# Quarterly averages
Final_df['Q1'] = Final_df[['Jul', 'Aug', 'Sep']].mean(axis=1)
Final_df['Q2'] = Final_df[['Oct', 'Nov', 'Dec']].mean(axis=1)
Final_df['Q3'] = Final_df[['Jan', 'Feb', 'Mar']].mean(axis=1)
Final_df['Q4'] = Final_df[['Apr', 'May', 'Jun']].mean(axis=1)

# Half-year averages
Final_df['H1'] = Final_df[['Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']].mean(axis=1)
Final_df['H2'] = Final_df[['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']].mean(axis=1)

# Full-year average
Final_df['FY Average'] = Final_df[month_cols].mean(axis=1)

# The scenario label looks like "FY25-Dec Forecast": the month is the first word after the hyphen
month_in_scenario = Scenario.split('-')[1].split()[0]

# Position of that month in the month list (raises ValueError if it is not a known month)
start_index = month_cols.index(month_in_scenario)

# Average from the scenario month through June. 'Jun' is the last entry of the
# month list, so the slice never has to wrap around; the former wrap-around
# branch could not be reached and has been removed.
Final_df['FY FCST'] = Final_df[month_cols[start_index:]].mean(axis=1)

# From here on numerical_col also covers the derived average columns
numerical_col = month_cols + ['Q1', 'Q2', 'Q3', 'Q4', 'H1', 'H2', 'FY Average', 'FY FCST']

# Store every numeric column as an integer (again truncating toward zero)
Final_df[numerical_col] = Final_df[numerical_col].astype(int)


In [665]:
# Display only: column layout of the trapped-seats (after SD) table
Trapped_Seats_Aft_SD_df.columns

Index(['FY', 'FCST', 'BI Geo', 'Facility Name', 'OU', 'City', 'Jul', 'Aug',
       'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       'Seat Type'],
      dtype='object')

In [666]:
# Display only: column layout of the combined table before it is reordered
Final_df.columns

Index(['OU', 'Facility Name', 'FY', 'BI Geo', 'City', 'Seat Type', 'Jul',
       'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May',
       'Jun', 'FCST', 'Vertical', 'Job Code', 'Program Name', 'Customer',
       'Horizontal', 'Sum', 'Stage', 'Account', 'Status', 'Geo', 'Geo Type',
       'SD Geo', 'SD Geo OU', 'Q1', 'Q2', 'Q3', 'Q4', 'H1', 'H2', 'FY Average',
       'FY FCST'],
      dtype='object')

In [667]:
# Final GCD layout: descriptive columns first, then the months, then the averages.
# Columns of Final_df that are not listed here are dropped by the selection.
final_column_order = [
    'FCST', 'FY', 'BI Geo', 'Geo', "SD Geo", "Geo Type", "SD Geo OU", 'OU', 'City',
    'Facility Name', 'Vertical', 'Stage', 'Job Code', 'Customer', 'Seat Type',
    'Program Name', 'Account',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Q1', 'Q2', 'Q3', 'Q4', 'H1', 'H2', 'FY Average', 'FY FCST',
]
Final_df = Final_df.loc[:, final_column_order]

In [668]:
# Export the Final GCD dataframe to a workbook named after the scenario
file_name = f'PythonGCD {Scenario}.xlsx'

try:
    # XlsxWriter-backed writer; leaving the `with` block saves the workbook
    with pd.ExcelWriter(file_name, engine='xlsxwriter') as writer:
        # One worksheet per dataframe (the BI Inputs export is currently disabled)
        # Bi_Inputs_df.to_excel(writer, sheet_name='BI Inputs', index=False)
        Final_df.to_excel(writer, sheet_name='Final GCD', index=False)
except PermissionError:
    # Handles the PermissionError raised when the target workbook cannot be written
    print(f"The file '{file_name}' is already open. Please close the file and try again.")
else:
    print(f"Data has been exported to {file_name}")

Data has been exported to PythonGCD FY25-Dec Forecast.xlsx


In [669]:
import os
import win32com.client as win32


# Build a 'City Summary' pivot sheet inside the exported workbook by driving
# Excel through COM. The workbook is always closed and Excel always quit,
# even when a step fails, so the exported file is never left locked.

# Custom sort order for "Seat Type"
custom_sort_order = [
    "Base Capacity",
    "Production Capacity",
    "Support Capacity",
    "Production",
    "Digital Production",
    "Support",
    "BD Stage 5",
    "BD Stage 3 & 4",
    "BD Stage 2 & Below",
    "Free Seats Aft. SD",
    "Free Seats Aft. BD (> stage3)",
    "Free Seats Aft. all BD",
    "Free Seats (Zone) Aft. SD",
    "Free Seats (Zone) Aft. BD (> stage3)",
    "Free Seats(Zone)  Aft. all BD",
    "Trapped Seats Aft. SD",
    "Trapped Seats Aft. BD (> stage3)",
    "Trapped Seats Aft. all BD",
]

# Create an instance of Excel
excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = True
workbook = None

try:
    # Open the workbook
    workbook = excel.Workbooks.Open(os.path.abspath(file_name))

    # Select the sheet that holds the exported data
    sheet_name = 'Final GCD'
    sheet = workbook.Sheets(sheet_name)

    # Define the range of the data dynamically
    data_range = sheet.UsedRange

    # Add a new sheet for the pivot table
    pivot_sheet = workbook.Sheets.Add()
    pivot_sheet.Name = 'City Summary'

    # Create the pivot table
    pivot_cache = workbook.PivotTableWizard(
        SourceType=win32.constants.xlDatabase,
        SourceData=data_range,
        TableDestination=pivot_sheet.Range('A1'),
        TableName='CitySummary'
    )

    # Look the pivot table up by the name it was created with
    pivot_table = pivot_sheet.PivotTables('CitySummary')

    # Check if pivot table is created successfully
    if pivot_table:
        try:
            # Set up the pivot table fields
            pivot_table.PivotFields('Geo Type').Orientation = win32.constants.xlPageField  # Filter
            pivot_table.PivotFields('Geo').Orientation = win32.constants.xlPageField  # Filter

            # Read the field names once instead of enumerating the COM
            # collection again for every row field and every value column
            available_fields = {f.Name for f in pivot_table.PivotFields()}

            # Add row fields
            row_fields = ['City', 'BI Geo', 'Seat Type', 'Vertical', 'Customer']
            for field in row_fields:
                if field in available_fields:
                    row_field = pivot_table.PivotFields(field)
                    row_field.Orientation = win32.constants.xlRowField
                    row_field.Subtotals = [False] * 12  # Remove subtotals
                    row_field.RepeatLabels = True  # Repeat item labels
                else:
                    print(f"Field '{field}' not found in pivot table.")

            # Add every column listed in numerical_col as a summed value
            for month in numerical_col:
                if month in available_fields:
                    data_field = pivot_table.PivotFields(month)
                    data_field.Orientation = win32.constants.xlDataField
                    data_field.Function = win32.constants.xlSum  # Aggregation function (Sum)
                    data_field.Name = f'Sum of {month}'
                else:
                    print(f"Field '{month}' not found in pivot table.")

            # Ensure data fields are displayed as columns
            pivot_table.DataPivotField.Orientation = win32.constants.xlColumnField

            # Set pivot table layout to tabular form
            pivot_table.ShowTableStyleRowStripes = False
            pivot_table.TableStyle2 = 'PivotStyleMedium6'  # Change to any style you prefer
            pivot_table.RowAxisLayout(win32.constants.xlTabularRow)

            # Apply custom sort order to "Seat Type". Items absent from the
            # data are reported and skipped so that one missing seat type
            # does not abort the whole pivot; positions stay contiguous.
            seat_type_field = pivot_table.PivotFields('Seat Type')
            seat_type_field.AutoSort(win32.constants.xlManual, 'Seat Type')
            present_items = {item.Name for item in seat_type_field.PivotItems()}
            next_position = 1
            for item in custom_sort_order:
                if item in present_items:
                    seat_type_field.PivotItems(item).Position = next_position
                    next_position += 1
                else:
                    print(f"Seat Type '{item}' not found in pivot table.")

            # Save only a fully configured pivot table
            workbook.Save()
        except Exception as e:
            print(f"An error occurred: {e}")
    else:
        print("Pivot table creation failed.")
finally:
    # Always release the file and the Excel process. Changes were saved above
    # on success; on failure the half-built pivot sheet is discarded.
    try:
        if workbook is not None:
            workbook.Close(SaveChanges=False)
    finally:
        excel.Quit()