In [1]:
import pandas as pd
import numpy as np
import os

# Load the Excel file
excel_file = pd.ExcelFile('Template v2.xlsx')

# Get the list of sheet names
sheet_names = excel_file.sheet_names

# Print the list of sheet names
print(f'Available sheets in the file are- {sheet_names}')

Available sheets in the file are- ['Base Capacity', 'Support', 'Support Capacity', 'BI Inputs', 'Sheet1', 'Format', 'Mapping']


In [2]:
# Load the sheets into dataframes
Mapping_df = pd.read_excel(excel_file, sheet_name="Mapping")
BaseCapacity_df = pd.read_excel(excel_file, sheet_name="Base Capacity")
Support_Capacity_df = pd.read_excel(excel_file, sheet_name="Support Capacity")
Support_df = pd.read_excel(excel_file, sheet_name="Support")
Bi_Inputs_df = pd.read_excel(excel_file, sheet_name="BI Inputs")

In [3]:
numerical_col = ['Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

In [4]:
Mapping_df.columns

Index(['OU', 'Current OU Description', 'Facility Name', 'City', 'Geo Type',
       'BI Geo', 'Geo', 'Gross Revenue', 'Seats - FCT', 'Seats', 'HC', 'Staus',
       'Additional Comments'],
      dtype='object')

In [5]:
#Mapping Sheet Data Workings

# Select only the "OU" and "Facility Name" columns
OU_Facility_df = Mapping_df[['OU', 'Facility Name']]
# Remove duplicates from OU_Facility_df
OU_Facility_df = OU_Facility_df.drop_duplicates(subset='OU')

# Select only the "OU", and "City" columns
OU_City_df = Mapping_df[['OU', 'City']]
# Remove duplicates from OU_City_df
OU_City_df = OU_City_df.drop_duplicates(subset='OU')

# Select only the "OU", and "BI" columns this is only used for BI inputs BI Geo validation
OU_BIGeo_df = Mapping_df[['OU', 'BI Geo']]
# Remove duplicates from OU_BIGeo_df
OU_BIGeo_df = OU_BIGeo_df.drop_duplicates(subset='OU')

# Select only the "BI Geo", and "Geo" columns
BIGeo_Geo_df = Mapping_df[['BI Geo', 'Geo']]
# Remove duplicates from BIGeo_Geo_df This is mandatory to remove duplicate entries
BIGeo_Geo_df = BIGeo_Geo_df.drop_duplicates(subset='BI Geo')

# Select only the "BI Geo", and "Geo Type" columns
BIGeo_GeoType_df = Mapping_df[['BI Geo', 'Geo Type']]
# Remove duplicates from BIGeo_GeoType_df This is mandatory to remove duplicate entries
BIGeo_GeoType_df = BIGeo_GeoType_df.drop_duplicates(subset='BI Geo')


In [6]:
BIGeo_Geo_df

Unnamed: 0,BI Geo,Geo
0,Geo Bulgaria,Bulgaria
3,Geo Canada,Canada
4,Geo China,China
6,Geo Colombia,Colombia
9,Geo Egypt,Egypt
12,Geo India,India
29,Geo India Domestic,India
30,Geo Jamaica,Jamaica
35,Geo Kosovo,Kosovo
36,Geo Malaysia,Malaysia


In [7]:
#BI Inputs Sheet Data Working
# Rename specific columns
Bi_Inputs_df = Bi_Inputs_df.rename(columns={
    "LE[Scenario]": "FCST",
    "LE[Client]": "Customer",
    "LE[Horizontal]": "Horizontal",
    "LE[Stage]": "Stage",
    "LE[Vertical]": "Vertical",
    "LE[SD_Geo]": "BI Geo",
    "LE[OU_DESCR]": "OU",
    "LE[Project_DESCR]": "Program Name",
    "LE[Account]": "Account",
    "LE[Year]": "FY",
    "[SumJul]": "Jul",
    "[SumAug]": "Aug",
    "[SumSep]": "Sep",
    "[SumOct]": "Oct",
    "[SumNov]": "Nov",
    "[SumDec]": "Dec",
    "[SumJan]": "Jan",
    "[SumFeb]": "Feb",
    "[SumMar]": "Mar",
    "[SumApr]": "Apr",
    "[SumMay]": "May",
    "[SumJun]": "Jun"
})


columns_to_keep = [
    "FCST", "Customer", "Horizontal", "Stage", "Vertical", 
    "BI Geo", "OU", "Program Name", "FY", "Account"]+numerical_col
    
# Select only the specified columns
Bi_Inputs_df = Bi_Inputs_df[columns_to_keep]

#Fillter Account to select only Seats for Allocation_Adj_Store
Bi_Inputs_df = Bi_Inputs_df[Bi_Inputs_df['Account'] == "Seats for Allocation_Adj_Store"]

# Add a new column 'Seat Type' with all values set to 'Production'
# Bi_Inputs_df['Seat Type'] = 'Production'

# Define a function to determine the 'Seat Type' based on the 'Stage' value
def determine_seat_type(stage):
    if stage == 'Existing':
        return 'Production'
    elif stage == 'Stage 5':
        return 'BD Stage 5'
    elif stage in ['Stage 3', 'Stage 4']:
        return 'BD Stage 3 & 4'
    else:
        return 'BD Stage 2 & Below'
    
# Apply the function to create the new column 'Seat Type'
Bi_Inputs_df['Seat Type'] = Bi_Inputs_df['Stage'].apply(determine_seat_type)

# Keep only the first 4 characters of the 'OU' column
Bi_Inputs_df['OU'] = Bi_Inputs_df['OU'].str.slice(0, 4)

# Remove the first 2 characters from the 'Customer' column
Bi_Inputs_df['Customer'] = Bi_Inputs_df['Customer'].str.slice(2)

# Remove the last 2 characters from the 'Vertical' column
Bi_Inputs_df['Vertical'] = Bi_Inputs_df['Vertical'].str.slice(0, -2)

# Remove the last 2 characters from the 'Horizontal' column
Bi_Inputs_df['Horizontal'] = Bi_Inputs_df['Horizontal'].str.slice(0, -2)

# Create a new column 'Job Code' based on the provided conditions
Bi_Inputs_df['Job Code'] = Bi_Inputs_df['Program Name'].apply(lambda x: x[:5] if x[:1].isdigit() else x[:18])

# Merge the dataframes on the 'OU' column
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_Facility_df, on='OU', how='left')
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_City_df, on='OU', how='left')
Bi_Inputs_df = pd.merge(Bi_Inputs_df, OU_BIGeo_df, on='OU', how='left')




# Replace 'BI Geo_x' with 'BI Geo_y' where they are not equal
Bi_Inputs_df['BI Geo_x'] = np.where(Bi_Inputs_df['BI Geo_x'] != Bi_Inputs_df['BI Geo_y'], Bi_Inputs_df['BI Geo_y'], Bi_Inputs_df['BI Geo_x'])

# Drop the 'BI Geo_y' column
Bi_Inputs_df = Bi_Inputs_df.drop(columns=['BI Geo_y'])

# Rename 'BI Geo_x' to 'BI Geo'
Bi_Inputs_df = Bi_Inputs_df.rename(columns={'BI Geo_x': 'BI Geo'})

# replace NaN values with zero. This is optional if incase there is no value provided.
Bi_Inputs_df[numerical_col] = Bi_Inputs_df[numerical_col].fillna(0)

# Convert month column data types to integer using numerical_col
Bi_Inputs_df[numerical_col] = Bi_Inputs_df[numerical_col].astype(int)


In [8]:
Bi_Inputs_df

Unnamed: 0,FCST,Customer,Horizontal,Stage,Vertical,BI Geo,OU,Program Name,FY,Account,...,Jan,Feb,Mar,Apr,May,Jun,Seat Type,Job Code,Facility Name,City
0,FY25-Jan Forecast,Columbia University,Healthcare,Existing,Healthcare,Geo SHS India,IN27,12054_COLUMBIA - RCM FBO,FY25,Seats for Allocation_Adj_Store,...,9,9,9,9,9,9,Production,12054,IND_Hyderabad - Lanco SEZ-T99-6F,Hyderabad
1,FY25-Jan Forecast,Columbia University,Healthcare,Existing,Healthcare,Geo SHS India,IN46,12054_COLUMBIA - RCM FBO,FY25,Seats for Allocation_Adj_Store,...,54,54,54,54,54,54,Production,12054,IND_SHS_Chennai - Shriram Gateway-B2,Chennai
2,FY25-Jan Forecast,Columbia University,Healthcare,Existing,Healthcare,Geo SHS US,US02,12054_COLUMBIA - RCM FBO,FY25,Seats for Allocation_Adj_Store,...,2,2,2,2,2,2,Production,12054,USA_Clifton - Brighton Rd,Clifton
3,FY25-Jan Forecast,Government of Bermuda,Healthcare,Existing,Healthcare,Geo SHS India,IN27,12164_The Govt. of Bermuda - TPA,FY25,Seats for Allocation_Adj_Store,...,0,0,0,0,0,0,Production,12164,IND_Hyderabad - Lanco SEZ-T99-6F,Hyderabad
4,FY25-Jan Forecast,Government of Bermuda,Healthcare,Existing,Healthcare,Geo SHS India,IN46,12164_The Govt. of Bermuda - TPA,FY25,Seats for Allocation_Adj_Store,...,0,0,0,0,0,0,Production,12164,IND_SHS_Chennai - Shriram Gateway-B2,Chennai
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014,FY25-Jan Forecast,NetApp Inc,Enterprise Business services,Existing,Technology,Geo Colombia,CO03,28083_NetApp_HARDWARE_Bogota,FY25,Seats for Allocation_Adj_Store,...,31,31,31,31,31,31,Production,28083,COL_Bogota - Torre Krystal,Bogota
1015,FY25-Jan Forecast,Los Angeles County Health Services,Healthcare,Existing,Healthcare,Geo SHS India,IN27,21522_LAC Coding,FY25,Seats for Allocation_Adj_Store,...,0,0,0,0,0,0,Production,21522,IND_Hyderabad - Lanco SEZ-T99-6F,Hyderabad
1016,FY25-Jan Forecast,SAP America,Digital Business Services,Existing,Technology,Geo India,IN29,10082_SAP - Concur Invoice Pilot,FY25,Seats for Allocation_Adj_Store,...,45,45,45,45,45,45,Production,10082,IND_SGS_Hyderabad-LHTPL SEZ T99-7_8F,Hyderabad
1017,FY25-Jan Forecast,SAP America,Digital Business Services,Existing,Technology,Geo India,IN45,10082_SAP - Concur Invoice Pilot,FY25,Seats for Allocation_Adj_Store,...,43,43,43,43,43,43,Production,10082,IND_SGS_Chennai - Shriram Gateway-B2,Chennai


In [9]:
# Extract the value from the first row of the "FCST" column
Scenario = Bi_Inputs_df.loc[0, "FCST"]

# Print the extracted value
print(f'Scenario: {Scenario}')

Scenario: FY25-Jan Forecast


In [10]:
Existing_Total_df = Bi_Inputs_df
BD_3andAbove_Total_df = Bi_Inputs_df
BD_2andBelow_Total_df = Bi_Inputs_df

In [11]:
StageLevelExisting = ['Existing']
StageLevel3to5 = ['Stage 3','Stage 4','Stage 5']
StageLevel2toBelow = ['Stage 1', 'Stage 2', 'Stage_Other']

In [12]:
# Filter the DataFrame to include only rows based on Stage
Existing_Total_df = Existing_Total_df[Existing_Total_df['Stage'].isin(StageLevelExisting)]
BD_3andAbove_Total_df = BD_3andAbove_Total_df[BD_3andAbove_Total_df['Stage'].isin(StageLevel3to5)]
BD_2andBelow_Total_df = BD_2andBelow_Total_df[BD_2andBelow_Total_df['Stage'].isin(StageLevel2toBelow)]

In [13]:
# Grouping the data to have single row item data for all 
Existing_Total_df = Existing_Total_df.groupby(["FY","FCST",'BI Geo',"Facility Name" ,'OU',"City"])[numerical_col].sum().reset_index().fillna(0)
BD_3andAbove_Total_df = BD_3andAbove_Total_df.groupby(["FY","FCST",'BI Geo',"Facility Name" ,'OU',"City"])[numerical_col].sum().reset_index().fillna(0)
BD_2andBelow_Total_df = BD_2andBelow_Total_df.groupby(["FY","FCST",'BI Geo',"Facility Name" ,'OU', "City"])[numerical_col].sum().reset_index().fillna(0)



In [14]:
Existing_Total_df

Unnamed: 0,FY,FCST,BI Geo,Facility Name,OU,City,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun
0,FY25,FY25-Jan Forecast,Geo Bulgaria,BLG_Burgas - Galleria,BG03,Burgas,59,59,59,59,59,59,27,27,27,27,27,27
1,FY25,FY25-Jan Forecast,Geo Bulgaria,BLG_Sofia - BBC,BG07,Sofia,343,300,284,284,284,281,220,220,220,220,220,220
2,FY25,FY25-Jan Forecast,Geo Bulgaria,BLG_Sofia - Black Sea Capital Center,BG04,Sofia,71,67,69,69,69,69,57,57,57,57,57,57
3,FY25,FY25-Jan Forecast,Geo Canada,CAN_Windsor - Market Square,CA03,Windsor,197,194,203,195,195,195,178,171,168,174,172,177
4,FY25,FY25-Jan Forecast,Geo Colombia,COL_Barranquilla - Centro comercial,CO01,Barranquilla,974,1253,1254,1254,1254,1254,1254,1254,1254,1254,1254,1254
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,FY25,FY25-Jan Forecast,Geo South Africa,RSA_Pretoria,RS01,Pretoria,70,70,70,70,70,70,70,70,70,70,70,70
66,FY25,FY25-Jan Forecast,Geo US,USA_Common,US99,Common,0,0,0,0,0,0,5,5,5,5,5,5
67,FY25,FY25-Jan Forecast,Geo US,USA_Pittsford - Sullys Trail,US24,Rochester,183,183,183,183,183,183,162,162,162,146,146,146
68,FY25,FY25-Jan Forecast,Geo US,USA_Rochester - Jeff Road,US09,Rochester,4,4,4,4,4,4,3,3,3,4,3,3


In [15]:
#Base Capacity Sheet Data Working

# Grouping to sum up Base capacity if incase multiple entries or do manual check to remove duplicates from raw data
BaseCapacity_df = BaseCapacity_df.groupby(["FY",'OU','BI Geo',"Facility Name" ,"Seat Type"])[numerical_col].sum().reset_index().fillna(0)

# Merge the dataframes on the 'OU' column
BaseCapacity_df = pd.merge(BaseCapacity_df, OU_City_df, on='OU', how='left')

# Add FCST column
BaseCapacity_df['FCST']= Scenario

In [16]:
BaseCapacity_df

Unnamed: 0,FY,OU,BI Geo,Facility Name,Seat Type,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,City,FCST
0,FY25,BG03,Geo Bulgaria,BLG_Burgas - Galleria,Base Capacity,212,209,209,209,209,209,209,209,209,209,209,209,Burgas,FY25-Jan Forecast
1,FY25,BG04,Geo Bulgaria,BLG_Sofia - Black Sea Capital Center,Base Capacity,114,106,90,90,90,90,90,90,90,90,90,90,Sofia,FY25-Jan Forecast
2,FY25,BG07,Geo Bulgaria,BLG_Sofia - BBC,Base Capacity,538,538,538,538,538,538,538,538,538,538,538,538,Sofia,FY25-Jan Forecast
3,FY25,CA03,Geo Canada,CAN_Windsor - Market Square,Base Capacity,218,218,218,210,210,210,210,210,210,210,210,210,Windsor,FY25-Jan Forecast
4,FY25,CO01,Geo Colombia,COL_Barranquilla - Centro comercial,Base Capacity,1356,1361,1359,1359,1359,1359,1359,1359,1359,1359,1359,1359,Barranquilla,FY25-Jan Forecast
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,FY25,US13,Geo SHS US,USA_Springfield - MacArthur Blvd,Base Capacity,10,10,10,10,10,10,10,10,10,10,10,10,Springfield,FY25-Jan Forecast
64,FY25,US15,Geo SHS US,USA_Torrance - S Western Ave,Base Capacity,57,57,57,57,57,57,57,57,57,57,57,57,Torrance,FY25-Jan Forecast
65,FY25,US16,Geo US,USA_Tulsa - Corporate Woods,Base Capacity,0,32,35,35,35,35,35,35,35,35,35,35,Tulsa,FY25-Jan Forecast
66,FY25,US24,Geo US,USA_Pittsford - Sullys Trail,Base Capacity,208,208,208,208,208,208,208,208,208,208,208,208,Rochester,FY25-Jan Forecast


In [17]:
#Support Capacity Sheet Data Working

# Grouping to sum up Support capacity if incase multiple entries or do manual check to remove duplicates from raw data
Support_Capacity_df = Support_Capacity_df.groupby(["FY",'OU','BI Geo',"Facility Name" ,"Seat Type"])[numerical_col].sum().reset_index().fillna(0)

# Merge the dataframes on the 'OU' column
Support_Capacity_df = pd.merge(Support_Capacity_df, OU_City_df, on='OU', how='left')
# replace NaN values with zero. This is optional if incase there is no value provided.
Support_Capacity_df[numerical_col] = Support_Capacity_df[numerical_col].fillna(0)

# Add FCST column
Support_Capacity_df['FCST']= Scenario


In [18]:
Support_Capacity_df

Unnamed: 0,FY,OU,BI Geo,Facility Name,Seat Type,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,City,FCST
0,FY25,BG03,Geo Bulgaria,BLG_Burgas - Galleria,Support Capacity,15,15,15,15,15,15,15,15,15,15,15,15,Burgas,FY25-Jan Forecast
1,FY25,BG04,Geo Bulgaria,BLG_Sofia - Black Sea Capital Center,Support Capacity,5,5,5,5,5,5,5,5,5,5,5,5,Sofia,FY25-Jan Forecast
2,FY25,BG07,Geo Bulgaria,BLG_Sofia - BBC,Support Capacity,70,70,70,70,70,80,80,80,80,80,80,80,Sofia,FY25-Jan Forecast
3,FY25,CA03,Geo Canada,CAN_Windsor - Market Square,Support Capacity,15,15,15,15,15,15,15,15,15,15,15,15,Windsor,FY25-Jan Forecast
4,FY25,CO01,Geo Colombia,COL_Barranquilla - Centro comercial,Support Capacity,108,108,105,105,105,105,105,105,105,105,105,105,Barranquilla,FY25-Jan Forecast
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,FY25,US13,Geo SHS US,USA_Springfield - MacArthur Blvd,Support Capacity,4,0,3,3,3,3,3,3,3,3,3,3,Springfield,FY25-Jan Forecast
64,FY25,US15,Geo SHS US,USA_Torrance - S Western Ave,Support Capacity,7,7,7,7,7,7,7,7,7,7,7,7,Torrance,FY25-Jan Forecast
65,FY25,US16,Geo US,USA_Tulsa - Corporate Woods,Support Capacity,0,0,0,0,0,0,0,0,0,0,0,0,Tulsa,FY25-Jan Forecast
66,FY25,US24,Geo US,USA_Pittsford - Sullys Trail,Support Capacity,25,25,25,25,25,25,25,25,25,25,25,25,Rochester,FY25-Jan Forecast


In [19]:
#Support Sheet Data Working
#Fillter Sum column to remove rows if its value is 0
Support_df = Support_df[Support_df['Sum'] != 0]
# Merge the dataframes on the 'OU' column
Support_df = pd.merge(Support_df, OU_City_df, on='OU', how='left')
# replace NaN values with zero. This is optional if incase there is no value provided.
Support_df[numerical_col] = Support_df[numerical_col].fillna(0)

# Add FCST column
Support_df['FCST']= Scenario


In [20]:
Support_df

Unnamed: 0,FY,OU,BI Geo,Facility Name,Vertical,Job Code,Program Name,Customer,Horizontal,Seat Type,...,Dec,Jan,Feb,Mar,Apr,May,Jun,Sum,City,FCST
0,FY25,IN15,Geo India,IND_SGS_Chennai - Shriram Gateway-A1-5FB,Corporate,91000,Service Delivery,Corporate,Others,Support,...,1,1,1,1,1,1,1,12,Chennai,FY25-Jan Forecast
1,FY25,IN15,Geo India,IND_SGS_Chennai - Shriram Gateway-A1-5FB,Corporate,92201,Facility & Property Mgmt - SGA,Facilities,Others,Support,...,6,6,6,6,6,6,6,72,Chennai,FY25-Jan Forecast
2,FY25,IN13,Geo India,IND_SGS_Chennai - Shriram Gateway-A1-6F,Corporate,92201,Facility & Property Mgmt - SGA,Facilities,Others,Support,...,7,7,7,7,7,7,7,84,Chennai,FY25-Jan Forecast
3,FY25,IN13,Geo India,IND_SGS_Chennai - Shriram Gateway-A1-6F,Corporate,92104,Security Management - GM,Facilities,Others,Support,...,6,6,6,6,6,6,6,72,Chennai,FY25-Jan Forecast
4,FY25,IN13,Geo India,IND_SGS_Chennai - Shriram Gateway-A1-6F,Corporate,95103,Deskside Support,Technology IT,Others,Support,...,11,11,11,11,11,11,11,127,Chennai,FY25-Jan Forecast
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
426,FY25,IN25,Geo SHS India,IND_SHS_Hyderabad - Lanco SEZ-T99-9_10F,Corporate,97225,Recruitment - Healthcare,Recruitment,Others,Support,...,0,8,8,8,8,8,8,88,Hyderabad,FY25-Jan Forecast
427,FY25,IN55,Geo SHS India,IND_SHS_Hyderabad - Divyasree,Corporate,86225,SQMS - PY Quality BLM,SQMS,Others,Support,...,13,13,13,13,13,13,13,152,Hyderabad,FY25-Jan Forecast
428,FY25,IN55,Geo SHS India,IND_SHS_Hyderabad - Divyasree,Corporate,86226,SQMS - PY Quality CLM,SQMS,Others,Support,...,0,0,0,0,0,0,0,1,Hyderabad,FY25-Jan Forecast
429,FY25,IN55,Geo SHS India,IND_SHS_Hyderabad - Divyasree,Corporate,88136,PY - Operational Mgmt (Payer),SHSI - Common,BPaaS,Support,...,7,3,3,3,3,3,3,39,Hyderabad,FY25-Jan Forecast


In [21]:
# Set index for each DataFrame without inplace=True
BC_df = BaseCapacity_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"])
SC_df = Support_Capacity_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"])

# Ensure indexes are unique
BC_df = BC_df[~BC_df.index.duplicated(keep='first')]
SC_df = SC_df[~SC_df.index.duplicated(keep='first')]

# Align the indexes and fill missing rows/values with 0
all_indexes = BC_df.index.union(SC_df.index).union(
    Existing_Total_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"]).index
).union(
    BD_3andAbove_Total_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"]).index
).union(
    BD_2andBelow_Total_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"]).index
)

BC_df = BC_df.reindex(all_indexes, fill_value=0)
SC_df = SC_df.reindex(all_indexes, fill_value=0)
EX_df = Existing_Total_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"]).reindex(all_indexes, fill_value=0)
BD_3andAbove_df = BD_3andAbove_Total_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"]).reindex(all_indexes, fill_value=0)
BD_2andBelow_df = BD_2andBelow_Total_df.set_index(["OU", "Facility Name", "FY", "BI Geo", "City"]).reindex(all_indexes, fill_value=0)

# Perform the calculations with NaN values replaced by 0
FS_afterSD_df = BC_df[numerical_col].fillna(0) - (SC_df[numerical_col].fillna(0) + EX_df[numerical_col].fillna(0))
FS_afterBD3andAbove_df = BC_df[numerical_col].fillna(0) - (SC_df[numerical_col].fillna(0) + EX_df[numerical_col].fillna(0) + BD_3andAbove_df[numerical_col].fillna(0))
FS_afterAllBD_df = BC_df[numerical_col].fillna(0) - (SC_df[numerical_col].fillna(0) + EX_df[numerical_col].fillna(0) + BD_3andAbove_df[numerical_col].fillna(0) + BD_2andBelow_df[numerical_col].fillna(0))

# Reset index to bring back the index columns
FS_afterSD_df = FS_afterSD_df.reset_index()
FS_afterBD3andAbove_df = FS_afterBD3andAbove_df.reset_index()
FS_afterAllBD_df = FS_afterAllBD_df.reset_index()

# Add Seat Type column
FS_afterSD_df['Seat Type'] = "Free Seats After SD"
FS_afterBD3andAbove_df['Seat Type'] = "Free Seats After BD Stage 3 and Above"
FS_afterAllBD_df['Seat Type'] = "Free Seats After all BD"

# Add FCST column
FS_afterSD_df['FCST'] = Scenario
FS_afterBD3andAbove_df['FCST'] = Scenario
FS_afterAllBD_df['FCST'] = Scenario


In [22]:
# New working for Production Capacity
# Define the column headers
index_columns = ["FY", "BI Geo", "Facility Name", "OU", "City"]

# Create an empty DataFrame with the specified headers
Pro_Capacity_df = pd.DataFrame(columns=index_columns)
PC_df = Pro_Capacity_df

# Set index for each DataFrame without inplace=True
BC_df = BaseCapacity_df.set_index(index_columns)
SC_df = Support_Capacity_df.set_index(index_columns)

# Ensure indexes are unique
BC_df = BC_df[~BC_df.index.duplicated(keep='first')]
SC_df = SC_df[~SC_df.index.duplicated(keep='first')]

# Create the combined index
all_indexes = BC_df.index.union(SC_df.index).union(
    Pro_Capacity_df.set_index(index_columns).index
)

BC_df = BC_df.reindex(all_indexes, fill_value=0)
SC_df = SC_df.reindex(all_indexes, fill_value=0)
PC_df = PC_df.set_index(index_columns).reindex(all_indexes, fill_value=0)

# Perform the calculations with NaN values replaced by 0
PC_df = BC_df[numerical_col].fillna(0) - SC_df[numerical_col].fillna(0)

# Reset index to bring back the index columns
PC_df = PC_df.reset_index()
# Reset index to bring back the index columns Trial only for now not sure if this is making any difference in final output
BC_df = BC_df.reset_index()
SC_df = SC_df.reset_index()
# Add Seat Type column
PC_df['Seat Type'] = "Production Capacity"

# Add FCST column
PC_df['FCST'] = Scenario

In [23]:
PC_df.head(2)

Unnamed: 0,FY,BI Geo,Facility Name,OU,City,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,Seat Type,FCST
0,FY25,Geo Bulgaria,BLG_Burgas - Galleria,BG03,Burgas,197,194,194,194,194,194,194,194,194,194,194,194,Production Capacity,FY25-Jan Forecast
1,FY25,Geo Bulgaria,BLG_Sofia - Black Sea Capital Center,BG04,Sofia,109,101,85,85,85,85,85,85,85,85,85,85,Production Capacity,FY25-Jan Forecast


In [24]:
BaseCapacity_df.head(2)

Unnamed: 0,FY,OU,BI Geo,Facility Name,Seat Type,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,City,FCST
0,FY25,BG03,Geo Bulgaria,BLG_Burgas - Galleria,Base Capacity,212,209,209,209,209,209,209,209,209,209,209,209,Burgas,FY25-Jan Forecast
1,FY25,BG04,Geo Bulgaria,BLG_Sofia - Black Sea Capital Center,Base Capacity,114,106,90,90,90,90,90,90,90,90,90,90,Sofia,FY25-Jan Forecast


In [25]:
BC_df.head(2)

Unnamed: 0,FY,BI Geo,Facility Name,OU,City,Seat Type,Jul,Aug,Sep,Oct,Nov,Dec,Jan,Feb,Mar,Apr,May,Jun,FCST
0,FY25,Geo Bulgaria,BLG_Burgas - Galleria,BG03,Burgas,Base Capacity,212,209,209,209,209,209,209,209,209,209,209,209,FY25-Jan Forecast
1,FY25,Geo Bulgaria,BLG_Sofia - Black Sea Capital Center,BG04,Sofia,Base Capacity,114,106,90,90,90,90,90,90,90,90,90,90,FY25-Jan Forecast


In [26]:
# Append all required dataframes to make final GCD

Final_df = pd.concat([BC_df, SC_df,PC_df, Support_df, Bi_Inputs_df, FS_afterSD_df,FS_afterBD3andAbove_df,FS_afterAllBD_df], ignore_index=True)

# Replace NaN values with 0 in the specified numerical columns
Final_df[numerical_col] = Final_df[numerical_col].fillna(0)

# Replace NaN values in all other columns with " - "
Final_df = Final_df.apply(lambda x: x.fillna(" - ") if x.name not in numerical_col else x)

# Convert month column data types to integer using numerical_col
Final_df[numerical_col] = Final_df[numerical_col].astype(int)

# Merge the dataframes on the 'BI Geo' column to get Geo
Final_df = pd.merge(Final_df, BIGeo_Geo_df, on='BI Geo', how='left')

# Merge the dataframes on the 'BI Geo' column to get Geo Type
Final_df = pd.merge(Final_df, BIGeo_GeoType_df, on='BI Geo', how='left')

# Create the new column 'SD Geo' by concatenating 'Geo Type' and 'Geo' columns
Final_df["SD Geo"] = Final_df["Geo Type"] + " " +Final_df["Geo"]
Final_df["SD Geo OU"]= Final_df["SD Geo"] + "_" + Final_df["OU"]




In [27]:
# Reording the columns in Final GCD
Final_df.columns

Index(['FY', 'BI Geo', 'Facility Name', 'OU', 'City', 'Seat Type', 'Jul',
       'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May',
       'Jun', 'FCST', 'Vertical', 'Job Code', 'Program Name', 'Customer',
       'Horizontal', 'Sum', 'Stage', 'Account', 'Geo', 'Geo Type', 'SD Geo',
       'SD Geo OU'],
      dtype='object')

In [28]:
# Reordering the columns in Final GCD
Final_df = Final_df[['FCST', 'FY',  'BI Geo','Geo', "SD Geo","Geo Type","SD Geo OU",'OU', 'City', 'Facility Name', 'Vertical', 'Stage',
                     'Job Code', 'Customer', 'Seat Type','Account', 'Jul', 'Aug', 'Sep', 'Oct',
                     'Nov', 'Dec', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']]

In [29]:
#Write all required data to new excel file
# Try to export the merged dataframe to an Excel file
try:
    # Create a Pandas Excel writer using XlsxWriter as the engine.
    with pd.ExcelWriter('PythonGCD.xlsx', engine='xlsxwriter') as writer:
        # Write each dataframe to a different worksheet.
        # OU_Facility_df.to_excel(writer, sheet_name='OU_Facility', index=False)
        # OU_City_df.to_excel(writer, sheet_name='OU_City', index=False)
        # BaseCapacity_df.to_excel(writer, sheet_name='Base Capacity', index=False)
        # PC_df.to_excel(writer, sheet_name='Production Capacity', index=False)
        # Support_Capacity_df.to_excel(writer, sheet_name='Support Capacity', index=False)
        # BIGeo_Geo_df.to_excel(writer, sheet_name='BIGeo_Geo_df', index=False)
        
        # Support_df.to_excel(writer, sheet_name='Support', index=False)
        # Existing_Total_df.to_excel(writer, sheet_name='Existing Total', index=False)
        # FS_afterSD_df.to_excel(writer, sheet_name='Free Seats after SD', index=False)
        # Bi_Inputs_df.to_excel(writer, sheet_name='BI Inputs', index=False)
        Final_df.to_excel(writer, sheet_name='Final GCD', index=False)
    print("Data has been exported to 'PythonGCD.xlsx'")
except PermissionError:
    print("The file 'PythonGCD.xlsx' is already open. Please close the file and try again.")

Data has been exported to 'PythonGCD.xlsx'


In [30]:
Trial_df = pd.concat([BC_df, SC_df,PC_df, Support_df, Bi_Inputs_df, FS_afterAllBD_df], ignore_index=True)


In [31]:
Trial_df.head

<bound method NDFrame.head of         FY        BI Geo                         Facility Name    OU  \
0     FY25  Geo Bulgaria                 BLG_Burgas - Galleria  BG03   
1     FY25  Geo Bulgaria  BLG_Sofia - Black Sea Capital Center  BG04   
2     FY25  Geo Bulgaria                       BLG_Sofia - BBC  BG07   
3     FY25    Geo Canada           CAN_Windsor - Market Square  CA03   
4     FY25  Geo Colombia   COL_Barranquilla - Centro comercial  CO01   
...    ...           ...                                   ...   ...   
1735  FY25        Geo US          USA_Pittsford - Sullys Trail  US24   
1736  FY25        Geo US                              USA_Troy  US25   
1737  FY25        Geo US                          USA_Off Site  US52   
1738  FY25        Geo US                           USA_WAH DES  US59   
1739  FY25        Geo US                            USA_Common  US99   

               City                Seat Type   Jul   Aug   Sep   Oct  ...  \
0            Burgas         