### Imports

In [2]:
import pandas as pd

### Define a dictionary for Infor's relevant activities

In [3]:
activities = [
    ("NB_Infor_101_Ingest_Notebooks"),
    ("NB_Infor_100_FSM_Tables"),
    ("NB_Infor_100_GeneralLedgerTransactionDetail_Small"),
    ("NB_Infor_100_FSM_GeneralLedgerTotal"),
    ("NB_Infor_200_Curated_Gold_Tables"),
    ("NB_Infor_200_DAG"),
    ("NB_Infor_102_Landing_To_Bronze"),
    ("NB_Infor_103_Bronze_To_Silver"),
    ("NB_Infor_104_Silver_To_Gold"),
    ("NB_Infor_200_FinanceJobCost"),
    ("NB_Infor_200_WIP"),
    ("NB_Infor_200_HardDollar"),
    ("NB_Infor_200_InvoiceAudit"),
    ("NB_Infor_200_Profit_And_Loss"),
    ("NB_Infor_200_JobCostDetail"),
    ("NB_Infor_200_ITActuals"),
    ("NB_Infor_200_AMWorkforcePerformance"),
    ("PL_Infor_100_IngestMaster_AT"),
    ("PL_Infor_101_IngestOrchestrator_AT")
]

activities_df = pd.DataFrame(activities, columns=["Item_name"])
display(activities_df)

Unnamed: 0,Item_name
0,NB_Infor_101_Ingest_Notebooks
1,NB_Infor_100_FSM_Tables
2,NB_Infor_100_GeneralLedgerTransactionDetail_Small
3,NB_Infor_100_FSM_GeneralLedgerTotal
4,NB_Infor_200_Curated_Gold_Tables
5,NB_Infor_200_DAG
6,NB_Infor_102_Landing_To_Bronze
7,NB_Infor_103_Bronze_To_Silver
8,NB_Infor_104_Silver_To_Gold
9,NB_Infor_200_FinanceJobCost


### Load The Data

In [4]:
# Load data
excel_file = "Fabric_CU_metrics14days.xlsx"
df = pd.read_excel(excel_file)
csv_file = "fabric_capacity_units.csv" 
df.to_csv(csv_file, index=False)

#Remove total row in dataset
df = df[df["Workspace"] != "Total"]

columns_to_drop = ["Billing type", "Users","Workspace", "Item kind","Successful count", "Failed count"] 
df.drop(columns=columns_to_drop, inplace=True, errors='ignore')

rename_dict = {
    "Item name": "Item_name",
    "CU (s)": "CU(s)",
    "Duration (s)": "Duration(s)",
}

df.rename(columns=rename_dict, inplace=True)
df = df[df["Item_name"].str.contains("Infor", case=False, na=False)]

df.tail()

Unnamed: 0,Item_name,CU(s),Duration(s)
8,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,420958.796,21181.761
16,InforTableSummary,22976.1025,2787.862
25,PL_Infor_101_IngestOrchestrator_AT,2318.4,107163.431
27,PL_Infor_100_IngestMaster_AT,1149.12,107517.675
28,PL_Infor_100_GLDetail_Refresh_AT,826.56,50775.201


### Filter for Infor Artifacts Runs in FCM data

In [5]:
import re

# Step 1: Ensure all values in Item_name columns are strings
df["Item_name"] = df["Item_name"].astype(str)
activities_df["Item_name"] = activities_df["Item_name"].astype(str)

# Step 2: Perform substring match and retain full Item_name from df
activity_names = activities_df["Item_name"].tolist() 
df_filtered = df[df["Item_name"].apply(lambda x: any(activity in x for activity in activity_names))].copy()

# Step 3: Assign execution type and other columns from activities_df based on substring match
for _, row in activities_df.iterrows():
    mask = df_filtered["Item_name"].str.contains(re.escape(row["Item_name"]), na=False, case=False)

# Step 4: Convert CU/s and Duration/s to per hour
columns_to_convert = ["CU(s)", "Duration(s)"]
df_filtered[[col.replace("(s)", "(h)") for col in columns_to_convert]] = df_filtered[columns_to_convert].div(3600)

# Step 5: Drop original second-based columns
df_filtered.drop(columns=columns_to_convert, inplace=True)

# Display the result
display(df_filtered)


Unnamed: 0,Item_name,CU(h),Duration(h)
0,NB_Infor_101_Ingest_Notebooks,1372.330577,29.305434
5,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,222.071407,5.172995
6,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,155.866878,2.983577
8,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,116.932999,5.883822
25,PL_Infor_101_IngestOrchestrator_AT,0.644,29.76762
27,PL_Infor_100_IngestMaster_AT,0.3192,29.866021


### Calculating Daily Capacity Unit Allocation For F64 SKU

In [6]:
# Define variables for flexibility
CU_capacity = 64
seconds_per_minute = 60
minutes_per_hour = 60
hours_per_day = 24
PAYG = 11.52
reserved = 6.853

# Compute total CU allocation
total_CU_seconds = CU_capacity * seconds_per_minute * minutes_per_hour * hours_per_day
total_CU_hours = total_CU_seconds/3600  # Convert to CU-hours

# Print the results with clear formatting
print(f"Total CU Allocation for F64 SKU:\n"
      f" - {total_CU_seconds:,} CU-seconds per day\n"
      f" - {total_CU_hours:,} CU-hours per day")



Total CU Allocation for F64 SKU:
 - 5,529,600 CU-seconds per day
 - 1,536.0 CU-hours per day


In [20]:
# Define global cost rates
TOTAL_DAILYCOST_PAYG = 11.52 * 24
TOTAL_DAILYCOST_RESERVED = 6.853 * 24
TOTAL_CU_HOURS = 1536
WEEK = 7
MONTH = 30
YEAR = 356

def calculate_execution_costs(df_filtered):
    """
    Calculate PAYG and Reserved cost per execution based on Compute Unit (CU) usage.
    
    Parameters:
    df_filtered (pd.DataFrame): Input DataFrame containing CU(h) column.
    total_CU_hours (int, optional): Total Compute Units per hour for the given capacity. Default is 1536 for F64.
    
    Returns:
    pd.DataFrame: Updated DataFrame with PAYG and Reserved costs.
    """
    
    # Calculate cost per execution
    df_filtered = df_filtered.copy()
    df_filtered["PAYG Cost($)"] = ((df_filtered["CU(h)"] / 14) / TOTAL_CU_HOURS) * TOTAL_DAILYCOST_PAYG
    df_filtered["Reserved Cost($)"] = ((df_filtered["CU(h)"] / 14) / TOTAL_CU_HOURS) * TOTAL_DAILYCOST_RESERVED
    
    print("Cost Per Day for Each Execution")
    return df_filtered

df_filtered = calculate_execution_costs(df_filtered)
df_filtered


Cost Per Day for Each Execution


Unnamed: 0,Item_name,CU(h),Duration(h),PAYG Cost($),Reserved Cost($)
0,NB_Infor_101_Ingest_Notebooks,1372.330577,29.305434,17.64425,10.496185
5,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,222.071407,5.172995,2.855204,1.698499
6,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,155.866878,2.983577,2.004003,1.192138
8,NB_Infor_100_GeneralLedgerTransactionDetail_Sm...,116.932999,5.883822,1.503424,0.894355
25,PL_Infor_101_IngestOrchestrator_AT,0.644,29.76762,0.00828,0.004926
27,PL_Infor_100_IngestMaster_AT,0.3192,29.866021,0.004104,0.002441


In [22]:
# Compute all the costs for all synapse notebooks and pipelines run 
# Do this for daily, weekly, monthly, and yearly costs
summary_df = pd.DataFrame({
    "Total CU(h)": [
        df_filtered["CU(h)"].sum(),
        df_filtered["CU(h)"].sum() * WEEK,
        df_filtered["CU(h)"].sum() * MONTH,
        df_filtered["CU(h)"].sum() * YEAR
    ],
    "Total PAYG Cost ($)": [
        df_filtered["PAYG Cost($)"].sum(),
        df_filtered["PAYG Cost($)"].sum() * WEEK,
        df_filtered["PAYG Cost($)"].sum() * MONTH,
        df_filtered["PAYG Cost($)"].sum() * YEAR
    ],
    "Total Reserved Cost ($)": [
        df_filtered["Reserved Cost($)"].sum(),
        df_filtered["Reserved Cost($)"].sum() * WEEK,
        df_filtered["Reserved Cost($)"].sum() * MONTH,
        df_filtered["Reserved Cost($)"].sum() * YEAR
    ]
}, index=["Daily", "Weekly", "Monthly", "Yearly"])

# Print another distinct heading
print("\n" + "🔹" * 25)
print("🔹🔹🔹 COST SUMMARY For Current Infor Runs Schedule 🔹🔹🔹")
print("🔹" * 25 + "\n")

# Display the extended summary DataFrame
display(summary_df)



🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹
🔹🔹🔹 COST SUMMARY For Current Infor Runs Schedule 🔹🔹🔹
🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹



Unnamed: 0,Total CU(h),Total PAYG Cost ($),Total Reserved Cost ($)
Daily,1868.165062,24.019265,14.288544
Weekly,13077.155432,168.134856,100.019806
Monthly,56044.95185,720.577952,428.656311
Yearly,665066.761953,8550.858368,5086.721562


In [23]:
# Compute the total sums dynamically from df_filtered
total_CU_4runs = df_filtered["CU(h)"].sum()
total_PAYG_4runs = df_filtered["PAYG Cost($)"].sum()
total_reserved_4runs = df_filtered["Reserved Cost($)"].sum()

# Calculate per-run costs and CU based on 4 runs per day
per_run_CU = total_CU_4runs / 4
per_run_PAYG = total_PAYG_4runs / 4
per_run_reserved = total_reserved_4runs / 4

# Compute the new values for an hourly schedule (24 runs per day)
total_CU_24runs = per_run_CU * 24
total_PAYG_24runs = per_run_PAYG * 24
total_reserved_24runs = per_run_reserved * 24

# Compute weekly, monthly, and yearly costs dynamically
summary_24runs = pd.DataFrame({
    "Total CU(h)": [total_CU_24runs, total_CU_24runs * 7, total_CU_24runs * 30, total_CU_24runs * 365],
    "Total PAYG Cost ($)": [total_PAYG_24runs, total_PAYG_24runs * 7, total_PAYG_24runs * 30, total_PAYG_24runs * 365],
    "Total Reserved Cost ($)": [total_reserved_24runs, total_reserved_24runs * 7, total_reserved_24runs * 30, total_reserved_24runs * 365]
}, index=["Daily", "Weekly", "Monthly", "Yearly"])

# Print distinct header
print("\n" + "🔹" * 31)
print("🔹🔹🔹Infor COST SUMMARY (HOURLY RUNS - 24x per day) - New Schedule 🔹🔹🔹")
print("🔹" * 31 + "\n")

display(summary_24runs)



🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹
🔹🔹🔹Infor COST SUMMARY (HOURLY RUNS - 24x per day) - New Schedule 🔹🔹🔹
🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹



Unnamed: 0,Total CU(h),Total PAYG Cost ($),Total Reserved Cost ($)
Daily,11208.99,144.11559,85.731262
Weekly,78462.93,1008.809133,600.118836
Monthly,336269.7,4323.467714,2571.937868
Yearly,4091281.0,52602.190522,31291.910733


In [25]:
# Extract CU and costs for a single run
single_run_CU = df_filtered["CU(h)"].sum() / 4  # Since daily total is from 4 runs
single_run_PAYG = df_filtered["PAYG Cost($)"].sum() / 4
single_run_reserved = df_filtered["Reserved Cost($)"].sum() / 4

# Compute costs for 4 runs per day (standard)
daily_CU = single_run_CU * 4
daily_PAYG = single_run_PAYG * 4
daily_reserved = single_run_reserved * 4

# Adjust the last 7 days of the month (24 runs per day)
last_7_days_CU = single_run_CU * 24 * 7
last_7_days_PAYG = single_run_PAYG * 24 * 7
last_7_days_reserved = single_run_reserved * 24 * 7

# Compute total for the month
first_23_days_CU = daily_CU * 23
monthly_CU = first_23_days_CU + last_7_days_CU

first_23_days_PAYG = daily_PAYG * 23
monthly_PAYG = first_23_days_PAYG + last_7_days_PAYG

first_23_days_reserved = daily_reserved * 23
monthly_reserved = first_23_days_reserved + last_7_days_reserved

# Compute weekly and yearly values
weekly_CU = monthly_CU / 4  # Approximate, assuming 4 weeks in a month
yearly_CU = monthly_CU * 12  # 12 months

weekly_PAYG = monthly_PAYG / 4
yearly_PAYG = monthly_PAYG * 12

weekly_reserved = monthly_reserved / 4
yearly_reserved = monthly_reserved * 12

# Create DataFrame
summary_df = pd.DataFrame({
    "Total CU(h)": [daily_CU, weekly_CU, monthly_CU, yearly_CU],
    "Total PAYG Cost ($)": [daily_PAYG, weekly_PAYG, monthly_PAYG, yearly_PAYG],
    "Total Reserved Cost ($)": [daily_reserved, weekly_reserved, monthly_reserved, yearly_reserved]
}, index=["Daily", "Weekly", "Monthly", "Yearly"])

print("\n" + "🔹" * 22)
print("🔹🔹🔹 COST SUMMARY For Proposed Scheduled Runs 🔹🔹🔹")
print("🔹" * 22 + "\n")

# Display the updated summary DataFrame
display(summary_df)


🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹
🔹🔹🔹 COST SUMMARY For Proposed Scheduled Runs 🔹🔹🔹
🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹



Unnamed: 0,Total CU(h),Total PAYG Cost ($),Total Reserved Cost ($)
Daily,1868.165,24.019265,14.288544
Weekly,30357.68,390.313058,232.188835
Monthly,121430.7,1561.25223,928.755341
Yearly,1457169.0,18735.026761,11145.064097


In [26]:
# Define F64 Capacity Limits
F64_CU_per_hour = 1536  # CU per hour
F64_CU_per_day = F64_CU_per_hour * 24  # 36,864 CU per day
F64_CU_per_week = F64_CU_per_day * 7
F64_CU_per_month = F64_CU_per_day * 30
F64_CU_per_year = F64_CU_per_day * 365


# Define cost rates as global constants
total_dailycost_PAYG = 11.52 * 24
total_dailycost_reserved = 6.853 * 24
F64_CU_per_day = 1536 * 24  # 36,864 CU per day

def calculate_execution_costs(df_filtered):
    """
    Calculate PAYG and Reserved cost per execution based on Compute Unit (CU) usage.
    
    Parameters:
    df_filtered (pd.DataFrame): Input DataFrame containing CU(h) column.
    total_CU_hours (int, optional): Total Compute Units per hour for the given capacity. Default is 1536 for F64.
    
    Returns:
    pd.DataFrame: Updated DataFrame with PAYG and Reserved costs and utilization percentage.
    """
    # Calculate cost per execution
    df_filtered = df_filtered.copy()
    df_filtered["PAYG Cost($)"] = ((df_filtered["CU(h)"] / 14) / total_CU_hours) * total_dailycost_PAYG
    df_filtered["Reserved Cost($)"] = ((df_filtered["CU(h)"] / 14) / total_CU_hours) * total_dailycost_reserved
    
    # Compute total CU and costs per day
    daily_CU = df_filtered["CU(h)"].sum()
    daily_PAYG = df_filtered["PAYG Cost($)"].sum()
    daily_reserved = df_filtered["Reserved Cost($)"].sum()
    
    # Create summary DataFrame
    summary_df = pd.DataFrame({
        "Total CU(h)": [daily_CU],
        "Total PAYG Cost ($)": [daily_PAYG],
        "Total Reserved Cost ($)": [daily_reserved],
        "F64 Limit (CU)": [F64_CU_per_day],
        "Utilization (%)": [(daily_CU / F64_CU_per_day) * 100]
    }, index=["Daily"])
    
    print("\n" + "🔹" * 22)
    print("🔹🔹🔹 COST SUMMARY For Current Runs Schedule 🔹🔹🔹")
    print("🔹" * 22 + "\n")
    
    # Display the updated summary DataFrame
    return summary_df

calculate_execution_costs(df_filtered)


🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹
🔹🔹🔹 COST SUMMARY For Current Runs Schedule 🔹🔹🔹
🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹



Unnamed: 0,Total CU(h),Total PAYG Cost ($),Total Reserved Cost ($),F64 Limit (CU),Utilization (%)
Daily,1868.165062,24.019265,14.288544,36864,5.067722
