In [1]:
import pandas as pd
import pyodbc
from datetime import datetime

In [2]:
# Work-order extract: every column of interest from vw_WorkOrder for CompanyCode 5000,
# restricted to the listed maintenance activity types, status 'Practically Completed',
# a positive ActualTotalCost and any OrderType other than 'MW04'. Each work order is then
# left-joined to its functional-location record (CompanyCode 5000) on FunctionLocation.
sql_query = """
    WITH all_maint_wo AS (
        SELECT
            WorkOrderNumber
            ,WorkOrderDescription
            ,BasicStartDate
            ,BasicFinishDate
            ,ScheduledStartDate
            ,PriorityType
            ,PriorityTypeDesc
            ,Priority
            ,PriorityDesc
            ,Equipment
            ,Assembly
            ,LocationAccountAssignment
            ,ILOAIndividual
            ,MaintenancePlanningPlant
            ,OrderType
            ,OrderTypeDescription
            ,OrderCategory
            ,CreatedBy
            ,CreatedOnDate
            ,ChangedBy
            ,ChangedOnDate
            ,CompanyCode
            ,CompanyCodeDesc
            ,Plant
            ,PlantDesc
            ,BusinessArea
            ,ControllingArea
            ,Location
            ,LocationPlant
            ,ObjectNumber
            ,WBSElement
            ,ProjectInternalKey
            ,ProjectDescription
            ,ObjectCategory
            ,StatusProfile
            ,FunctionalLocationInternal
            ,MaintenancePlant
            ,MaintenancePlantDesc
            ,ModifictaionID
            ,Notification
            ,MainWorkCenter
            ,WorkcenterDesc
            ,LocationCostCenter
            ,ComplianceDate
            ,TechCompletionDate
            ,HeaderCosts
            ,AurizonIntCompCosts
            ,MaintenanceActivityType
            ,MaintenanceActivityTypeDesc
            ,OrderDeleteFlag
            ,OrderFunctionalArea
            ,OrderProfitCenter
            ,OrderRespCostCenter
            ,OrderRespCostCenterText
            ,OrderRespPerson
            ,OrderReqstCostCenter
            ,OrderReqstCostCenterText
            ,OrderReqstPerson
            ,OrderMaintStrategy
            ,OrderMaintItem
            ,OrderMainItemText
            ,OrderMaintPlan
            ,OrderMaintPlanCallNumber
            ,PlanningPlantDesc
            ,PlanningGroup
            ,SortField
            ,PlantofMainWorkCenter
            ,PlantofMainWorkcenterDesc
            ,OrderRespPersNumber
            ,OrderRespPersName
            ,LocationText
            ,EarliestBasicStartD
            ,OrderTaskListType
            ,OrderTaskListGroup
            ,OrderTaskListGRoupCounter
            ,OrderStartPoint
            ,OrderEndPoint
            ,OrderLengthUnit
            ,OrderLinearLength
            ,OrderMarkerStart
            ,OrderMarkerEnd
            ,OrderDistStartMark
            ,OrderDIstEndMark
            ,OrderDistMarkUnit
            ,OrderLinRefPattern
            ,OrderStartLatitude
            ,OrderEndLatitude
            ,OrderStartLongitude
            ,OrderEndLongitude
            ,OrderRespPersonName
            ,OrderRespPersonNumber
            ,FunctionLocCategoryDesc
            ,FunctionLocTechObjectType
            ,FunctionLocation
            ,FunctionLocationDesc
            ,FunctionLocationCategory
            ,EquipmentDesc
            ,EquipmentCategory
            ,EquipmentCategoryDesc
            ,EquipmentTechObjectType
            ,EquipmentTechObjectTypeDesc
            ,MainSystemStaus
            ,MainSystemStatusDesc
            ,MainUserStatus
            ,MainUserStatusDesc
            ,OrderStatCompCode
            ,OrderStatOrderCategory
            ,OrderStatOrderType
            ,PlannedTotalCost
            ,ActualTotalCost
            ,OrderCreatedTS
            ,OrderUpdatedTS
            ,OrderMarkerStartKm
            ,OrderMarkerEndKm
            ,GEOMETRY
            ,GeometrySTSartPoint
            ,GeometrySTEndpoint
            ,SAPPortalURL
            ,StartThroughmetre
            ,EndThroughmetre
            ,WBSElementSAP
            ,PCOM_DATE
            ,PCOM_TIME
            ,PCOM_BY
        FROM
            [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
        WHERE
            [MaintenanceActivityType] IN
                (
                    'C01', 'C14', 'C20', 'C02', 'C13', 'C03', 'C19', 'C23', 'C25', 'C26', 'C10', -- Mechanised Track Maintenance
                    'C29', 'C31', 'T31', 'C37', 'C08', 'C47', 'C54', 'C43', 'C51', 'C53', 'C57', 'C70', 'C72', 'C50', 'C10', 'C30', 'C48', 'C54', 'C52', 'C06', 'C07', 'C44', 'C01', 'C41', -- General Track Maintenance
                    'NIP', 'NRP', 'B50', 'B53', 'C67', 'B04', 'B05', 'B06', 'B55', 'B57', -- Structures Maintenance
                    'T28', 'T29', 'T33', 'T40', 'T41', 'T58', 'T44', 'T45', 'T34', 'T46', 'T47', 'T48', 'T54', 'T42', 'T43', -- Control Systems - Signalling and Wayside Maintenance
                    'T10', 'T11', 'T32', -- Control Systems - Telecommunications Maintenance
                    'C54', 'T32', -- Control Systems - Operational Systems Maintenance
                    'T26', 'T27', 'NSV', 'T32', 'T24', 'T25', 'E31' -- Traction Power Maintenance
                )
            AND [CompanyCode] = '5000'
            AND [MainUserStatusDesc] = 'Practically Completed'
            AND [ActualTotalCost] IS NOT NULL
            AND [ActualTotalCost] > 0
            AND [OrderType] <> 'MW04'
    ),
    all_floc AS (
        SELECT FLOC_TPLNR_FunctionalLocationInternalKey
            ,FLOC_STRNO_FunctionalLocation
            ,FLOC_PLTXT_FunctionalLocationDescr
            ,FLOC_FLTYP_FunctionalLocationCategory
            ,FLOC_TYPTX_FunctionalLocationCategoryDescr
            ,FLOC_EQART_TechnicalObjectType
            ,FLOC_EARTX_TechnicalObjectTypeDescr
            ,FLOC_SWERK_MaintenancePlant
            ,FLOC_NAME1_MaintenancePlantDescr
            ,FLOC_BUKRS_CompanyCode
            ,FLOC_BUTXT_CompanyCodeDescr
            ,FLOC_TPLMA_SuperiorFunctionalLocationInternal
            ,FLOC_STRNO_SuperiorFunctionalLocation
            ,FLOC_INGRP_PlannerGroup
            ,FLOC_IWERK_PlanningPlant
            ,FLOC_NAME1_PlanningPlantDescr
            ,FLOC_IEQUI_InstallOfEquipmentAllowedAtFunctionalLocation
            ,FLOC_ARBPL_WorkCenter
            ,FLOC_MSGRP_FunctionalLocationRoom
            ,FLOC_BEBER_PlantSection
            ,FLOC_STORT_Location
            ,FLOC_STORT_LocationDescr
        FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
        WHERE
            FLOC_BUKRS_CompanyCode = '5000'
    )

    SELECT *
    FROM all_maint_wo
        LEFT JOIN all_floc ON all_maint_wo.FunctionLocation = all_floc.FLOC_STRNO_FunctionalLocation
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# read_sql_query materialises the whole result set, so the connection can be released
# as soon as it returns; the finally clause closes it even when the query fails.
try:
    df1 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# df1.to_pickle("../pkl/work_order_main_all.pkl")
# Specify the file path and sheet name
file_path = '../xlsx/all_maint_wo.xlsx'
sheet_name = 'all_maint_wo'

# Write the DataFrame to a specific sheet in the Excel file
with pd.ExcelWriter(file_path, engine='openpyxl', mode='w') as writer:  # mode='a' for append, use mode='w' to write a new file
    df1.to_excel(writer, sheet_name=sheet_name, index=False)

df1

  df1 = pd.read_sql_query(sql_query, conn)


In [4]:
# Count of practically-completed, positively-costed T26 / T27 / E31 work orders
# (CompanyCode 5000), grouped by the technical object type of the work order's
# functional location, the maintenance activity type and the order type.
sql_query = """
    WITH all_wo AS (
        SELECT
            [WorkOrderNumber],
            [OrderType],
            [CompanyCode],
            [FunctionLocation],
            CASE
                WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
                ELSE [TechCompletionDate]
            END AS 'CompletionDate',
            [MaintenanceActivityType],
            [MaintenanceActivityTypeDesc],
            [MainUserStatus],
            [MainUserStatusDesc],
            [ActualTotalCost]
        FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
        WHERE
            [MaintenanceActivityType] IN ('T26', 'T27', 'E31')
            AND [CompanyCode] = '5000'
            AND [MainUserStatusDesc] = 'Practically Completed'
            AND [ActualTotalCost] IS NOT NULL
            AND [ActualTotalCost] > 0
    ),
    all_floc AS (
        SELECT
            FLOC_STRNO_FunctionalLocation,
            FLOC_PLTXT_FunctionalLocationDescr,
            FLOC_EARTX_TechnicalObjectTypeDescr,
            FLOC_STORT_LocationDescr
        FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
        WHERE
            FLOC_BUKRS_CompanyCode = '5000'
    )

    SELECT
        all_floc.FLOC_EARTX_TechnicalObjectTypeDescr,
        all_wo.MaintenanceActivityType,
        all_wo.OrderType,
        COUNT(*) AS 'Count'
    FROM all_wo
        LEFT JOIN all_floc ON all_floc.FLOC_STRNO_FunctionalLocation = all_wo.FunctionLocation
    GROUP BY
        all_floc.FLOC_EARTX_TechnicalObjectTypeDescr,
        all_wo.MaintenanceActivityType,
        all_wo.OrderType
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# The result is fully loaded by read_sql_query, so close the connection straight away
# (also on failure) instead of leaving it open for the life of the kernel.
try:
    test = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Specify the file path and sheet name
file_path = '../xlsx/avg_maint_cost_per_activity_type_substations_only.xlsx'
sheet_name = 'T26_T27_E31_count'

# Write the DataFrame to a specific sheet in the Excel file.
# mode='a' appends to a workbook that must already exist; if_sheet_exists='replace'
# overwrites the sheet on a re-run instead of raising "sheet already exists".
# NOTE(review): a later cell in this notebook recreates this workbook with mode='w',
# which discards sheets appended here - confirm the intended run order.
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    test.to_excel(writer, sheet_name=sheet_name, index=False)

# Display the DataFrame
test

  test = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,MaintenanceActivityType,OrderType,Count
0,LV Power Systems,T27,MW04,10
1,SFC Cooling,E31,NW02,1
2,HV Switchgear,E31,NW02,1
3,Power Quality,E31,NW02,4
4,LV Power Systems,E31,NW01,1
5,Electrical Section,T27,MW04,2
6,HV Switchgear,E31,NW01,2
7,Section Insulator,E31,NW02,1
8,SFC Cooling,E31,NW03,10
9,Site,E31,NW02,1


In [5]:
# Row-level listing of practically-completed, positively-costed T26 / T27 / E31 work
# orders (CompanyCode 5000) with the functional location each one was raised against,
# ordered by maintenance activity type.
sql_query = """
    WITH all_wo AS (
        -- all WO and their flocid
        SELECT
            [WorkOrderNumber],
            [OrderType],
            [CompanyCode],
            [FunctionLocation],
            CASE
                WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
                ELSE [TechCompletionDate]
            END AS 'CompletionDate',
            [MaintenanceActivityType],
            [MaintenanceActivityTypeDesc],
            [MainUserStatus],
            [MainUserStatusDesc],
            [ActualTotalCost]
        FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
        WHERE
            [MaintenanceActivityType] IN ('T26', 'T27', 'E31')
            AND [CompanyCode] = '5000'
            AND [MainUserStatusDesc] = 'Practically Completed'
            AND [ActualTotalCost] IS NOT NULL
            AND [ActualTotalCost] > 0
            --AND OrderType <> 'MW04'
    ),
    all_floc AS (
        SELECT
            FLOC_STRNO_FunctionalLocation,
            FLOC_PLTXT_FunctionalLocationDescr,
            FLOC_EARTX_TechnicalObjectTypeDescr,
            FLOC_STORT_LocationDescr
        FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
        WHERE
            FLOC_BUKRS_CompanyCode = '5000'
            --AND FLOC_EARTX_TechnicalObjectTypeDescr = 'Substation'
    )

    SELECT
        all_wo.WorkOrderNumber,
        all_floc.FLOC_STRNO_FunctionalLocation,
        all_floc.FLOC_PLTXT_FunctionalLocationDescr,
        all_floc.FLOC_EARTX_TechnicalObjectTypeDescr,
        all_wo.MaintenanceActivityType
    FROM all_wo
        LEFT JOIN all_floc ON all_floc.FLOC_STRNO_FunctionalLocation = all_wo.FunctionLocation
    ORDER BY all_wo.MaintenanceActivityType
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# The connection is only needed for the duration of the read, so it is closed in a
# finally clause whether or not the query succeeds.
try:
    test2 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Specify the file path and sheet name
file_path = '../xlsx/avg_maint_cost_per_activity_type_substations_only.xlsx'
sheet_name = 'T26_T27_E31_all_wo'

# Write the DataFrame to a specific sheet in the Excel file.
# mode='a' appends to a workbook that must already exist; if_sheet_exists='replace'
# overwrites the sheet on a re-run instead of raising "sheet already exists".
# NOTE(review): a later cell in this notebook recreates this workbook with mode='w',
# which discards sheets appended here - confirm the intended run order.
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    test2.to_excel(writer, sheet_name=sheet_name, index=False)

# Display the DataFrame
test2

  test2 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,WorkOrderNumber,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,MaintenanceActivityType
0,000058287382,ST008210-14,MT TCU,Substation,E31
1,000058280368,ST008210-13,MT TCU 331 Fault Locator,HV Monitoring,E31
2,000058287388,ST008230-03,MT AT2 PSC 10Km,Substation,E31
3,000058287381,ST008230-03,MT AT2 PSC 10Km,Substation,E31
4,000058280364,ST008230-03,MT AT2 PSC 10Km,Substation,E31
...,...,...,...,...,...
781,000080107846,ST002180-02,EDUNGALBA ATS 84.668 km BATTERY CHARGER,LV Power Systems,T27
782,000080107848,ST002210-02,TRYPHINIA 128.425 km BATTERY CHARGER,LV Power Systems,T27
783,000080107849,ST002200-02,WALLAROO 115.036 km BATTERY CHARGER,LV Power Systems,T27
784,000080106915,ST001930-05,AN AT AC/DC Power Supply,LV Power Systems,T27


: 

In [111]:
# Get every functional location from [vw_Dim_FunctionalLocation] where CompanyCode = 5000
# and the technical object type description is 'Substation' -> df2
sql_query = """
    SELECT
        FLOC_STRNO_FunctionalLocation,
        FLOC_PLTXT_FunctionalLocationDescr,
        FLOC_EARTX_TechnicalObjectTypeDescr,
        FLOC_STORT_LocationDescr
    FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
	WHERE
        FLOC_BUKRS_CompanyCode = '5000'
        AND FLOC_EARTX_TechnicalObjectTypeDescr = 'Substation'
	ORDER BY FLOC_STRNO_FunctionalLocation
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# Close the connection once the result is in memory (or the query has failed) so it
# is not left open for the rest of the session.
try:
    df2 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Specify the file path and sheet name
file_path = '../xlsx/avg_maint_cost_per_activity_type_substations_only.xlsx'
sheet_name = 'all_substations'

# Write the DataFrame to a specific sheet in the Excel file.
# mode='w' writes a brand-new workbook, so any sheets already in this file are dropped.
with pd.ExcelWriter(file_path, engine='openpyxl', mode='w') as writer:  # mode='a' for append, use mode='w' to write a new file
    df2.to_excel(writer, sheet_name=sheet_name, index=False)

df2

  df2 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_STORT_LocationDescr
0,ST000020-08,Wycarbah FS,Substation,Wycarbah
1,ST000040-12,Dingo TSC,Substation,Dingo
2,ST000050-08,Bluff FS,Substation,Bluff
3,ST000070-06,Burngrove TCU,Substation,Burngrove
4,ST000090-04,Red Rock TSC,Substation,Red Rock
...,...,...,...,...
199,ST009500-03,KMMO/AT1 41.100Km PSC,Substation,Kenmare-Memooloo
200,ST009510-03,MOSE/AT1 67.500Km PSC,Substation,Memooloo-Starlee
201,ST009520-01,MOSE/AT2 78.650Km PSC,Substation,Starlee
202,ST009530-03,SERM/AT2 103.650Km PSC,Substation,Rolleston Mine


In [112]:
# Attach substation attributes to each work order. df2 is loaded with only
# 'Substation' functional locations, so after the left join a row with a null
# FLOC_EARTX_TechnicalObjectTypeDescr had no substation match and is dropped.
# NOTE(review): the output saved with this cell shows columns (e.g. CompletionDate)
# that the query loading df1 earlier in this notebook does not select, and that query
# already returns the FLOC_* columns itself - confirm df1 comes from the intended
# query before relying on a fresh Restart & Run All.
merged_df = pd.merge(df1, df2, how='left', left_on='FunctionLocation', right_on='FLOC_STRNO_FunctionalLocation')
# .copy() detaches the filtered rows from the merge result so that later cells can
# overwrite columns on merged_df without pandas raising SettingWithCopyWarning.
merged_df = merged_df[merged_df['FLOC_EARTX_TechnicalObjectTypeDescr'].notna()].copy()
merged_df

Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_STORT_LocationDescr
495,000056012902,NW01,5000,ST009450-23,2019-03-10,NIP,Inspect,PCOM,Practically Completed,870.00,ST009450-23,Struan Road FS,Substation,Struan Road
622,000057080543,NW02,5000,ST007760-38,2023-08-30,NIP,Inspect,PCOM,Practically Completed,195.49,ST007760-38,Edungalba TSC,Substation,Edungalba
1320,000058220212,NW03,5000,ST000120-13,2023-02-16,NSV,Service,PCOM,Practically Completed,425.09,ST000120-13,CALLEMONDAH FS,Substation,Callemondah
1321,000058220213,NW03,5000,ST001490-03,2023-09-18,NSV,Service,PCOM,Practically Completed,424.02,ST001490-03,OC 77.2 Km PSC,Substation,Oaky Creek
1921,000058286701,NW03,5000,ST001900-03,2024-05-30,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,755.20,ST001900-03,ME AT PSC,Substation,Midgee
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
323189,000058198203,NW03,5000,ST007780-37,2022-06-08,NSV,Service,PCOM,Practically Completed,610.33,ST007780-37,Kabra TSC,Substation,Kabra
323190,000058198204,NW03,5000,ST008450-49,2022-06-08,NSV,Service,PCOM,Practically Completed,813.76,ST008450-49,Raglan CS,Substation,Raglan
323191,000058198205,NW03,5000,ST009130-01,2022-06-08,NSV,Service,PCOM,Practically Completed,258.88,ST009130-01,CN 2.780 Km PSC,Substation,Clinton
323192,000058198206,NW03,5000,ST009140-02,2022-06-08,NSV,Service,PCOM,Practically Completed,258.88,ST009140-02,CN 2.097 Km PSC,Substation,Clinton


In [113]:
# Lookup table: one row per distinct (activity type code, description) pair in merged_df
all_maint_activity_code = (
    merged_df
    .drop_duplicates(subset=['MaintenanceActivityType', 'MaintenanceActivityTypeDesc'])
    .loc[:, ['MaintenanceActivityType', 'MaintenanceActivityTypeDesc']]
)
all_maint_activity_code

Unnamed: 0,MaintenanceActivityType,MaintenanceActivityTypeDesc
495,NIP,Inspect
1320,NSV,Service
1921,E31,Feed Stns&Trck Sect Cabin Mtce
5398,NRP,Repair
8016,T32,Power Systems Control Traction
8018,C44,Fire & Vegetation Management
238710,T02,Overhead Maintenance


In [114]:
# Convert CompletionDate column into datetime.
# assign() builds a new frame instead of writing into merged_df in place, which avoids
# SettingWithCopyWarning on the filtered frame and keeps the cell safe to re-run.
merged_df = merged_df.assign(CompletionDate=pd.to_datetime(merged_df['CompletionDate']))

# Calculate all time average cost and the number of work orders per activity type
avg_maint_cost_per_activity_type = merged_df.groupby('MaintenanceActivityType').agg(
    Average_ActualTotalCost_All_Time=('ActualTotalCost', 'mean'),
    Sample_Size_All_Time=('MaintenanceActivityType', 'size')
).reset_index()

# Add the activity type description and put the columns in report order
avg_maint_cost_per_activity_type = pd.merge(avg_maint_cost_per_activity_type, all_maint_activity_code, how='left', on='MaintenanceActivityType')
avg_maint_cost_per_activity_type = avg_maint_cost_per_activity_type[['MaintenanceActivityType', 'MaintenanceActivityTypeDesc', 'Average_ActualTotalCost_All_Time', 'Sample_Size_All_Time']]

# Specify the file path and sheet name
file_path = '../xlsx/avg_maint_cost_per_activity_type_substations_only.xlsx'
sheet_name = 'avg_maint_cost_per_activity_type'

# Write the DataFrame to a specific sheet in the Excel file.
# mode='a' appends to the existing workbook; if_sheet_exists='replace' overwrites the
# sheet when this cell is re-run instead of raising "sheet already exists".
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    avg_maint_cost_per_activity_type.to_excel(writer, sheet_name=sheet_name, index=False)

avg_maint_cost_per_activity_type



Unnamed: 0,MaintenanceActivityType,MaintenanceActivityTypeDesc,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,C44,Fire & Vegetation Management,2078.82,1
1,E31,Feed Stns&Trck Sect Cabin Mtce,1346.800074,406
2,NIP,Inspect,2362.04907,215
3,NRP,Repair,3274.054067,359
4,NSV,Service,797.31817,2541
5,T02,Overhead Maintenance,1385.11,1
6,T32,Power Systems Control Traction,1268.085,10


In [115]:
# Convert CompletionDate column into datetime.
# assign() returns a new frame rather than writing into the filtered merged_df in
# place, so no SettingWithCopyWarning is raised and re-running the cell is harmless.
merged_df = merged_df.assign(CompletionDate=pd.to_datetime(merged_df['CompletionDate']))

# Calculate all time average cost and work-order count per technical object type
grouped_all_time = merged_df.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_ActualTotalCost_All_Time=('ActualTotalCost', 'mean'),
    Sample_Size_All_Time=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_all_time

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,Substation,1209.215126,3533


In [116]:
# Average cost and work-order count for orders completed in the current calendar year
current_year = datetime.now().year

df_current_year = merged_df.loc[merged_df['CompletionDate'].dt.year.eq(current_year)]

grouped_current_year = (
    df_current_year
    .groupby('FLOC_EARTX_TechnicalObjectTypeDescr')
    .agg(
        Average_ActualTotalCost_current_Year=('ActualTotalCost', 'mean'),
        Sample_Size_current_Year=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size'),
    )
    .reset_index()
)

grouped_current_year

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_current_Year,Sample_Size_current_Year
0,Substation,1371.981931,725


In [117]:
# Average cost and work-order count for orders completed in the previous calendar year
current_year = datetime.now().year
previous_year = current_year - 1

df_previous_year = merged_df.loc[merged_df['CompletionDate'].dt.year.eq(previous_year)]

grouped_previous_year = (
    df_previous_year
    .groupby('FLOC_EARTX_TechnicalObjectTypeDescr')
    .agg(
        Average_ActualTotalCost_Previous_Year=('ActualTotalCost', 'mean'),
        Sample_Size_Previous_Year=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size'),
    )
    .reset_index()
)

grouped_previous_year

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year
0,Substation,1110.978073,1240


In [118]:
# Calculate last 3 years average: the three full calendar years before the current one.
# current_year is set here so the cell does not depend on an earlier cell having run.
current_year = datetime.now().year
last_3_years_start = current_year - 3

# between() is inclusive on both ends, so the window is [current_year - 3, current_year - 1].
# Unlike a "!= current_year" test, this also keeps out rows dated after the current year.
df_last_3_years = merged_df[merged_df['CompletionDate'].dt.year.between(last_3_years_start, current_year - 1)]

grouped_last_3_years = df_last_3_years.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_Last_3_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_3_Years=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_last_3_years

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_Last_3_Years,Sample_Size_Last_3_Years
0,Substation,1156.344664,2742


In [119]:
# Calculate last 5 years average: the five full calendar years before the current one.
# current_year is set here so the cell does not depend on an earlier cell having run.
current_year = datetime.now().year
last_5_years_start = current_year - 5

# between() is inclusive on both ends, so the window is [current_year - 5, current_year - 1].
# Unlike a "!= current_year" test, this also keeps out rows dated after the current year.
df_last_5_years = merged_df[merged_df['CompletionDate'].dt.year.between(last_5_years_start, current_year - 1)]

grouped_last_5_years = df_last_5_years.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_Last_5_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_5_Years=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_last_5_years

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Substation,1154.562914,2780


In [120]:
# Combine the per-period summaries into one wide table keyed on technical object type.
# Left joins from the all-time summary keep every object type even when a given period
# has no work orders for it (those period columns are then NaN).
result_df = (
    grouped_all_time
    .merge(grouped_current_year, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
    .merge(grouped_previous_year, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
    .merge(grouped_last_3_years, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
    .merge(grouped_last_5_years, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
)

# Specify the file path and sheet name
file_path = '../xlsx/avg_maint_cost_per_activity_type_substations_only.xlsx'
sheet_name = 'avg_maint_cost'

# Write the DataFrame to a specific sheet in the Excel file.
# mode='a' appends to the existing workbook; if_sheet_exists='replace' overwrites the
# sheet when this cell is re-run instead of raising "sheet already exists".
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    result_df.to_excel(writer, sheet_name=sheet_name, index=False)

result_df



Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_current_Year,Sample_Size_current_Year,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Substation,1209.215126,3533,1371.981931,725,1110.978073,1240,1156.344664,2742,1154.562914,2780


In [65]:
# NOTE(review): this cell contains only disabled code. It refers to all_main_type_df and
# a 'Maintenance Product' column, neither of which is defined in the cells shown here -
# presumably left over from an earlier version of the analysis; confirm and remove.
# result_df = pd.merge(all_main_type_df, result_df, on='Maintenance Product', how='left')
# result_df.to_pickle("../pkl/average_main_cost.pkl")

# result_df

Unnamed: 0,Maintenance Product,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,"Points (mechanical, electric, electro-hydrauli...",4531.398329,425.0,31150.930000,12.0,25131.625098,51.0,22172.155085,59.0
1,Level Crossing Warning Control,638.559073,399.0,1071.244000,10.0,1071.244000,10.0,1071.244000,10.0
2,"Train Control Systems (DTC signs, Local Panel,...",33201.240000,1.0,,,33201.240000,1.0,33201.240000,1.0
3,Isolator,39436.809000,30.0,49432.001538,13.0,38018.967273,22.0,38018.967273,22.0
4,Housings (Apparatus Case & Hut),849.329205,88.0,,,2776.480000,3.0,3944.610000,5.0
...,...,...,...,...,...,...,...,...,...
53,Track Resurfacing,19044.818991,3103.0,244076.497207,111.0,182700.735652,230.0,158930.123872,266.0
54,Track Geometry Recording,19369.187372,3048.0,244076.497207,111.0,182700.735652,230.0,158930.123872,266.0
55,Fencing,6069.400486,185.0,,,,,36039.002500,4.0
56,GPR / PVC Inspecting and Testing,23842.489331,2406.0,262775.513663,101.0,189212.850639,219.0,163497.482510,255.0
