In [42]:
import pandas as pd
import pyodbc
from datetime import datetime
# Limit rendered DataFrames to 10 rows so cell outputs stay short
pd.options.display.max_rows = 10

In [43]:
# T-SQL text passed unchanged to pd.read_sql_query further down in this cell.
# It selects work-order rows from vw_WorkOrder where:
#   * MaintenanceActivityType is one of the codes in the IN list (grouped by
#     the maintenance category named in each trailing SQL comment),
#   * CompanyCode is '5000',
#   * MainUserStatusDesc is 'Practically Completed',
#   * ActualTotalCost is non-NULL and greater than zero.
# CompletionDate is TechCompletionDate, falling back to BasicFinishDate when
# TechCompletionDate is NULL.
# NOTE(review): several codes ('C01', 'C10', 'C54', 'C57', 'C67', 'T32') occur
# more than once in the IN list. Repeats do not change the outcome of an IN
# test; presumably they reflect codes shared between categories — confirm.
sql_query = """
    -- all WO and their flocid
    SELECT
        [WorkOrderNumber],
        [OrderType],
        [CompanyCode],
        [FunctionLocation],
        CASE
            WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
            ELSE [TechCompletionDate]
        END AS 'CompletionDate',
        [MaintenanceActivityType],
        [MaintenanceActivityTypeDesc],
        [MainUserStatus],
        [MainUserStatusDesc],
        [ActualTotalCost]
    FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
    WHERE
        [MaintenanceActivityType] IN
            (
                'C01', 'C14', 'C20', 'C02', 'C13', 'C03', 'C19', 'C23', 'C25', 'C26', 'C10', -- Mechanised Track Maintenance
                'C29', 'C37', 'C08', 'C47', 'C54', 'C43', 'C51', 'C53', 'C57', 'C50', 'C10', 'C30', 'C48', 'C54', 'C52', 'C06', 'C07', 'C44', 'C01', 'C57', -- General Track Maintenance
                'B50', 'B53', 'C67', 'B04', 'B05', 'B06', 'B55', 'B57', 'C67', -- Structures Maintenance
                'T28', 'T29', 'T40', 'T41', 'T58', 'T44', 'T45', 'T46', 'T47', 'T48', 'T54', 'T42', 'T43', -- Control Systems - Signalling and Wayside Maintenance
                'T10', 'T11', 'T32', -- Control Systems - Telecommunications Maintenance
                'C54', 'T32', -- Control Systems - Operational Systems Maintenance
                'T26', 'T27', 'T32', 'T24', 'T25' -- Traction Power Maintenance
            )
        AND [CompanyCode] = '5000'
        AND [MainUserStatusDesc] = 'Practically Completed'
        AND [ActualTotalCost] IS NOT NULL
        AND [ActualTotalCost] > 0
"""

# Host name of the SQL Server instance to query
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication (Trusted_Connection=yes)
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# The connection is closed in `finally` so it is released even when the query
# raises; `conn` remains defined afterwards but can no longer be used.
try:
    df1 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Persist the raw extract as a pickle next to the notebook's parent folder
df1.to_pickle("../pkl/work_order_main_all.pkl")
# Display the DataFrame
df1

  df1 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost
0,000080078613,MW04,5000,LU000040,2017-06-26,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,2291.71
1,000080078614,MW04,5000,LU000034,2017-09-14,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,2036.69
2,000080078615,MW04,5000,LU000033,2017-06-26,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,1316.68
3,000080078616,MW04,5000,LU000035,2017-06-26,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,1316.69
4,000080078656,MW04,5000,BW-03ML,2017-09-14,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,10992.73
...,...,...,...,...,...,...,...,...,...,...
9544,000058286508,NW03,5000,BW-28YR,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,126.95
9545,000058286535,NW03,5000,MA-01FK,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,508.23
9546,000058286530,NW03,5000,GA-01ML,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,4553.29
9547,000058286534,NW03,5000,MA-02FK,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,126.95


In [44]:
# Pickled lookup table; the displayed output shows the columns
# FuncLocID and 'Maintenance Product'.
maint_flocs_path = r'C:\Users\R893859\Aurizon Operations Limited\Data, Reporting & Analytics - DS08 - NSAP2\Data\Huy\Flocs List\maint_flocs.pkl'
df2 = pd.read_pickle(maint_flocs_path)
df2

Unnamed: 0,FuncLocID,Maintenance Product
0,TO000226-01,"Points (mechanical, electric, electro-hydrauli..."
2,TO000226-02,"Points (mechanical, electric, electro-hydrauli..."
4,LX006284-01,Level Crossing Warning Control
7,TO000310-01,"Points (mechanical, electric, electro-hydrauli..."
9,TO000147-01,"Points (mechanical, electric, electro-hydrauli..."
...,...,...
76328,ST008155-15,Power Supplies (Electric)
76330,ST008155-19,Power Supplies (Electric)
76332,ST008155-22,Power Supplies (Electric)
76334,ST009535-06,Power Supplies (Electric)


In [45]:
# One row per distinct maintenance product, in order of first appearance in df2
distinct_products = df2['Maintenance Product'].unique()
all_main_type_df = pd.DataFrame({'Maintenance Product': distinct_products})
all_main_type_df

Unnamed: 0,Maintenance Product
0,"Points (mechanical, electric, electro-hydrauli..."
1,Level Crossing Warning Control
2,"Train Control Systems (DTC signs, Local Panel,..."
3,Isolator
4,Housings (Apparatus Case & Hut)
...,...
53,Track Resurfacing
54,Track Geometry Recording
55,Fencing
56,GPR / PVC Inspecting and Testing


In [46]:
# Left join: every work-order row is kept, and a work order is repeated once
# for each df2 row whose FuncLocID matches its FunctionLocation.
merged_df = df1.merge(
    df2,
    how='left',
    left_on='FunctionLocation',
    right_on='FuncLocID',
)
merged_df

Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost,FuncLocID,Maintenance Product
0,000080078613,MW04,5000,LU000040,2017-06-26,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,2291.71,,
1,000080078614,MW04,5000,LU000034,2017-09-14,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,2036.69,,
2,000080078615,MW04,5000,LU000033,2017-06-26,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,1316.68,,
3,000080078616,MW04,5000,LU000035,2017-06-26,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,1316.69,,
4,000080078656,MW04,5000,BW-03ML,2017-09-14,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,10992.73,,
...,...,...,...,...,...,...,...,...,...,...,...,...
36067,000058286533,NW03,5000,MA-03FK,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,126.95,MA-03FK,Track Geometry Recording
36068,000058286533,NW03,5000,MA-03FK,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,126.95,MA-03FK,Track Inspections
36069,000058286533,NW03,5000,MA-03FK,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,126.95,MA-03FK,Rail Flaw Detection
36070,000058286533,NW03,5000,MA-03FK,2024-05-14,C50,Track Inspections,PCOM,Practically Completed,126.95,MA-03FK,General Earthworks Maintenance (incl. access r...


In [47]:
# Number of merged rows that found no maintenance product in the left join
missing_product_mask = merged_df['Maintenance Product'].isna()
nan_count = missing_product_mask.sum()
nan_count

2834

In [48]:
# Parse CompletionDate into datetime values
merged_df['CompletionDate'] = pd.to_datetime(merged_df['CompletionDate'])

# All-time statistics per maintenance product: mean cost and row count
all_time_spec = {
    'Average_ActualTotalCost_All_Time': ('ActualTotalCost', 'mean'),
    'Sample_Size_All_Time': ('Maintenance Product', 'size'),
}
grouped_all_time = (
    merged_df
    .groupby('Maintenance Product')
    .agg(**all_time_spec)
    .reset_index()
)

grouped_all_time

Unnamed: 0,Maintenance Product,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,Alternator Set,82740.960000,1
1,Ballast Undercutting,22268.586900,3264
2,Ballast Undercutting -Turnouts,13030.126989,1458
3,Copper Cable Route - Underground,2140.168725,102
4,DC Power Supply - Battery Charger,4655.836000,10
...,...,...,...
38,Transformers,14076.057500,4
39,Transmission (Microwave Radio),219.770000,2
40,Transmission (Optic Fibre Systems),769.789927,137
41,Turnout Grinding,12358.809883,1545


In [49]:
# Average cost per maintenance product for the previous calendar year
current_year = datetime.now().year
previous_year = current_year - 1

# Keep only rows whose completion date falls in the previous calendar year
completed_previous_year = merged_df['CompletionDate'].dt.year.eq(previous_year)
df_previous_year = merged_df.loc[completed_previous_year]

previous_year_spec = {
    'Average_ActualTotalCost_Previous_Year': ('ActualTotalCost', 'mean'),
    'Sample_Size_Previous_Year': ('Maintenance Product', 'size'),
}
grouped_previous_year = (
    df_previous_year
    .groupby('Maintenance Product')
    .agg(**previous_year_spec)
    .reset_index()
)

grouped_previous_year

Unnamed: 0,Maintenance Product,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year
0,Ballast Undercutting,209102.118065,155
1,Ballast Undercutting -Turnouts,55635.644395,157
2,DC Power Supply - Battery Charger,4534.940000,1
3,GPR / PVC Inspecting and Testing,262775.513663,101
4,General Earthworks Maintenance (incl. access r...,238082.935351,114
...,...,...,...
16,Track Geometry Recording,244076.497207,111
17,Track Inspections,132384.689299,271
18,Track Resurfacing,244076.497207,111
19,Turnout Grinding,55635.644395,157


In [50]:
# Calculate the average over the last 3 complete calendar years
# (current_year - 3 up to and including current_year - 1)
last_3_years_start = current_year - 3

completion_year = merged_df['CompletionDate'].dt.year
# The upper bound is `< current_year` rather than `!= current_year`: an
# inequality test would also admit rows dated after the current year.
# NOTE(review): CompletionDate falls back to BasicFinishDate in the SQL query;
# presumably that date can lie in the future — confirm.
df_last_3_years = merged_df[(completion_year >= last_3_years_start) & (completion_year < current_year)]

grouped_last_3_years = df_last_3_years.groupby('Maintenance Product').agg(
    Average_Last_3_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_3_Years=('Maintenance Product', 'size')
).reset_index()

grouped_last_3_years

Unnamed: 0,Maintenance Product,Average_Last_3_Years,Sample_Size_Last_3_Years
0,Alternator Set,82740.960000,1
1,Ballast Undercutting,151561.584684,348
2,Ballast Undercutting -Turnouts,39028.878291,357
3,DC Power Supply - Battery Charger,4655.836000,10
4,Fixed Radio Systems,3852.370000,1
...,...,...,...
27,"Train Control Systems (DTC signs, Local Panel,...",33201.240000,1
28,Transformers,18516.343333,3
29,Transmission (Optic Fibre Systems),1733.440000,1
30,Turnout Grinding,39028.878291,357


In [51]:
# Calculate the average over the last 5 complete calendar years
# (current_year - 5 up to and including current_year - 1)
last_5_years_start = current_year - 5

completion_year = merged_df['CompletionDate'].dt.year
# The upper bound is `< current_year` rather than `!= current_year`: an
# inequality test would also admit rows dated after the current year.
# NOTE(review): CompletionDate falls back to BasicFinishDate in the SQL query;
# presumably that date can lie in the future — confirm.
df_last_5_years = merged_df[(completion_year >= last_5_years_start) & (completion_year < current_year)]

grouped_last_5_years = df_last_5_years.groupby('Maintenance Product').agg(
    Average_Last_5_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_5_Years=('Maintenance Product', 'size')
).reset_index()

grouped_last_5_years

Unnamed: 0,Maintenance Product,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Alternator Set,82740.960000,1
1,Ballast Undercutting,137339.411979,389
2,Ballast Undercutting -Turnouts,34854.352291,406
3,DC Power Supply - Battery Charger,4655.836000,10
4,Fencing,36039.002500,4
...,...,...,...
29,"Train Control Systems (DTC signs, Local Panel,...",33201.240000,1
30,Transformers,18516.343333,3
31,Transmission (Optic Fibre Systems),1733.440000,1
32,Turnout Grinding,34854.352291,406


In [52]:
# Start from the all-time table and left-join each time-window table onto it,
# so products with no rows in a window get NaN in that window's columns.
result_df = grouped_all_time
for window_stats in (grouped_previous_year, grouped_last_3_years, grouped_last_5_years):
    result_df = result_df.merge(window_stats, on='Maintenance Product', how='left')

result_df

Unnamed: 0,Maintenance Product,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Alternator Set,82740.960000,1,,,82740.960000,1.0,82740.960000,1.0
1,Ballast Undercutting,22268.586900,3264,209102.118065,155.0,151561.584684,348.0,137339.411979,389.0
2,Ballast Undercutting -Turnouts,13030.126989,1458,55635.644395,157.0,39028.878291,357.0,34854.352291,406.0
3,Copper Cable Route - Underground,2140.168725,102,,,,,,
4,DC Power Supply - Battery Charger,4655.836000,10,4534.940000,1.0,4655.836000,10.0,4655.836000,10.0
...,...,...,...,...,...,...,...,...,...
38,Transformers,14076.057500,4,,,18516.343333,3.0,18516.343333,3.0
39,Transmission (Microwave Radio),219.770000,2,,,,,,
40,Transmission (Optic Fibre Systems),769.789927,137,,,1733.440000,1.0,1733.440000,1.0
41,Turnout Grinding,12358.809883,1545,55635.644395,157.0,39028.878291,357.0,34854.352291,406.0


In [53]:
# Left-join onto the full product list so every maintenance product appears,
# including those with no statistics at all, then save the table as a pickle.
result_df = all_main_type_df.merge(
    result_df,
    on='Maintenance Product',
    how='left',
)
result_df.to_pickle("../pkl/average_main_cost.pkl")

result_df

Unnamed: 0,Maintenance Product,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,"Points (mechanical, electric, electro-hydrauli...",4531.398329,425.0,31150.930000,12.0,25131.625098,51.0,22172.155085,59.0
1,Level Crossing Warning Control,638.559073,399.0,1071.244000,10.0,1071.244000,10.0,1071.244000,10.0
2,"Train Control Systems (DTC signs, Local Panel,...",33201.240000,1.0,,,33201.240000,1.0,33201.240000,1.0
3,Isolator,39436.809000,30.0,49432.001538,13.0,38018.967273,22.0,38018.967273,22.0
4,Housings (Apparatus Case & Hut),849.329205,88.0,,,2776.480000,3.0,3944.610000,5.0
...,...,...,...,...,...,...,...,...,...
53,Track Resurfacing,19044.818991,3103.0,244076.497207,111.0,182700.735652,230.0,158930.123872,266.0
54,Track Geometry Recording,19369.187372,3048.0,244076.497207,111.0,182700.735652,230.0,158930.123872,266.0
55,Fencing,6069.400486,185.0,,,,,36039.002500,4.0
56,GPR / PVC Inspecting and Testing,23842.489331,2406.0,262775.513663,101.0,189212.850639,219.0,163497.482510,255.0
