In [2]:
import pandas as pd
import pyodbc
from datetime import datetime

In [3]:
sql_query = """
    -- all WO and their flocid
    SELECT
        [WorkOrderNumber],
        [OrderType],
        [CompanyCode],
        [FunctionLocation],
        CASE
            WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
            ELSE [TechCompletionDate]
        END AS 'CompletionDate',
        [MaintenanceActivityType],
        [MaintenanceActivityTypeDesc],
        [MainUserStatus],
        [MainUserStatusDesc],
        [ActualTotalCost],
        [WBSElementSAP]
    FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
    WHERE
        [MaintenanceActivityType] IN
            (
                'C01', 'C14', 'C20', 'C02', 'C13', 'C03', 'C19', 'C23', 'C25', 'C26', 'C10', -- Mechanised Track Maintenance
                'C29', 'C31', 'T31', 'C37', 'C08', 'C47', 'C54', 'C43', 'C51', 'C53', 'C57', 'C70', 'C72', 'C50', 'C10', 'C30', 'C48', 'C54', 'C52', 'C06', 'C07', 'C44', 'C01', 'C41', -- General Track Maintenance
                'NIP', 'NRP', 'B50', 'B53', 'C67', 'B04', 'B05', 'B06', 'B55', 'B57', -- Structures Maintenance
                'T28', 'T29', 'T33', 'T40', 'T41', 'T58', 'T44', 'T45', 'T34', 'T46', 'T47', 'T48', 'T54', 'T42', 'T43', -- Control Systems - Signalling and Wayside Maintenance
                'T10', 'T11', 'T32', -- Control Systems - Telecommunications Maintenance
                'C54', 'T32', -- Control Systems - Operational Systems Maintenance
                'T26', 'T27', 'NSV', 'T32', 'T24', 'T25', 'E31' -- Traction Power Maintenance
            )
        AND [CompanyCode] = '5000'
        AND [MainUserStatusDesc] = 'Practically Completed'
        AND [ActualTotalCost] IS NOT NULL
        AND [ActualTotalCost] > 0
        AND OrderType <> 'MW04'
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame
df1 = pd.read_sql_query(sql_query, conn)

df1.to_pickle("../pkl/work_order_main_all.pkl")
# Display the DataFrame
df1

  df1 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost
0,000057080030,NW02,5000,TO000619,2024-05-16,C10,Turnout Maintenance,PCOM,Practically Completed,1883.38
1,000057080032,NW02,5000,TO000619,2024-05-16,C10,Turnout Maintenance,PCOM,Practically Completed,6532.42
2,000057080178,NW02,5000,GA-01ML,2024-06-28,C70,Corridor Maintenance,PCOM,Practically Completed,1338.34
3,000057080395,NW02,5000,LX003289-91,2024-04-15,C57,Level Crossing Maintenance,PCOM,Practically Completed,2099.18
4,000058280231,NW03,5000,ST001740-02,2024-03-28,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,436.54
...,...,...,...,...,...,...,...,...,...,...
14504,000058268354,NW03,5000,TO000774-01,2024-05-09,T33,Signalling Field Maintenance,PCOM,Practically Completed,569.18
14505,000058268355,NW03,5000,TO000775-01,2024-05-09,T33,Signalling Field Maintenance,PCOM,Practically Completed,711.46
14506,000058268356,NW03,5000,TO000711-01,2024-05-29,T33,Signalling Field Maintenance,PCOM,Practically Completed,1328.07
14507,000058268369,NW03,5000,TO000247-02,2024-05-02,T33,Signalling Field Maintenance,PCOM,Practically Completed,525.81


In [30]:
# Getting all FLOC from [vw_Dim_FunctionalLocation] where CompanyCode = 5000 -> df2_no_length
sql_query = """
    SELECT
        FLOC_STRNO_FunctionalLocation,
        FLOC_PLTXT_FunctionalLocationDescr,
        FLOC_EARTX_TechnicalObjectTypeDescr,
        FLOC_STORT_LocationDescr
    FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
	WHERE
        FLOC_BUKRS_CompanyCode = '5000'
	ORDER BY FLOC_STRNO_FunctionalLocation
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame
df2 = pd.read_sql_query(sql_query, conn)

df2

  df2 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_STORT_LocationDescr
0,AC-01CH,CALLEMONDAH BLUE ROAD 1,Yard Track,Callemondah
1,AC-01JN,Jilalan Provisioning 1,Yard Track,Jilalan
2,AC-01JN-JNJN,Jilalan Provisioning 1,Lvl 3 Segmentation,Jilalan
3,AC-01NA,NOGOA YARD BLUE ROAD TRACK 1,Yard Track,Nogoa-Yamala
4,AC-01PA,PARANA BLUE ROAD MAINLINE,Main Line,Parana
...,...,...,...,...
174445,WL000203,Baralaba WB Strain Gauge Equip 4.225Km,Wayleave Objects,Baralaba
174446,WL000204,Baralaba WB Location Box 4.225Km,Wayleave Objects,Baralaba
174447,WL000205,Baralaba WB Location Box 4.165Km,Wayleave Objects,Baralaba
174448,WL999999,Electric WLNL 01CR BYAA16.652Km,Wayleave Objects,Armuna-Buckley


In [27]:
# df2 = pd.read_pickle(r'C:\Users\R893859\Aurizon Operations Limited\Data, Reporting & Analytics - DS08 - NSAP2\Data\Huy\Flocs List\maint_flocs.pkl')
# df2

Unnamed: 0,FuncLocID,Maintenance Product
0,TO000226-01,"Points (mechanical, electric, electro-hydrauli..."
2,TO000226-02,"Points (mechanical, electric, electro-hydrauli..."
4,LX006284-01,Level Crossing Warning Control
7,TO000310-01,"Points (mechanical, electric, electro-hydrauli..."
9,TO000147-01,"Points (mechanical, electric, electro-hydrauli..."
...,...,...
76328,ST008155-15,Power Supplies (Electric)
76330,ST008155-19,Power Supplies (Electric)
76332,ST008155-22,Power Supplies (Electric)
76334,ST009535-06,Power Supplies (Electric)


In [31]:
all_main_type_df = pd.DataFrame(df2['FLOC_EARTX_TechnicalObjectTypeDescr'].unique(), columns=['FLOC_EARTX_TechnicalObjectTypeDescr'])
all_main_type_df

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr
0,Yard Track
1,Lvl 3 Segmentation
2,Main Line
3,Corridor Access Pt
4,Corridor Objects
...,...
252,Turnout Track
253,Manual Turnout
254,Points Remote Contro
255,Points Locking


In [32]:
merged_df = pd.merge(df1, df2, how='left', left_on='FunctionLocation', right_on='FLOC_STRNO_FunctionalLocation')
merged_df

Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_STORT_LocationDescr
0,000058280231,NW03,5000,ST001740-02,2024-03-28,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,436.54,ST001740-02,PD 43.789 Km AC/DC Power Supply,LV Power Systems,Peak Downs
1,000058280232,NW03,5000,ST001750-02,2024-03-28,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,436.54,ST001750-02,Dysart 88.4 km OC battery charger,LV Power Systems,Dysart
2,000058280233,NW03,5000,ST001760-02,2024-03-28,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,436.54,ST001760-02,Saraji 65.146 km OC battery charger,LV Power Systems,Saraji
3,000058280234,NW03,5000,ST001770-02,2024-04-11,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,1458.02,ST001770-02,Bundoora 129.64 km OC battery charger,LV Power Systems,Bundoora
4,000058280235,NW03,5000,ST000760-14,2024-03-28,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,436.54,ST000760-14,GC TSC 136.4 Km AC/DC Power Supply,LV Power Systems,German Creek
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17429,000058266105,NW03,5000,TO000449-01,2024-04-04,T33,Signalling Field Maintenance,PCOM,Practically Completed,2790.09,TO000449-01,HD8A Pts,Electric Points,Hatfield
17430,000058266178,NW03,5000,TO000770-01,2024-04-11,T33,Signalling Field Maintenance,PCOM,Practically Completed,379.45,TO000770-01,YA ER1 Frame C,Train Control System,Yamala
17431,000058266180,NW03,5000,TO000966-01,2024-03-31,T33,Signalling Field Maintenance,PCOM,Practically Completed,761.37,TO000966-01,DA TFP 16.398Km,Mechanical Points,Dakenba
17432,000058266181,NW03,5000,TO000964-01,2024-03-31,T33,Signalling Field Maintenance,PCOM,Practically Completed,380.70,TO000964-01,DA TFP 15.0Km,Mechanical Points,Dakenba


In [34]:
# Convert CompletionDate column into datetime
merged_df['CompletionDate'] = pd.to_datetime(merged_df['CompletionDate'])
# merged_df['Asset Type'] = merged_df['Asset Type'].fillna('Unknown')

# Calculate all time average
grouped_all_time = merged_df.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_ActualTotalCost_All_Time=('ActualTotalCost', 'mean'),
    Sample_Size_All_Time=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_all_time

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,AC/DC Track,1272.205000,2
1,ATP,1376.613611,36
2,Acoustic Detector Sy,1710.461667,6
3,Alternator Control E,2636.046000,5
4,Antenna,6365.990000,1
...,...,...,...
100,Weather Station Syst,792.230909,11
101,Westinghouse Telemet,1894.536667,3
102,Westrace Interlockin,2586.375000,8
103,Wheel Impact Load De,4686.633077,13


In [35]:
# Calculate last year average
current_year = datetime.now().year
previous_year = current_year - 1

df_previous_year = merged_df[merged_df['CompletionDate'].dt.year == previous_year]

grouped_previous_year = df_previous_year.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_ActualTotalCost_Previous_Year=('ActualTotalCost', 'mean'),
    Sample_Size_Previous_Year=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_previous_year

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year
0,Battery,723.52,1
1,Bridges,9608.626667,3
2,CER,1849.64,1
3,Connecting Track,32541.03,1
4,Corridor Objects,6564.0,1
5,Crossovers,16321.146667,3
6,Culverts,5438.633784,37
7,Curves - Med Radius,2732.116667,3
8,DED Systems,350.56,1
9,Electric Points,25895.873333,3


In [36]:
# Calculate last 3 years average
last_3_years_start = current_year - 3

df_last_3_years = merged_df[(merged_df['CompletionDate'].dt.year >= last_3_years_start) & (merged_df['CompletionDate'].dt.year != current_year)]

grouped_last_3_years = df_last_3_years.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_Last_3_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_3_Years=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_last_3_years

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_Last_3_Years,Sample_Size_Last_3_Years
0,Axle Counter Detecti,1343.58,1
1,Battery,723.52,1
2,Bridges,145760.9925,4
3,CER,1849.64,1
4,Cables,1733.44,1
5,Conductors/Wires,172306.17,1
6,Connecting Track,32541.03,1
7,Corridor Objects,6564.0,1
8,Crossovers,16321.146667,3
9,Culverts,5348.527105,38


In [37]:
# Calculate last 5 years average
last_5_years_start = current_year - 5

df_last_5_years = merged_df[(merged_df['CompletionDate'].dt.year >= last_5_years_start) & (merged_df['CompletionDate'].dt.year != current_year)]

grouped_last_5_years = df_last_5_years.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_Last_5_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_5_Years=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_last_5_years

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Axle Counter Detecti,1343.58,1
1,Battery,723.52,1
2,Bridges,66407.32875,16
3,CER,1849.64,1
4,Cables,1733.44,1
5,Conductors/Wires,172306.17,1
6,Connecting Track,32541.03,1
7,Corridor Objects,30144.002,5
8,Crossovers,16321.146667,3
9,Culverts,5524.835641,39


In [38]:
result_df = pd.merge(grouped_all_time, grouped_previous_year, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
result_df = pd.merge(result_df, grouped_last_3_years, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
result_df = pd.merge(result_df, grouped_last_5_years, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')

result_df

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,AC/DC Track,1272.205000,2,,,,,,
1,ATP,1376.613611,36,,,,,,
2,Acoustic Detector Sy,1710.461667,6,,,,,,
3,Alternator Control E,2636.046000,5,,,,,,
4,Antenna,6365.990000,1,,,,,,
...,...,...,...,...,...,...,...,...,...
100,Weather Station Syst,792.230909,11,,,,,,
101,Westinghouse Telemet,1894.536667,3,,,,,,
102,Westrace Interlockin,2586.375000,8,5851.420,1.0,5851.420,1.0,5851.420,1.0
103,Wheel Impact Load De,4686.633077,13,,,,,,


In [40]:
result_df = pd.merge(all_main_type_df, result_df, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
result_df.to_pickle("../pkl/average_main_cost.pkl")

result_df

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Yard Track,2678.006969,287.0,11098.205000,4.0,15156.684000,5.0,15156.684000,5.0
1,Lvl 3 Segmentation,3996.588000,5.0,3994.595000,2.0,3994.595000,2.0,3994.595000,2.0
2,Main Line,29359.375601,1439.0,260158.964571,70.0,194890.040789,152.0,166634.793128,179.0
3,Corridor Access Pt,7668.270000,1.0,,,,,,
4,Corridor Objects,6405.312363,347.0,6564.000000,1.0,6564.000000,1.0,30144.002000,5.0
...,...,...,...,...,...,...,...,...,...
252,Turnout Track,,,,,,,,
253,Manual Turnout,5364.094444,288.0,31150.930000,12.0,30053.856000,35.0,26088.565610,41.0
254,Points Remote Contro,2237.895000,2.0,,,,,,
255,Points Locking,2467.380000,2.0,,,,,,


: 