In [89]:
import pandas as pd
import pyodbc
from datetime import datetime
pd.set_option('display.max_rows', 10)

In [90]:
sql_query = """
    -- all WO and their flocid
    SELECT
        [WorkOrderNumber],
        [OrderType],
        [CompanyCode],
        [FunctionLocation],
        CASE
            WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
            ELSE [TechCompletionDate]
        END AS 'CompletionDate',
        [MaintenanceActivityType],
        [MaintenanceActivityTypeDesc],
        [MainUserStatus],
        [MainUserStatusDesc],
        [ActualTotalCost]
    FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
    WHERE
        OrderType = 'MW04'
        AND CompanyCode = '5000'
        AND MainUserStatusDesc = 'Practically Completed'
        AND ActualTotalCost IS NOT NULL
        AND ActualTotalCost > 0
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame
df1 = pd.read_sql_query(sql_query, conn)

df1.to_pickle("../pkl/work_order_all.pkl")
# Display the DataFrame
df1

  df1 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost
0,000080085665,MW04,5000,TO000654,2018-12-19,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,71132.19
1,000080085667,MW04,5000,TO000470,2018-04-04,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,60383.24
2,000080085668,MW04,5000,CP000045,2018-05-08,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,5545.31
3,000080085669,MW04,5000,TO000352,2018-06-01,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,57677.62
4,000080085670,MW04,5000,TO000483,2018-06-05,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,50925.56
...,...,...,...,...,...,...,...,...,...,...
9687,000080115395,MW04,5000,BW-03ML,2024-04-08,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,73821.27
9688,000080116950,MW04,5000,BW-10SD,2024-05-27,C21,Track Upgrade,PCOM,Practically Completed,6451.46
9689,000080116951,MW04,5000,LX003196,2024-05-27,C21,Track Upgrade,PCOM,Practically Completed,3809.76
9690,000080116952,MW04,5000,LX005559,2024-05-27,C58,Level Xing Refurb/Renew,PCOM,Practically Completed,5514.38


In [91]:
df2 = pd.read_pickle(r'C:\Users\R893859\Aurizon Operations Limited\Data, Reporting & Analytics - DS08 - NSAP2\Data\Huy\Flocs List\renewal_flocs.pkl')
df2

Unnamed: 0,FuncLocID,Asset Type,Category
0,SG000010,SIGNAL GANTRY,Control & Electrical
1,SG000012,SIGNAL GANTRY,Control & Electrical
2,SG000013,SIGNAL GANTRY,Control & Electrical
3,SG000015,SIGNAL GANTRY,Control & Electrical
4,SG000018,SIGNAL GANTRY,Control & Electrical
...,...,...,...
46321,ST001900-07,MOTORISED ISOLATOR MOTOR UNITS,Control & Electrical
46322,SQ030456,MOTORISED ISOLATOR MOTOR UNITS,Control & Electrical
46323,SQ030458,MOTORISED ISOLATOR MOTOR UNITS,Control & Electrical
46324,SQ030464,MOTORISED ISOLATOR MOTOR UNITS,Control & Electrical


In [92]:
all_asset_type_df = pd.DataFrame(df2['Asset Type'].unique(), columns=['Asset Type'])
all_asset_type_df

Unnamed: 0,Asset Type
0,SIGNAL GANTRY
1,LEVEL CROSSING MONITORS
2,TRAIN PROTECTION SYSTEMS (ATP)
3,WEATHER STATION
4,RADIO POLES
...,...
87,AUTOTRANSFORMERS
88,HARMONIC FILTERS
89,TRACTION SCADA
90,AUXILIARY SUPPLY TRANSFORMERS


In [93]:
merged_df = pd.merge(df1, df2, how='left', left_on='FunctionLocation', right_on='FuncLocID')
merged_df

Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost,FuncLocID,Asset Type,Category
0,000080085665,MW04,5000,TO000654,2018-12-19,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,71132.19,TO000654,1 IN 16 (60KG/M) SWING NOSE POINTS AND CROSSING,Track & Ballast
1,000080085667,MW04,5000,TO000470,2018-04-04,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,60383.24,TO000470,1 IN 16 (60KG/M) SWING NOSE POINTS AND CROSSING,Track & Ballast
2,000080085668,MW04,5000,CP000045,2018-05-08,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,5545.31,,,
3,000080085669,MW04,5000,TO000352,2018-06-01,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,57677.62,TO000352,1 IN 12 (60KG/M) SWING NOSE POINTS AND CROSSING,Track & Ballast
4,000080085670,MW04,5000,TO000483,2018-06-05,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,50925.56,TO000483,1 IN 16 (60KG/M) SWING NOSE POINTS AND CROSSING,Track & Ballast
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10195,000080115395,MW04,5000,BW-03ML,2024-04-08,C14,Ballast Excavator U/C Major,PCOM,Practically Completed,73821.27,,,
10196,000080116950,MW04,5000,BW-10SD,2024-05-27,C21,Track Upgrade,PCOM,Practically Completed,6451.46,,,
10197,000080116951,MW04,5000,LX003196,2024-05-27,C21,Track Upgrade,PCOM,Practically Completed,3809.76,LX003196,"PUBLIC LEVEL XING REFURBISHMENT (SIGNAGE, DRAI...",Structure & Civils
10198,000080116952,MW04,5000,LX005559,2024-05-27,C58,Level Xing Refurb/Renew,PCOM,Practically Completed,5514.38,LX005559,"PUBLIC LEVEL XING REFURBISHMENT (SIGNAGE, DRAI...",Structure & Civils


In [94]:
nan_count = merged_df['Asset Type'].isna().sum()
nan_count

7704

In [95]:
# Convert CompletionDate column into datetime
merged_df['CompletionDate'] = pd.to_datetime(merged_df['CompletionDate'])
# merged_df['Asset Type'] = merged_df['Asset Type'].fillna('Unknown')

# Calculate all time average
grouped_all_time = merged_df.groupby('Asset Type').agg(
    Average_ActualTotalCost_All_Time=('ActualTotalCost', 'mean'),
    Sample_Size_All_Time=('Asset Type', 'size')
).reset_index()

grouped_all_time

Unnamed: 0,Asset Type,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,1 IN 12 (47KG/M) FABRICATED POINTS AND CROSSING,53840.865385,13
1,1 IN 12 (53KG/M) FABRICATED POINTS AND CROSSING,43754.905455,11
2,1 IN 12 (53KG/M) RBM POINTS AND CROSSING,25101.284000,20
3,1 IN 12 (60KG/M) RBM POINTS AND CROSSING,40374.682887,142
4,1 IN 12 (60KG/M) SPRING WING CROSSING,41477.865843,89
...,...,...,...
41,REPLACE PANDROL E-CLIP FASTENINGS (<10KM FROM ...,177356.831667,180
42,SECTION INSULATORS,35706.427692,26
43,TRACTION POWER SUPPLY TRANSFORMERS,83722.333333,15
44,UPS,41392.480000,2


In [96]:
# Calculate last year average
current_year = datetime.now().year
previous_year = current_year - 1

df_previous_year = merged_df[merged_df['CompletionDate'].dt.year == previous_year]

grouped_previous_year = df_previous_year.groupby('Asset Type').agg(
    Average_ActualTotalCost_Previous_Year=('ActualTotalCost', 'mean'),
    Sample_Size_Previous_Year=('Asset Type', 'size')
).reset_index()

grouped_previous_year

Unnamed: 0,Asset Type,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year
0,1 IN 12 (47KG/M) FABRICATED POINTS AND CROSSING,125902.406000,5
1,1 IN 12 (53KG/M) RBM POINTS AND CROSSING,8230.625000,2
2,1 IN 12 (60KG/M) RBM POINTS AND CROSSING,40136.250000,33
3,1 IN 12 (60KG/M) SPRING WING CROSSING,26773.940000,18
4,1 IN 12 (60KG/M) SWING NOSE POINTS AND CROSSING,47786.222909,55
...,...,...,...
31,REPLACE PANDROL E-CLIP FASTENINGS (<10KM FROM ...,184903.414086,93
32,SECTION INSULATORS,27413.282000,20
33,TRACTION POWER SUPPLY TRANSFORMERS,83633.158000,5
34,UPS,44.000000,1


In [97]:
# Calculate last 3 years average
last_3_years_start = current_year - 3

df_last_3_years = merged_df[(merged_df['CompletionDate'].dt.year >= last_3_years_start) & (merged_df['CompletionDate'].dt.year != current_year)]

grouped_last_3_years = df_last_3_years.groupby('Asset Type').agg(
    Average_Last_3_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_3_Years=('Asset Type', 'size')
).reset_index()

grouped_last_3_years

Unnamed: 0,Asset Type,Average_Last_3_Years,Sample_Size_Last_3_Years
0,1 IN 12 (47KG/M) FABRICATED POINTS AND CROSSING,125902.406000,5
1,1 IN 12 (53KG/M) FABRICATED POINTS AND CROSSING,30655.857500,4
2,1 IN 12 (53KG/M) RBM POINTS AND CROSSING,9420.830000,6
3,1 IN 12 (60KG/M) RBM POINTS AND CROSSING,44632.890723,83
4,1 IN 12 (60KG/M) SPRING WING CROSSING,25667.624490,49
...,...,...,...
39,REPLACE PANDROL E-CLIP FASTENINGS (<10KM FROM ...,142057.057500,132
40,SECTION INSULATORS,37642.132727,22
41,TRACTION POWER SUPPLY TRANSFORMERS,68045.527000,10
42,UPS,41392.480000,2


In [98]:
# Calculate last 5 years average
last_5_years_start = current_year - 5

df_last_5_years = merged_df[(merged_df['CompletionDate'].dt.year >= last_5_years_start) & (merged_df['CompletionDate'].dt.year != current_year)]

grouped_last_5_years = df_last_5_years.groupby('Asset Type').agg(
    Average_Last_5_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_5_Years=('Asset Type', 'size')
).reset_index()

grouped_last_5_years

Unnamed: 0,Asset Type,Average_Last_5_Years,Sample_Size_Last_5_Years
0,1 IN 12 (47KG/M) FABRICATED POINTS AND CROSSING,71096.215556,9
1,1 IN 12 (53KG/M) FABRICATED POINTS AND CROSSING,29344.892857,7
2,1 IN 12 (53KG/M) RBM POINTS AND CROSSING,21808.769167,12
3,1 IN 12 (60KG/M) RBM POINTS AND CROSSING,42726.383178,107
4,1 IN 12 (60KG/M) SPRING WING CROSSING,26741.093284,67
...,...,...,...
40,REPLACE PANDROL E-CLIP FASTENINGS (<10KM FROM ...,142057.057500,132
41,SECTION INSULATORS,37642.132727,22
42,TRACTION POWER SUPPLY TRANSFORMERS,68045.527000,10
43,UPS,41392.480000,2


In [99]:
result_df = pd.merge(grouped_all_time, grouped_previous_year, on='Asset Type', how='left')
result_df = pd.merge(result_df, grouped_last_3_years, on='Asset Type', how='left')
result_df = pd.merge(result_df, grouped_last_5_years, on='Asset Type', how='left')

result_df

Unnamed: 0,Asset Type,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,1 IN 12 (47KG/M) FABRICATED POINTS AND CROSSING,53840.865385,13,125902.406000,5.0,125902.406000,5.0,71096.215556,9.0
1,1 IN 12 (53KG/M) FABRICATED POINTS AND CROSSING,43754.905455,11,,,30655.857500,4.0,29344.892857,7.0
2,1 IN 12 (53KG/M) RBM POINTS AND CROSSING,25101.284000,20,8230.625000,2.0,9420.830000,6.0,21808.769167,12.0
3,1 IN 12 (60KG/M) RBM POINTS AND CROSSING,40374.682887,142,40136.250000,33.0,44632.890723,83.0,42726.383178,107.0
4,1 IN 12 (60KG/M) SPRING WING CROSSING,41477.865843,89,26773.940000,18.0,25667.624490,49.0,26741.093284,67.0
...,...,...,...,...,...,...,...,...,...
41,REPLACE PANDROL E-CLIP FASTENINGS (<10KM FROM ...,177356.831667,180,184903.414086,93.0,142057.057500,132.0,142057.057500,132.0
42,SECTION INSULATORS,35706.427692,26,27413.282000,20.0,37642.132727,22.0,37642.132727,22.0
43,TRACTION POWER SUPPLY TRANSFORMERS,83722.333333,15,83633.158000,5.0,68045.527000,10.0,68045.527000,10.0
44,UPS,41392.480000,2,44.000000,1.0,41392.480000,2.0,41392.480000,2.0


In [100]:
result_df = pd.merge(all_asset_type_df, result_df, on='Asset Type', how='left')
result_df.to_pickle("../pkl/average_renewal_cost.pkl")

result_df

Unnamed: 0,Asset Type,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,SIGNAL GANTRY,,,,,,,,
1,LEVEL CROSSING MONITORS,,,,,,,,
2,TRAIN PROTECTION SYSTEMS (ATP),,,,,,,,
3,WEATHER STATION,,,,,,,,
4,RADIO POLES,,,,,,,,
...,...,...,...,...,...,...,...,...,...
87,AUTOTRANSFORMERS,83722.333333,15.0,83633.158000,5.0,68045.527000,10.0,68045.527000,10.0
88,HARMONIC FILTERS,,,,,,,,
89,TRACTION SCADA,,,,,,,,
90,AUXILIARY SUPPLY TRANSFORMERS,,,,,,,,


In [101]:
all_category_type_df = pd.DataFrame(df2['Category'].unique(), columns=['Category'])

merged_df2 = pd.merge(df1, df2, how='left', left_on='FunctionLocation', right_on='FuncLocID')

# Convert CompletionDate column into datetime
merged_df2['CompletionDate'] = pd.to_datetime(merged_df2['CompletionDate'])

# Calculate all time average
grouped_all_time2 = merged_df2.groupby('Category').agg(
    Average_ActualTotalCost_All_Time=('ActualTotalCost', 'mean'),
    Sample_Size_All_Time=('Category', 'size')
).reset_index()

# Calculate last year average
current_year = datetime.now().year
previous_year = current_year - 1

df_previous_year2 = merged_df2[merged_df2['CompletionDate'].dt.year == previous_year]

grouped_previous_year2 = df_previous_year2.groupby('Category').agg(
    Average_ActualTotalCost_Previous_Year=('ActualTotalCost', 'mean'),
    Sample_Size_Previous_Year=('Category', 'size')
).reset_index()

# Calculate last 3 years average
last_3_years_start = current_year - 3

df_last_3_years2 = merged_df2[(merged_df2['CompletionDate'].dt.year >= last_3_years_start) & (merged_df2['CompletionDate'].dt.year != current_year)]

grouped_last_3_years2 = df_last_3_years2.groupby('Category').agg(
    Average_Last_3_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_3_Years=('Category', 'size')
).reset_index()

# Calculate last 5 years average
last_5_years_start = current_year - 5

df_last_5_years2 = merged_df2[(merged_df2['CompletionDate'].dt.year >= last_5_years_start) & (merged_df2['CompletionDate'].dt.year != current_year)]

grouped_last_5_years2 = df_last_5_years2.groupby('Category').agg(
    Average_Last_5_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_5_Years=('Category', 'size')
).reset_index()

# Join 3 df into 1
result_df2 = pd.merge(grouped_all_time2, grouped_previous_year2, on='Category', how='left')
result_df2 = pd.merge(result_df2, grouped_last_3_years2, on='Category', how='left')
result_df2 = pd.merge(result_df2, grouped_last_5_years2, on='Category', how='left')

result_df2 = pd.merge(all_category_type_df, result_df2, on='Category', how='left')
# result_df2.to_pickle("../pkl/average_renewal_cost.pkl")

result_df2

Unnamed: 0,Category,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Control & Electrical,36221.359314,306,41017.736019,103,33382.60722,241,32578.600161,249
1,Track & Ballast,80980.248479,1940,111122.62467,621,73371.237494,1221,66776.755003,1475
2,Structure & Civils,23003.08432,250,10859.298833,120,12222.082593,189,14059.584948,194
