In [14]:
import pandas as pd
import pyodbc
from datetime import datetime

In [15]:
# Work orders for company 5000 that are 'Practically Completed', have a
# positive ActualTotalCost, are not order type MW04, and whose maintenance
# activity type is in the list below. CompletionDate is TechCompletionDate,
# falling back to BasicFinishDate when TechCompletionDate is NULL.
sql_query = """
    -- all WO and their flocid
    SELECT
        [WorkOrderNumber],
        [OrderType],
        [CompanyCode],
        [FunctionLocation],
        CASE
            WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
            ELSE [TechCompletionDate]
        END AS 'CompletionDate',
        [MaintenanceActivityType],
        [MaintenanceActivityTypeDesc],
        [MainUserStatus],
        [MainUserStatusDesc],
        [ActualTotalCost],
        [WBSElementSAP]
    FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
    WHERE
        [MaintenanceActivityType] IN
            (
                'C01', 'C14', 'C20', 'C02', 'C13', 'C03', 'C19', 'C23', 'C25', 'C26', 'C10', -- Mechanised Track Maintenance
                'C29', 'C31', 'T31', 'C37', 'C08', 'C47', 'C54', 'C43', 'C51', 'C53', 'C57', 'C70', 'C72', 'C50', 'C10', 'C30', 'C48', 'C54', 'C52', 'C06', 'C07', 'C44', 'C01', 'C41', -- General Track Maintenance
                'NIP', 'NRP', 'B50', 'B53', 'C67', 'B04', 'B05', 'B06', 'B55', 'B57', -- Structures Maintenance
                'T28', 'T29', 'T33', 'T40', 'T41', 'T58', 'T44', 'T45', 'T34', 'T46', 'T47', 'T48', 'T54', 'T42', 'T43', -- Control Systems - Signalling and Wayside Maintenance
                'T10', 'T11', 'T32', -- Control Systems - Telecommunications Maintenance
                'C54', 'T32', -- Control Systems - Operational Systems Maintenance
                'T26', 'T27', 'NSV', 'T32', 'T24', 'T25', 'E31' -- Traction Power Maintenance
            )
        AND [CompanyCode] = '5000'
        AND [MainUserStatusDesc] = 'Practically Completed'
        AND [ActualTotalCost] IS NOT NULL
        AND [ActualTotalCost] > 0
        AND OrderType <> 'MW04'
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# The connection is closed in `finally` so that a failed query or a re-run of
# this cell does not leave an open session on the server.
try:
    df1 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Display the DataFrame
df1

  df1 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost
0,000058006897,NW03,5000,NC-02ML,2017-12-29,NIP,Inspect,PCOM,Practically Completed,480.00
1,000058006898,NW03,5000,NC-03ML,2018-01-12,NIP,Inspect,PCOM,Practically Completed,780.00
2,000058006901,NW03,5000,BW-03ML,2017-11-30,NIP,Inspect,PCOM,Practically Completed,2672.50
3,000058006902,NW03,5000,BW-01ML,2018-01-12,NIP,Inspect,PCOM,Practically Completed,2750.00
4,000058006903,NW03,5000,BW-13ML,2017-11-30,NIP,Inspect,PCOM,Practically Completed,250.00
...,...,...,...,...,...,...,...,...,...,...
323393,000058105639,NW03,5000,ST003030-02,2020-11-18,NSV,Service,PCOM,Practically Completed,554.15
323394,000058105640,NW03,5000,TO000790-01,2020-05-12,NSV,Service,PCOM,Practically Completed,602.38
323395,000058105641,NW03,5000,TO000790-02,2020-05-12,NSV,Service,PCOM,Practically Completed,301.19
323396,000058105642,NW03,5000,TO000791-01,2020-05-12,NSV,Service,PCOM,Practically Completed,602.38


In [20]:
# Get every functional location in [vw_Dim_FunctionalLocation] with
# CompanyCode = 5000 and technical object type 'Substation' -> df2
sql_query = """
    SELECT
        FLOC_STRNO_FunctionalLocation,
        FLOC_PLTXT_FunctionalLocationDescr,
        FLOC_EARTX_TechnicalObjectTypeDescr,
        FLOC_STORT_LocationDescr
    FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
	WHERE
        FLOC_BUKRS_CompanyCode = '5000'
        AND FLOC_EARTX_TechnicalObjectTypeDescr = 'Substation'
	ORDER BY FLOC_STRNO_FunctionalLocation
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# The connection is closed in `finally` so that a failed query or a re-run of
# this cell does not leave an open session on the server.
try:
    df2 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Specify the file path and sheet name
file_path = '../xlsx/substation_maintenance.xlsx'
sheet_name = 'all_substations'

# mode='w' creates (or overwrites) the workbook with this single sheet.
with pd.ExcelWriter(file_path, engine='openpyxl', mode='w') as writer:
    df2.to_excel(writer, sheet_name=sheet_name, index=False)

df2

  df2 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_STORT_LocationDescr
0,ST000020-08,Wycarbah FS,Substation,Wycarbah
1,ST000040-12,Dingo TSC,Substation,Dingo
2,ST000050-08,Bluff FS,Substation,Bluff
3,ST000070-06,Burngrove TCU,Substation,Burngrove
4,ST000090-04,Red Rock TSC,Substation,Red Rock
...,...,...,...,...
199,ST009500-03,KMMO/AT1 41.100Km PSC,Substation,Kenmare-Memooloo
200,ST009510-03,MOSE/AT1 67.500Km PSC,Substation,Memooloo-Starlee
201,ST009520-01,MOSE/AT2 78.650Km PSC,Substation,Starlee
202,ST009530-03,SERM/AT2 103.650Km PSC,Substation,Rolleston Mine


In [21]:
# Attach the functional-location attributes from df2 to each work order in df1.
# With a left join, work orders whose FunctionLocation has no match in df2 get
# empty FLOC_* columns.
merged_df = pd.merge(df1, df2, how='left', left_on='FunctionLocation', right_on='FLOC_STRNO_FunctionalLocation')

# Keep only the work orders that matched a row in df2. The .copy() makes
# merged_df an independent frame, so assigning columns to it afterwards does
# not operate on a filtered slice (avoids SettingWithCopyWarning).
merged_df = merged_df[merged_df['FLOC_EARTX_TechnicalObjectTypeDescr'].notna()].copy()

# Optional export, disabled by default:
# # Write the DataFrame to an Excel file
# file_path = '../xlsx/all_wo_substations_only.xlsx'
# merged_df.to_excel(file_path, index=False)

merged_df

Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_STORT_LocationDescr
453,000056012902,NW01,5000,ST009450-23,2019-03-10,NIP,Inspect,PCOM,Practically Completed,870.00,ST009450-23,Struan Road FS,Substation,Struan Road
622,000057080543,NW02,5000,ST007760-38,2023-08-30,NIP,Inspect,PCOM,Practically Completed,195.49,ST007760-38,Edungalba TSC,Substation,Edungalba
1547,000058233497,NW03,5000,ST000150-09,2023-08-03,NSV,Service,PCOM,Practically Completed,521.31,ST000150-09,Raglan FS,Substation,Raglan
1548,000058233498,NW03,5000,ST008450-49,2023-08-03,NSV,Service,PCOM,Practically Completed,521.31,ST008450-49,Raglan CS,Substation,Raglan
1549,000058233499,NW03,5000,ST001860-03,2023-12-21,NSV,Service,PCOM,Practically Completed,587.17,ST001860-03,AA MI PSC,Substation,Aldoga
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319504,000057091106,NW02,5000,ST002060-02,2024-06-06,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,377.59,ST002060-02,MZ AT PSC,Substation,Mackenzie
319506,000057091107,NW02,5000,ST007520-02,2024-06-06,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,314.53,ST007520-02,YY MI PSC,Substation,Yan Yan
320528,000056046286,NW01,5000,ST009450-23,2024-06-02,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,57.76,ST009450-23,Struan Road FS,Substation,Struan Road
321252,000057091969,NW02,5000,ST001760-01,2024-06-13,E31,Feed Stns&Trck Sect Cabin Mtce,PCOM,Practically Completed,913.02,ST001760-01,SI 65.14 Km PSC,Substation,Saraji


In [18]:
# Lookup of the distinct (activity type code, description) pairs present in merged_df
all_maint_activity_code = (
    merged_df
    .loc[:, ['MaintenanceActivityType', 'MaintenanceActivityTypeDesc']]
    .drop_duplicates()
)
all_maint_activity_code

Unnamed: 0,MaintenanceActivityType,MaintenanceActivityTypeDesc
453,NIP,Inspect
1547,NSV,Service
1728,E31,Feed Stns&Trck Sect Cabin Mtce
5559,NRP,Repair
8622,T32,Power Systems Control Traction
8624,C44,Fire & Vegetation Management
239966,T02,Overhead Maintenance


In [22]:
# Convert CompletionDate column into datetime
merged_df['CompletionDate'] = pd.to_datetime(merged_df['CompletionDate'])

# All-time mean ActualTotalCost and row count per maintenance activity type
avg_maint_cost_per_activity_type = merged_df.groupby('MaintenanceActivityType').agg(
    Average_ActualTotalCost_All_Time=('ActualTotalCost', 'mean'),
    Sample_Size_All_Time=('MaintenanceActivityType', 'size')
).reset_index()

# Attach the activity type description and put the columns in report order
avg_maint_cost_per_activity_type = pd.merge(avg_maint_cost_per_activity_type, all_maint_activity_code, how='left', on='MaintenanceActivityType')
avg_maint_cost_per_activity_type = avg_maint_cost_per_activity_type[['MaintenanceActivityType', 'MaintenanceActivityTypeDesc', 'Average_ActualTotalCost_All_Time', 'Sample_Size_All_Time']]

# Specify the file path and sheet name
file_path = '../xlsx/substation_maintenance.xlsx'
sheet_name = 'avg_maint_cost_per_activity_type'

# Append the sheet to the existing workbook. if_sheet_exists='replace' lets this
# cell be re-run: with the default ('error'), a second run raises ValueError
# because the sheet is already present. (Requires pandas >= 1.3.)
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    avg_maint_cost_per_activity_type.to_excel(writer, sheet_name=sheet_name, index=False)

avg_maint_cost_per_activity_type



Unnamed: 0,MaintenanceActivityType,MaintenanceActivityTypeDesc,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,C44,Fire & Vegetation Management,2078.82,1
1,E31,Feed Stns&Trck Sect Cabin Mtce,1346.800074,406
2,NIP,Inspect,2362.04907,215
3,NRP,Repair,3274.054067,359
4,NSV,Service,797.31817,2541
5,T02,Overhead Maintenance,1385.11,1
6,T32,Power Systems Control Traction,1268.085,10


In [23]:
# Make sure CompletionDate is a datetime column (the year-based cells use .dt.year)
merged_df['CompletionDate'] = pd.to_datetime(merged_df['CompletionDate'])

# All-time mean ActualTotalCost and row count per technical object type
grouped_all_time = (
    merged_df
    .groupby('FLOC_EARTX_TechnicalObjectTypeDescr')
    .agg(
        Average_ActualTotalCost_All_Time=pd.NamedAgg(column='ActualTotalCost', aggfunc='mean'),
        Sample_Size_All_Time=pd.NamedAgg(column='FLOC_EARTX_TechnicalObjectTypeDescr', aggfunc='size'),
    )
    .reset_index()
)

grouped_all_time

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time
0,Substation,1209.215126,3533


In [24]:
# Average for the current calendar year
current_year = datetime.now().year

# Rows whose CompletionDate falls in the current calendar year
df_current_year = merged_df.loc[merged_df['CompletionDate'].dt.year.eq(current_year)]

# Mean ActualTotalCost and row count per technical object type
grouped_current_year = (
    df_current_year
    .groupby('FLOC_EARTX_TechnicalObjectTypeDescr')
    .agg(
        Average_ActualTotalCost_current_Year=pd.NamedAgg(column='ActualTotalCost', aggfunc='mean'),
        Sample_Size_current_Year=pd.NamedAgg(column='FLOC_EARTX_TechnicalObjectTypeDescr', aggfunc='size'),
    )
    .reset_index()
)

grouped_current_year

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_current_Year,Sample_Size_current_Year
0,Substation,1371.981931,725


In [25]:
# Average for the previous calendar year
current_year = datetime.now().year
previous_year = current_year - 1

# Rows whose CompletionDate falls in the previous calendar year
df_previous_year = merged_df.loc[merged_df['CompletionDate'].dt.year.eq(previous_year)]

# Mean ActualTotalCost and row count per technical object type
grouped_previous_year = (
    df_previous_year
    .groupby('FLOC_EARTX_TechnicalObjectTypeDescr')
    .agg(
        Average_ActualTotalCost_Previous_Year=pd.NamedAgg(column='ActualTotalCost', aggfunc='mean'),
        Sample_Size_Previous_Year=pd.NamedAgg(column='FLOC_EARTX_TechnicalObjectTypeDescr', aggfunc='size'),
    )
    .reset_index()
)

grouped_previous_year

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year
0,Substation,1110.978073,1240


In [26]:
# Calculate last 3 years average: the three calendar years before the current
# one (current_year - 3 .. current_year - 1, both inclusive).
# current_year is recomputed here so the cell does not depend on an earlier
# cell having defined it.
current_year = datetime.now().year
last_3_years_start = current_year - 3

# Bound the window on both sides. Excluding only `== current_year` would let
# rows dated after the current year into the "last 3 years" sample.
# Rows with a missing CompletionDate compare as False and are excluded.
df_last_3_years = merged_df[merged_df['CompletionDate'].dt.year.between(last_3_years_start, current_year - 1)]

# Mean ActualTotalCost and row count per technical object type
grouped_last_3_years = df_last_3_years.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_Last_3_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_3_Years=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_last_3_years

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_Last_3_Years,Sample_Size_Last_3_Years
0,Substation,1156.344664,2742


In [27]:
# Calculate last 5 years average: the five calendar years before the current
# one (current_year - 5 .. current_year - 1, both inclusive).
# current_year is recomputed here so the cell does not depend on an earlier
# cell having defined it.
current_year = datetime.now().year
last_5_years_start = current_year - 5

# Bound the window on both sides. Excluding only `== current_year` would let
# rows dated after the current year into the "last 5 years" sample.
# Rows with a missing CompletionDate compare as False and are excluded.
df_last_5_years = merged_df[merged_df['CompletionDate'].dt.year.between(last_5_years_start, current_year - 1)]

# Mean ActualTotalCost and row count per technical object type
grouped_last_5_years = df_last_5_years.groupby('FLOC_EARTX_TechnicalObjectTypeDescr').agg(
    Average_Last_5_Years=('ActualTotalCost', 'mean'),
    Sample_Size_Last_5_Years=('FLOC_EARTX_TechnicalObjectTypeDescr', 'size')
).reset_index()

grouped_last_5_years

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Substation,1154.562914,2780


In [28]:
# Combine the per-period summaries into one row per technical object type.
# Left joins from the all-time summary keep every object type even when a
# period has no rows for it (those period columns are then empty).
result_df = (
    grouped_all_time
    .merge(grouped_current_year, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
    .merge(grouped_previous_year, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
    .merge(grouped_last_3_years, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
    .merge(grouped_last_5_years, on='FLOC_EARTX_TechnicalObjectTypeDescr', how='left')
)

# Specify the file path and sheet name
file_path = '../xlsx/substation_maintenance.xlsx'
sheet_name = 'avg_maint_cost'

# Append the sheet to the existing workbook. if_sheet_exists='replace' lets this
# cell be re-run: with the default ('error'), a second run raises ValueError
# because the sheet is already present. (Requires pandas >= 1.3.)
with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    result_df.to_excel(writer, sheet_name=sheet_name, index=False)

result_df



Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,Average_ActualTotalCost_All_Time,Sample_Size_All_Time,Average_ActualTotalCost_current_Year,Sample_Size_current_Year,Average_ActualTotalCost_Previous_Year,Sample_Size_Previous_Year,Average_Last_3_Years,Sample_Size_Last_3_Years,Average_Last_5_Years,Sample_Size_Last_5_Years
0,Substation,1209.215126,3533,1371.981931,725,1110.978073,1240,1156.344664,2742,1154.562914,2780
