In [1]:
import pandas as pd
import pyodbc

from contextlib import closing
from pathlib import Path

# T-SQL sent to the warehouse: average ActualCost of work orders with order
# type 'MW04' (presumably the renewal order type, per the SQL comment) for
# company code '5000', grouped by the technical object type description of the
# functional location each order is attached to.
# NOTE(review): the LEFT JOIN keeps work orders whose functional location has
# no match in all_floc, so those orders are averaged under a NULL AssetType
# group -- confirm this is intended.
sql_query = """
    -- average cost per renewal work order,
    WITH work_order AS (
        SELECT
            AUFNR_Order,
            TPLNR_FunctionalLocationInternalID,
            AUART_OrderTypeID,
            ActualCost
        FROM [myANALYTICS_SP].[silver.fact.enterprise.asset].[vw_Fact_WorkOrder]
        WHERE
            BUKRS_CompanyCodeID = '5000'
            AND ActualCost > 0
            AND AUART_OrderTypeID = 'MW04'
            AND TPLNR_FunctionalLocationInternalID IS NOT NULL
    ),
    all_floc AS (
        SELECT
            FLOC_TPLNR_FunctionalLocationInternalKey,
            FLOC_STRNO_FunctionalLocation,
            FLOC_EARTX_TechnicalObjectTypeDescr,
            FLOC_BUKRS_CompanyCode
        FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
        WHERE FLOC_BUKRS_CompanyCode = '5000'
    )

    SELECT
        SubQuery.FLOC_EARTX_TechnicalObjectTypeDescr AS 'AssetType',
        AVG(ActualCost) AS 'AverageCost'
    FROM (
        SELECT
            work_order.AUFNR_Order,
            all_floc.FLOC_STRNO_FunctionalLocation,
            all_floc.FLOC_EARTX_TechnicalObjectTypeDescr,
            ActualCost
        FROM work_order
            LEFT JOIN all_floc ON all_floc.FLOC_TPLNR_FunctionalLocationInternalKey = work_order.TPLNR_FunctionalLocationInternalID
    ) AS SubQuery
    GROUP BY SubQuery.FLOC_EARTX_TechnicalObjectTypeDescr
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Connect using Windows Authentication, run the query into a DataFrame, and
# always release the connection afterwards. `closing` is used because pyodbc's
# own `with conn:` only manages the transaction and does not close the
# connection.
with closing(pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')) as conn:
    df1 = pd.read_sql_query(sql_query, conn)

# Cache the result on disk; create the target folder first so the cell also
# works on a fresh checkout where "pkl/" does not exist yet.
pkl_path = Path("pkl/asset_renewal_work_order_based.pkl")
pkl_path.parent.mkdir(parents=True, exist_ok=True)
df1.to_pickle(pkl_path)

# Display the DataFrame
df1

  df1 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,AssetType,AverageCost
0,LV Power Systems,6787.946666
1,CER,9182.693333
2,Lvl5 Feature Locatio,13297.018651
3,Catchpoints,11164.62
4,Level Crossing,10570.795
5,Insulators,1872.5
6,Loading Facility,163794.430571
7,Standby Supply,21833.6825
8,GIJ,9774.779305
9,Bridges,116273.18


In [2]:
import pandas as pd
import pyodbc

from contextlib import closing

# T-SQL sent to the warehouse: every distinct technical object type
# description among the functional locations of company code '5000'.
sql_query = """
    SELECT DISTINCT
        FLOC_EARTX_TechnicalObjectTypeDescr
    FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
    WHERE FLOC_BUKRS_CompanyCode = '5000'
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Connect using Windows Authentication, run the query into a DataFrame, and
# always release the connection afterwards. `closing` is used because pyodbc's
# own `with conn:` only manages the transaction and does not close the
# connection.
with closing(pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')) as conn:
    df2 = pd.read_sql_query(sql_query, conn)

# Display the DataFrame
df2

  df2 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr
0,LV Power Systems
1,Mechanical Signal
2,DED Systems
3,DC-DC Converter
4,CER
...,...
252,Rail Handling Crane
253,Ballast Cleaning Mch
254,Wagon: Hopper
255,Equip: Trimming


In [4]:
# Left-join the per-asset-type averages (df1) onto the full list of asset
# types (df2), so types with no matching average keep a NaN AverageCost.
# Then keep only the type description and cost columns, ordered by type.
merged_df = (
    df2.merge(
        df1,
        how='left',
        left_on='FLOC_EARTX_TechnicalObjectTypeDescr',
        right_on='AssetType',
    )
    .loc[:, ['FLOC_EARTX_TechnicalObjectTypeDescr', 'AverageCost']]
    .sort_values(by='FLOC_EARTX_TechnicalObjectTypeDescr')
)
merged_df

Unnamed: 0,FLOC_EARTX_TechnicalObjectTypeDescr,AverageCost
135,2M Digital Branch,
224,2M VF Mux,
203,8M Mux,
35,AC-DC Converter,
59,AC/DC Track,
...,...,...
238,Wheelset,
75,Wireless Access Pt,
126,Yard Track,131540.490833
32,ZRS,
