In [2]:
import pandas as pd
import pyodbc
from datetime import datetime

In [3]:
# Work-order extract: MW04 orders for company code 5000 whose status is
# 'Practically Completed' and whose actual total cost is non-null and positive.
# CompletionDate is TechCompletionDate, falling back to BasicFinishDate when
# TechCompletionDate is NULL.
sql_query = """
    -- all WO and their flocid
    SELECT
        [WorkOrderNumber],
        [OrderType],
        [CompanyCode],
        [FunctionLocation],
        CASE
            WHEN [TechCompletionDate] IS NULL THEN [BasicFinishDate]
            ELSE [TechCompletionDate]
        END AS 'CompletionDate',
        [MaintenanceActivityType],
        [MaintenanceActivityTypeDesc],
        [MainUserStatus],
        [MainUserStatusDesc],
        [ActualTotalCost]
    FROM [myANALYTICS_SP].[bronze.batch.belowrail.asset.ringfenced].[vw_WorkOrder]
    WHERE
        OrderType = 'MW04'
        AND CompanyCode = '5000'
        AND MainUserStatusDesc = 'Practically Completed'
        AND ActualTotalCost IS NOT NULL
        AND ActualTotalCost > 0
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# Close the connection explicitly so it is released even if the query raises;
# every cell in this notebook opens its own connection, so nothing downstream
# relies on this one staying open.
try:
    df1 = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Persist the raw extract next to the notebook tree for reuse without re-querying.
df1.to_pickle("../pkl/work_order_all.pkl")

# Display the DataFrame
df1

  df1 = pd.read_sql_query(sql_query, conn)


Unnamed: 0,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost
0,000080085665,MW04,5000,TO000654,2018-12-19,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,71132.19
1,000080085667,MW04,5000,TO000470,2018-04-04,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,60383.24
2,000080085668,MW04,5000,CP000045,2018-05-08,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,5545.31
3,000080085669,MW04,5000,TO000352,2018-06-01,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,57677.62
4,000080085670,MW04,5000,TO000483,2018-06-05,C16,Turnout Renew Maj Civil Parts,PCOM,Practically Completed,50925.56
...,...,...,...,...,...,...,...,...,...,...
9726,000080117443,MW04,5000,LX003196,2024-06-13,C58,Level Xing Refurb/Renew,PCOM,Practically Completed,14901.08
9727,000080117445,MW04,5000,LX005559,2024-05-29,C58,Level Xing Refurb/Renew,PCOM,Practically Completed,1321.59
9728,000080116605,MW04,5000,GA-01ML,2024-06-06,C21,Track Upgrade,PCOM,Practically Completed,4180.45
9729,000080116606,MW04,5000,BW-04PL,2024-05-01,C63,Joint Renewal,PCOM,Practically Completed,13517.25


In [4]:
# Distinct (functional location, startup date) pairs for company code 5000,
# restricted to rows with a non-null functional location.
# NOTE(review): the `IFLOT_DATAB_StartupDate > 0` predicate presumably filters
# out empty/zero dates -- confirm against the column's actual type.
sql_query = """
    SELECT DISTINCT
        FLOC_STRNO_FunctionalLocation,
        IFLOT_DATAB_StartupDate
    FROM
    myANALYTICS_SP.[workarea.silver.dimension.enterprise.asset].vw_Dim_FunctionalLocationStartupDate
    WHERE IFLOT_DATAB_StartupDate > 0
    AND FLOC_STRNO_FunctionalLocation IS NOT NULL
    AND FLOC_BUKRS_CompanyCode = '5000'
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';DATABASE=myANALYTICS_SP;Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# Close the connection explicitly so it is released even if the query raises.
try:
    startupdate_df = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Parse the startup date column into pandas datetimes.
startupdate_df['IFLOT_DATAB_StartupDate'] = pd.to_datetime(startupdate_df['IFLOT_DATAB_StartupDate'])

# Display the DataFrame
startupdate_df

  startupdate_df = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_STRNO_FunctionalLocation,IFLOT_DATAB_StartupDate
0,ST000480-63,2014-01-01
1,ST000850-48,1994-01-01
2,ST000960-87,1987-01-01
3,ST008220-29,2011-11-16
4,ST003870-03,2013-01-01
...,...,...
25938,ST000600-53,2018-07-01
25939,ST000170-61,2014-01-01
25940,ST004120-17,2016-05-29
25941,TO004059,2021-01-01


In [8]:
# Attach work orders to every functional location that has a startup date.
# Left join: each startup-date row is kept; locations with no matching work
# order get NaN in the work-order columns, and a location with several work
# orders appears once per order.
merged_df3 = startupdate_df.merge(
    df1,
    how='left',
    left_on='FLOC_STRNO_FunctionalLocation',
    right_on='FunctionLocation',
)
merged_df3

Unnamed: 0,FLOC_STRNO_FunctionalLocation,IFLOT_DATAB_StartupDate,WorkOrderNumber,OrderType,CompanyCode,FunctionLocation,CompletionDate,MaintenanceActivityType,MaintenanceActivityTypeDesc,MainUserStatus,MainUserStatusDesc,ActualTotalCost
0,ST000480-63,2014-01-01,,,,,,,,,,
1,ST000850-48,1994-01-01,,,,,,,,,,
2,ST000960-87,1987-01-01,,,,,,,,,,
3,ST008220-29,2011-11-16,,,,,,,,,,
4,ST003870-03,2013-01-01,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
25995,ST000600-53,2018-07-01,,,,,,,,,,
25996,ST000170-61,2014-01-01,,,,,,,,,,
25997,ST004120-17,2016-05-29,,,,,,,,,,
25998,TO004059,2021-01-01,,,,,,,,,,


In [6]:
# Getting all FLOC from [vw_Dim_FunctionalLocation] where CompanyCode = 5000 -> all_floc
# Rows are ordered by functional location.
sql_query = """
    SELECT
        FLOC_TPLNR_FunctionalLocationInternalKey,
        FLOC_STRNO_FunctionalLocation,
        FLOC_PLTXT_FunctionalLocationDescr,
        FLOC_TYPTX_FunctionalLocationCategoryDescr,
        FLOC_EQART_TechnicalObjectType,
        FLOC_EARTX_TechnicalObjectTypeDescr,
        FLOC_BUKRS_CompanyCode,
        FLOC_STORT_Location,
        FLOC_STORT_LocationDescr
    FROM [myANALYTICS_SP].[silver.dimension.enterprise.asset].[vw_Dim_FunctionalLocation]
	WHERE FLOC_BUKRS_CompanyCode = '5000'
	ORDER BY FLOC_STRNO_FunctionalLocation
"""

# Define your server name
server_name = 'myanalytics.aurizon.com.au'

# Establish a connection using Windows Authentication.
# No DATABASE is given here; the query uses a fully qualified
# [database].[schema].[view] name instead.
conn = pyodbc.connect('DRIVER={SQL Server};SERVER=' + server_name + ';Trusted_Connection=yes;')

# Execute the SQL query and load the result into a pandas DataFrame.
# Close the connection explicitly so it is released even if the query raises.
try:
    all_floc = pd.read_sql_query(sql_query, conn)
finally:
    conn.close()

# Display the DataFrame
all_floc

  all_floc = pd.read_sql_query(sql_query, conn)


Unnamed: 0,FLOC_TPLNR_FunctionalLocationInternalKey,FLOC_STRNO_FunctionalLocation,FLOC_PLTXT_FunctionalLocationDescr,FLOC_TYPTX_FunctionalLocationCategoryDescr,FLOC_EQART_TechnicalObjectType,FLOC_EARTX_TechnicalObjectTypeDescr,FLOC_BUKRS_CompanyCode,FLOC_STORT_Location,FLOC_STORT_LocationDescr
0,?0100000000000387769,AC-01CH,CALLEMONDAH BLUE ROAD 1,Track,10003,Yard Track,5000,CA0089,Callemondah
1,?0100000000000365741,AC-01JN,Jilalan Provisioning 1,Track,10003,Yard Track,5000,JI0204,Jilalan
2,?0100000000000366347,AC-01JN-JNJN,Jilalan Provisioning 1,Track,999999,Lvl 3 Segmentation,5000,JI0204,Jilalan
3,?0100000000000377748,AC-01NA,NOGOA YARD BLUE ROAD TRACK 1,Track,10003,Yard Track,5000,NO0275,Nogoa-Yamala
4,?0100000000000387811,AC-01PA,PARANA BLUE ROAD MAINLINE,Track,10001,Main Line,5000,PA0289,Parana
...,...,...,...,...,...,...,...,...,...
174332,?0100000000000253349,WL000203,Baralaba WB Strain Gauge Equip 4.225Km,Telecommunications,40000,Wayleave Objects,5000,BA0032,Baralaba
174333,?0100000000000253351,WL000204,Baralaba WB Location Box 4.225Km,Telecommunications,40000,Wayleave Objects,5000,BA0032,Baralaba
174334,?0100000000000253372,WL000205,Baralaba WB Location Box 4.165Km,Telecommunications,40000,Wayleave Objects,5000,BA0032,Baralaba
174335,?0100000000000264822,WL999999,Electric WLNL 01CR BYAA16.652Km,Bridges and other Structures,40000,Wayleave Objects,5000,AR0021,Armuna-Buckley


In [9]:
# Right-join the functional-location master data onto merged_df3 (so every
# merged_df3 row is kept), reduce the result to the location key and its
# startup date, and drop rows whose startup date is missing.
merged_df4 = (
    all_floc
    .merge(merged_df3, how='right', on='FLOC_STRNO_FunctionalLocation')
    .loc[:, ['FLOC_STRNO_FunctionalLocation', 'IFLOT_DATAB_StartupDate']]
    .loc[lambda frame: frame['IFLOT_DATAB_StartupDate'].notnull()]
)
merged_df4

Unnamed: 0,FLOC_STRNO_FunctionalLocation,IFLOT_DATAB_StartupDate
0,ST000480-63,2014-01-01
1,ST000850-48,1994-01-01
2,ST000960-87,1987-01-01
3,ST008220-29,2011-11-16
4,ST003870-03,2013-01-01
...,...,...
26001,ST000600-53,2018-07-01
26002,ST000170-61,2014-01-01
26003,ST004120-17,2016-05-29
26004,TO004059,2021-01-01
