# JDE ETL Source Design
## Goal:  Generate source SQL with friendly names and built-in data Conversion
1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals
2. Pull *Specific* Table fields
3. Create SQL mapping pull with data-conversion

In [1]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import os, sys
import warnings

warnings.filterwarnings('ignore')

In [2]:
from decouple import config
# Directory decouple searches for its settings file — presumably the one that
# defines DATABASE_URL (read when the engine is created); confirm.
# NOTE(review): hard-coded absolute path; adjust when running outside this environment.
config.search_path = '/home/jovyan/work'

### Connect to SQL DB

In [3]:
# SQLAlchemy engine used by every pd.read_sql_query call in this notebook.
# The connection URL is read from the DATABASE_URL setting via decouple rather
# than being written into the notebook.
engine = create_engine(config('DATABASE_URL'))

### 1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals

In [4]:
# Linked-server name passed to OPENQUERY, and the library (schema) that
# qualifies the F9210 / F9202 tables in the field-metadata query below.
sql_field_meta_server = 'ESYS_PROD'
sql_field_meta_lib = 'ARCPCOM71'

In [5]:
# Field-metadata query text.
# - The inner SELECT is sent to the linked server through OPENQUERY; it left-joins
#   F9210 to F9202 on FRDTAI, keeping only F9202 rows with blank FRLNGP and FRSYR.
# - The outer SELECT trims the data item, renames the columns to friendly names and
#   substitutes 0 / 'zNA' for NULL decimals / description.
# - Single quotes inside the OPENQUERY text are doubled ('' '') because the inner
#   query is itself a quoted string.
# - The three {} slots are filled with the linked server, then the library twice.
sql_field_meta = '''
SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY ({}, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		{}.F9210 t
		LEFT JOIN {}.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')

'''.format(sql_field_meta_server, sql_field_meta_lib, sql_field_meta_lib)

In [6]:
print(sql_field_meta)


SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY (ESYS_PROD, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		ARCPCOM71.F9210 t
		LEFT JOIN ARCPCOM71.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')




In [7]:
# Run the field-metadata query and load the result into a DataFrame.
df_field_meta = pd.read_sql_query(sql=sql_field_meta, con=engine)

In [8]:
# Metadata columns that hold numbers; values that cannot be parsed become NaN
# (errors='coerce') and are then replaced by 0 together with any other missing value.
numeric_meta_cols = ['data_type', 'data_item_size', 'display_decimals']

# Assigning by column label (instead of positional iloc[:, [2, 3, 4]]) replaces the
# columns outright, so the converted numeric dtype is kept and the code no longer
# depends on the column order of the SELECT.
df_field_meta[numeric_meta_cols] = df_field_meta[numeric_meta_cols].apply(pd.to_numeric, errors='coerce')
df_field_meta = df_field_meta.fillna(value=0)

### 2. Pull *Specific* Table fields

#  Set Table Name HERE

In [9]:
# Table whose columns are looked up in QSYS2.SYSCOLUMNS and for which the
# mapping SQL is generated. Uncomment one of the alternatives to switch table.
sql_table = 'F4211'
#sql_table = 'F060116'
#sql_table = 'RPTRULES'

# Linked server used in OPENQUERY, and the library (schema) that owns sql_table.
sql_link_server = 'ESYS_PROD'
#sql_lib = 'ARCPTEL'
sql_lib = 'ARCPDTA71'
#sql_lib = 'ESEND'
#sql_lib = 'HSIPDTA71'


# Prefix (trailing dot included) written in front of the staging table name in
# the generated "-- INTO" line.
stage_db_schema = 'Integration.'
# When True, field_format_sel wraps data_type 11 columns in a DATE(...) conversion;
# when False those columns are selected unchanged.
convert_julian_date = True

# Library name pattern (presumably company, environment, kind, version — confirm):
# [ARC | HSI] [P | D] [DTA | CDC] [ 71 | 94]


In [10]:
# Column-catalog query text: reads every QSYS2.SYSCOLUMNS row for the chosen
# library/table through OPENQUERY, ordered by ORDINAL_POSITION.
# The {} slots are filled with the linked server, the library and the table name;
# the doubled single quotes ('') are quotes escaped inside the OPENQUERY string.
sql_table_fields = '''

SELECT * from OPENQUERY ({}, '
	SELECT
		*
	FROM
		QSYS2.SYSCOLUMNS
	WHERE
        TABLE_SCHEMA = ''{}'' AND
		TABLE_NAME in( ''{}'')
    ORDER BY 
        ORDINAL_POSITION
')

''' .format(sql_link_server, sql_lib, sql_table)

In [11]:
#print (sql_table_fields)

In [12]:
# Run the column-catalog query and load the table's column list into a DataFrame.
df_table_fields = pd.read_sql_query(sql=sql_table_fields, con=engine)

#### Join table fields with data dictionary meta-data

In [13]:
#df_table_fields

In [14]:
# Keep only the catalog columns used downstream. .copy() makes the result an
# independent frame, so adding a column to it later does not write into a slice
# of the query result (which triggers SettingWithCopyWarning).
df_table_fields = df_table_fields[['ORDINAL_POSITION', 'COLUMN_NAME', 'COLUMN_TEXT', 'DATA_TYPE','LENGTH', 'NUMERIC_PRECISION']].copy()

In [15]:
# Drop the first two characters of each column name (e.g. SDKCOO -> KCOO) to get
# the data item key used to join against the field metadata. .assign returns a new
# frame instead of setting a column on a possibly-sliced one.
df_table_fields = df_table_fields.assign(data_item=df_table_fields['COLUMN_NAME'].str[2:])

In [16]:
df_table_fields.head()

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item
0,1,SDKCOO,Order Company. . . . . . . . . . . . . .,CHAR,5,,KCOO
1,2,SDDOCO,Order Number . . . . . . . . . . . . . .,NUMERIC,8,8.0,DOCO
2,3,SDDCTO,Order Type . . . . . . . . . . . . . . .,CHAR,2,,DCTO
3,4,SDLNID,Line Number. . . . . . . . . . . . . . .,DECIMAL,6,6.0,LNID
4,5,SDSFXO,Order Suffix . . . . . . . . . . . . . .,CHAR,3,,SFXO


In [17]:
df_table_fields.data_item.unique()

array(['KCOO', 'DOCO', 'DCTO', 'LNID', 'SFXO', 'SFX', 'MCU', 'CO', 'OKCO',
       'OORN', 'OCTO', 'OGNO', 'RKCO', 'RORN', 'RCTO', 'RLLN', 'DMCT',
       'DMCS', 'BALU', 'AN8', 'SHAN', 'MKFR', 'BTAN', 'PA8', 'DRQJ',
       'TRDJ', 'PDDJ', 'OPDJ', 'ADDJ', 'IVD', 'CNDJ', 'DGL', 'RSDJ',
       'PEFJ', 'PPDJ', 'RQSJ', 'ADLJ', 'DRQT', 'RSDT', 'VR01', 'VR02',
       'VR03', 'ITM', 'LITM', 'AITM', 'MERL', 'LOCN', 'LOTN', 'FRGD',
       'THGD', 'ACGD', 'FRMP', 'THRP', 'APOT', 'EXDP', 'DSC1', 'DSC2',
       'LNTY', 'NXTR', 'LTTR', 'HOLD', 'EMCU', 'HDBU', 'DMBU', 'RLIT',
       'KTLN', 'CPNT', 'RKIT', 'KTP', 'CSID', 'SRP1', 'SRP2', 'SRP3',
       'SRP4', 'SRP5', 'PRP1', 'PRP2', 'PRP3', 'PRP4', 'PRP5', 'DMS1',
       'DMT1', 'UOM', 'UORG', 'SOQS', 'SOBK', 'SOCN', 'SONE', 'UOPN',
       'QTYT', 'QRLV', 'COMM', 'OTQY', 'BCRC', 'UPRC', 'AEXP', 'AOPN',
       'PROV', 'TPC', 'APUM', 'LPRC', 'UNCS', 'ECST', 'CSTO', 'TCST',
       'INMG', 'PTC', 'RYIN', 'DTBS', 'TRDC', 'FUN2', 'ASN', 'OSEQ',
       'PRGR

In [18]:
# Left join: every table column is kept; metadata columns are NaN where the data
# item has no match in df_field_meta.
dff = df_table_fields.merge(df_field_meta, how='left', on='data_item')

#### Cleanup final field name
Remove trailing dots and special characters, and convert to lower_case.

In [19]:
# Turn the dictionary description into a lower_snake_case identifier.
# regex= is passed explicitly on every replace: the default changed between pandas
# versions, and with regex=False the character-class pattern would be matched as
# literal text and strip nothing.
dff['row_description_final'] = (
    dff['row_description']
    .str.rstrip('. ')                                         # trailing ". . ." filler
    .str.replace('%', 'pct', regex=False)                     # literal percent sign
    .str.replace('$', 'amt', regex=False)                     # literal dollar sign (not end-of-string)
    .str.replace(r'[^0-9a-z ]', '', case=False, regex=True)   # keep only letters, digits and spaces
    .str.replace(r' +', ' ', regex=True)                      # collapse runs of spaces of any length
    .str.lower()
    .str.replace(' ', '_', regex=False)
)

#### Override with Defaults

In [20]:
# Friendly names that replace the generated description for specific data items.
default_names = {
    'LITM': 'item_number',
    'AN8': 'billto',
    'SHAN': 'shipto',
    'DOCO': 'salesorder_number',
}

# .loc with a row mask and a column label writes straight into dff; the chained
# form dff[col][mask] = value may assign to a temporary copy instead.
for data_item, friendly_name in default_names.items():
    dff.loc[dff['data_item'] == data_item, 'row_description_final'] = friendly_name

### 3. Create SQL mapping pull with data-conversion

In [21]:
# Count table columns per (catalog DATA_TYPE, metadata data_type, display_decimals)
# combination — the latter two are the values field_format_sel branches on below.
dff.groupby(['DATA_TYPE', 'data_type','display_decimals'])['ORDINAL_POSITION'].count()

DATA_TYPE  data_type  display_decimals
CHAR       1.0        0.0                  74
           2.0        0.0                 115
DECIMAL    9.0        0.0                  17
                      2.0                   6
                      3.0                  10
                      4.0                  12
NUMERIC    9.0        0.0                  36
                      1.0                   1
                      3.0                   1
           11.0       0.0                  20
Name: ORDINAL_POSITION, dtype: int64

In [22]:
# Preview the joined frame; head() avoids dumping every row into the notebook.
dff.head(10)

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,SDKCOO,Order Company. . . . . . . . . . . . . .,CHAR,5,,KCOO,A,2.0,5.0,0.0,Order Number Document Company. . . . . .,order_number_document_company
1,2,SDDOCO,Order Number . . . . . . . . . . . . . .,NUMERIC,8,8.0,DOCO,S,9.0,8.0,0.0,Order Number . . . . . . . . . . . . . .,salesorder_number
2,3,SDDCTO,Order Type . . . . . . . . . . . . . . .,CHAR,2,,DCTO,A,2.0,2.0,0.0,Order Type . . . . . . . . . . . . . . .,order_type
3,4,SDLNID,Line Number. . . . . . . . . . . . . . .,DECIMAL,6,6.0,LNID,P,9.0,6.0,3.0,Line Number. . . . . . . . . . . . . . .,line_number
4,5,SDSFXO,Order Suffix . . . . . . . . . . . . . .,CHAR,3,,SFXO,A,2.0,3.0,0.0,Order Suffix . . . . . . . . . . . . . .,order_suffix
5,6,SDSFX,Pay Item . . . . . . . . . . . . . . . .,CHAR,3,,SFX,A,2.0,3.0,0.0,Pay Item . . . . . . . . . . . . . . . .,pay_item
6,7,SDMCU,Business Unit. . . . . . . . . . . . . .,CHAR,12,,MCU,A,2.0,12.0,0.0,Business Unit. . . . . . . . . . . . . .,business_unit
7,8,SDCO,Company. . . . . . . . . . . . . . . . .,CHAR,5,,CO,A,2.0,5.0,0.0,Company. . . . . . . . . . . . . . . . .,company
8,9,SDOKCO,Original Order Document Company. . . . .,CHAR,5,,OKCO,A,2.0,5.0,0.0,Original Order Document Company. . . . .,original_order_document_company
9,10,SDOORN,Original Order Number. . . . . . . . . .,CHAR,8,,OORN,A,2.0,8.0,0.0,Original Order Number. . . . . . . . . .,original_order_number


In [23]:
def field_format_sel(col_name, col_type, col_dec, convert_julian=None):
    """Build the select expression for one column of the pass-through query.

    Parameters
    ----------
    col_name : str
        Physical column name (e.g. ``'SDLNID'``); also used as the alias.
    col_type : number
        The column's ``data_type`` value. ``9`` selects the decimal-scaling
        branch, ``11`` the date branch; anything else (including NaN) leaves
        the column unchanged.
    col_dec : number
        The column's ``display_decimals`` value. When positive, a type-9
        column is divided by ``10**col_dec`` and cast to ``DEC(15, col_dec)``.
    convert_julian : bool, optional
        Whether type-11 columns are wrapped in the ``DATE(...)`` conversion.
        When omitted, the notebook-level ``convert_julian_date`` flag is used.

    Returns
    -------
    str
        The SQL fragment for this column.
    """
    if col_type == 9:
        if col_dec > 0:
            # int() replaces np.int, which was removed from NumPy (1.24+).
            return 'CAST(({})/{} AS DEC({},{})) AS {}'.format(
                col_name, 10**col_dec, 15, int(col_dec), col_name)
        return col_name

    if col_type == 11:
        if convert_julian is None:
            convert_julian = convert_julian_date
        if convert_julian:
            # Adds 1900000 to the stored number and converts the 7-digit result to a DATE.
            return 'CASE WHEN {} IS NOT NULL THEN DATE(DIGITS(DEC({}+ 1900000,7,0))) ELSE NULL END AS {}'.format(
                col_name, col_name, col_name)
        return '{} as {}'.format(col_name, col_name)

    return col_name

def field_format_map(col_name, col_descr, is_etl = False):
    """Build the "<source> AS <alias>" fragment that renames one column.

    The staged name is ``col_name`` right-padded with ``_`` to at least six
    characters, followed by ``_`` and the description. A description that
    starts with a digit is prefixed with ``_``.

    With ``is_etl`` False the fragment maps the quoted physical column to the
    staged name; with ``is_etl`` True it maps the staged name to the
    description alone.
    """
    padded_name = format(col_name, '_<6')
    friendly = '_' + col_descr if col_descr[0].isdigit() else col_descr

    if is_etl:
        return '{}_{} AS {}'.format(padded_name, friendly, friendly)
    return '"{}" AS {}_{}'.format(col_name, padded_name, friendly)



# (column name, friendly description) pairs shared by both rename lists.
name_descr_pairs = list(zip(dff['COLUMN_NAME'], dff['row_description_final']))

# Physical column -> staged name (outer SELECT of the generated query).
sql_field_map = ', '.join(field_format_map(name, descr) for name, descr in name_descr_pairs)

# Staged name -> friendly description only.
sql_field_etl = ', '.join(field_format_map(name, descr, is_etl=True) for name, descr in name_descr_pairs)

# Per-column select expressions for the inner pass-through query.
sql_field_sel = ', '.join(
    map(field_format_sel, dff['COLUMN_NAME'], dff['data_type'], dff['display_decimals'])
)


In [24]:
sql_field_sel

'SDKCOO, SDDOCO, SDDCTO, CAST((SDLNID)/1000.0 AS DEC(15,3)) AS SDLNID, SDSFXO, SDSFX, SDMCU, SDCO, SDOKCO, SDOORN, SDOCTO, CAST((SDOGNO)/1000.0 AS DEC(15,3)) AS SDOGNO, SDRKCO, SDRORN, SDRCTO, CAST((SDRLLN)/1000.0 AS DEC(15,3)) AS SDRLLN, SDDMCT, SDDMCS, SDBALU, SDAN8, SDSHAN, SDMKFR, SDBTAN, SDPA8, CASE WHEN SDDRQJ IS NOT NULL THEN DATE(DIGITS(DEC(SDDRQJ+ 1900000,7,0))) ELSE NULL END AS SDDRQJ, CASE WHEN SDTRDJ IS NOT NULL THEN DATE(DIGITS(DEC(SDTRDJ+ 1900000,7,0))) ELSE NULL END AS SDTRDJ, CASE WHEN SDPDDJ IS NOT NULL THEN DATE(DIGITS(DEC(SDPDDJ+ 1900000,7,0))) ELSE NULL END AS SDPDDJ, CASE WHEN SDOPDJ IS NOT NULL THEN DATE(DIGITS(DEC(SDOPDJ+ 1900000,7,0))) ELSE NULL END AS SDOPDJ, CASE WHEN SDADDJ IS NOT NULL THEN DATE(DIGITS(DEC(SDADDJ+ 1900000,7,0))) ELSE NULL END AS SDADDJ, CASE WHEN SDIVD IS NOT NULL THEN DATE(DIGITS(DEC(SDIVD+ 1900000,7,0))) ELSE NULL END AS SDIVD, CASE WHEN SDCNDJ IS NOT NULL THEN DATE(DIGITS(DEC(SDCNDJ+ 1900000,7,0))) ELSE NULL END AS SDCNDJ, CASE WHEN SDDGL 

In [25]:
# Final statement: the outer SELECT renames columns to friendly names, the inner
# OPENQUERY applies the per-column conversions on the linked server.
# Named placeholders replace eleven positional {} slots so each value is bound
# once; the DROP comment now uses stage_db_schema like the INTO line instead of
# a hard-coded schema.
sql_table_map = '''

--------------------------------------------------------------------------------
-- DROP TABLE {stage}{lib}_{table}_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    {row_limit} 
    {field_map} 

-- INTO {stage}{lib}_{table}_<instert_friendly_name_here>

FROM 
    OPENQUERY ({server}, '

	SELECT
		{field_sel}

	FROM
		{lib}.{table}
--    WHERE
--        <insert custom code here>
--    ORDER BY
--        <insert custom code here>
')

--------------------------------------------------------------------------------

'''.format(
    stage=stage_db_schema,
    lib=sql_lib,
    table=sql_table,
    row_limit='Top 5',   # sample-size limit for the test pull
    field_map=sql_field_map,
    server=sql_link_server,
    field_sel=sql_field_sel,
)




### Output Table
1. Use the generated SQL to create the STAGE table through the SQL linked server.
2. Note that some Julian (`_JDT`) date fields will still need to be converted afterwards (post-load).

In [26]:
print(sql_table_map)



--------------------------------------------------------------------------------
-- DROP TABLE Integration.ARCPDTA71_F4211_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    Top 5 
    "SDKCOO" AS SDKCOO_order_number_document_company, "SDDOCO" AS SDDOCO_salesorder_number, "SDDCTO" AS SDDCTO_order_type, "SDLNID" AS SDLNID_line_number, "SDSFXO" AS SDSFXO_order_suffix, "SDSFX" AS SDSFX__pay_item, "SDMCU" AS SDMCU__business_unit, "SDCO" AS SDCO___company, "SDOKCO" AS SDOKCO_original_order_document_company, "SDOORN" AS SDOORN_original_order_number, "SDOCTO" AS SDOCTO_original_order_type, "SDOGNO" AS SDOGNO_original_line_number, "SDRKCO" AS SDRKCO_related_order_key_company, "SDRORN" AS SDRORN_related_order_number, "SDRCTO" AS SDRCTO_related_order_type, "SDRLLN" AS SDRLLN_related_poso_line_number, "SDDMCT" AS SDDMCT_contract_number, "SDDMCS" AS SDDMCS_contract_supplement, "SDBALU" AS SDBALU_contract_balances_updated_yn

In [27]:
# Preview the joined frame; head() avoids dumping every row into the notebook.
dff.head(10)

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,SDKCOO,Order Company. . . . . . . . . . . . . .,CHAR,5,,KCOO,A,2.0,5.0,0.0,Order Number Document Company. . . . . .,order_number_document_company
1,2,SDDOCO,Order Number . . . . . . . . . . . . . .,NUMERIC,8,8.0,DOCO,S,9.0,8.0,0.0,Order Number . . . . . . . . . . . . . .,salesorder_number
2,3,SDDCTO,Order Type . . . . . . . . . . . . . . .,CHAR,2,,DCTO,A,2.0,2.0,0.0,Order Type . . . . . . . . . . . . . . .,order_type
3,4,SDLNID,Line Number. . . . . . . . . . . . . . .,DECIMAL,6,6.0,LNID,P,9.0,6.0,3.0,Line Number. . . . . . . . . . . . . . .,line_number
4,5,SDSFXO,Order Suffix . . . . . . . . . . . . . .,CHAR,3,,SFXO,A,2.0,3.0,0.0,Order Suffix . . . . . . . . . . . . . .,order_suffix
5,6,SDSFX,Pay Item . . . . . . . . . . . . . . . .,CHAR,3,,SFX,A,2.0,3.0,0.0,Pay Item . . . . . . . . . . . . . . . .,pay_item
6,7,SDMCU,Business Unit. . . . . . . . . . . . . .,CHAR,12,,MCU,A,2.0,12.0,0.0,Business Unit. . . . . . . . . . . . . .,business_unit
7,8,SDCO,Company. . . . . . . . . . . . . . . . .,CHAR,5,,CO,A,2.0,5.0,0.0,Company. . . . . . . . . . . . . . . . .,company
8,9,SDOKCO,Original Order Document Company. . . . .,CHAR,5,,OKCO,A,2.0,5.0,0.0,Original Order Document Company. . . . .,original_order_document_company
9,10,SDOORN,Original Order Number. . . . . . . . . .,CHAR,8,,OORN,A,2.0,8.0,0.0,Original Order Number. . . . . . . . . .,original_order_number


In [28]:
# Smoke test: run the generated statement (limited by its "Top 5" clause) and time it.
%time df_table_map = pd.read_sql_query(sql_table_map, engine);

CPU times: user 86.9 ms, sys: 412 µs, total: 87.3 ms
Wall time: 4.16 s


### Next steps...
Add SQL to SQL Tools data package 

In [29]:
df_table_map

Unnamed: 0,SDKCOO_order_number_document_company,SDDOCO_salesorder_number,SDDCTO_order_type,SDLNID_line_number,SDSFXO_order_suffix,SDSFX__pay_item,SDMCU__business_unit,SDCO___company,SDOKCO_original_order_document_company,SDOORN_original_order_number,...,SDABR4_enh_subledger_code_4,SDABT4_enh_subledger_type_4,SDTORG_transaction_originator,SDUSER_user_id,SDPID__program_id,SDJOBN_work_station_id,SDUPMJ_date_updated,SDTDAY_time_of_day,SDUPAJ_date_added,SDTENT_time_entered
0,2000,1056020.0,CA,3.0,0,,20099990011,2000,,,...,,,SCHART,ARCONA,X5723,ECS_PNDSRV,2018-03-16,144439.0,,0.0
1,2000,1056026.0,CA,2.0,0,,20099990012,2000,,,...,,,SMAT05,ARCONA,X5723,ECS_PNDSRV,2018-03-16,150701.0,,0.0
2,2000,1056028.0,CA,2.0,0,,20099990016,2000,,,...,,,VPENNE,ARCONA,X5723,ECS_PNDSRV,2018-03-16,151050.0,,0.0
3,2000,1056035.0,CA,2.0,0,,20099990011,2000,,,...,,,NROY,ARCONA,X5723,ECS_PNDSRV,2018-03-16,152742.0,,0.0
4,2000,1056057.0,CA,2.0,0,,20001000000,2000,,,...,,,PCON01,ARCONA,X5723,ECS_PNDSRV,2018-03-16,155711.0,,0.0
