# JDE ETL Source Design
## Goal:  Generate source SQL with friendly names and built-in data Conversion
1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals
2. Pull *Specific* Table fields
3. Create SQL mapping pull with data-conversion

In [30]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import os, sys
import warnings

warnings.filterwarnings('ignore')

In [31]:
# Settings such as DATABASE_URL (read when the engine is created) are looked up
# through python-decouple instead of being written into the notebook.
from decouple import config
# NOTE(review): presumably the directory decouple searches for its settings
# file - confirm. The absolute path is specific to this Jupyter environment.
config.search_path = '/home/jovyan/work'

### Connect to SQL DB

In [32]:
# Build the SQLAlchemy engine from the DATABASE_URL setting so that no
# connection string or credential is embedded in the notebook itself.
engine = create_engine(config('DATABASE_URL'))

### 1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals

In [33]:
# Linked-server name used as the first argument of OPENQUERY in the
# field-metadata query.
sql_field_meta_server = 'ESYS_PROD'
# Library (schema) that qualifies the F9210 and F9202 tables in that query.
sql_field_meta_lib = 'ARCPCOM71'

In [34]:
# Field-metadata query, built in two layers:
#   * inner pass-through query (executed on the linked server via OPENQUERY):
#     F9210 left-joined to F9202 on FRDTAI, keeping only F9202 rows whose
#     FRLNGP and FRSYR are blank;
#   * outer query: trims the data item, defaults missing decimals to 0 and
#     missing descriptions to 'zNA', and assigns the friendly column names.
# Single quotes inside the pass-through text are doubled ('' '') because that
# text is itself a single-quoted string argument of OPENQUERY.
# The three {} placeholders are filled, in order, with the linked server and
# (twice) the metadata library.
sql_field_meta = '''
SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY ({}, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		{}.F9210 t
		LEFT JOIN {}.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')

'''.format(sql_field_meta_server, sql_field_meta_lib, sql_field_meta_lib)

In [35]:
print(sql_field_meta)


SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY (ESYS_PROD, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		ARCPCOM71.F9210 t
		LEFT JOIN ARCPCOM71.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')




In [36]:
# Run the field-metadata query and load every returned row into a DataFrame.
df_field_meta = pd.read_sql_query(sql=sql_field_meta, con=engine)

In [37]:
# Columns that must be numeric for the type/decimal comparisons made later.
# They are addressed by name rather than by position so the conversion keeps
# working if the SELECT list of the metadata query is reordered.
numeric_meta_cols = ['data_type', 'data_item_size', 'display_decimals']

# Assigning through df[cols] replaces the columns, so the numeric dtype sticks.
# A whole-column .iloc assignment writes into the existing (object) columns on
# recent pandas and leaves their dtype unchanged.
# errors='coerce' turns any non-numeric value into NaN.
df_field_meta[numeric_meta_cols] = df_field_meta[numeric_meta_cols].apply(pd.to_numeric, errors='coerce')

# Replace every remaining missing value (including the NaN produced by the
# coercion above) with 0; rebinding instead of inplace=True keeps the cell a
# plain, re-runnable assignment.
df_field_meta = df_field_meta.fillna(value=0)

### 2. Pull *Specific* Table fields

#  Set Table Name HERE

In [38]:
# Table whose columns are looked up in the catalog query and selected in the
# generated SQL. The commented-out lines are alternative tables kept for
# quick switching.
sql_table = 'F5698'
#sql_table = 'F060116'
#sql_table = 'RPTRULES'

# Linked-server name used in the OPENQUERY calls for this table.
sql_link_server = 'ESYS_PROD'
# Library (schema) that owns sql_table; exactly one sql_lib line should be active.
#sql_lib = 'ARCPTEL'
sql_lib = 'ARCPDTA71'
#sql_lib = 'ESEND'
#sql_lib = 'HSIPDTA71'


# Prefix (including the trailing dot) placed before the staging table name in
# the generated "-- INTO" comment.
stage_db_schema = 'Integration.'
# When True, field_format_sel wraps data_type 11 columns in a DATE(...)
# conversion; when False they are selected unchanged.
convert_julian_date = True

# NOTE(review): presumably the naming pattern of the library names above - confirm.
# [ARC | HSI] [P | D] [DTA | CDC] [ 71 | 94]


In [39]:
# Catalog query: lists the columns of the selected table by reading
# QSYS2.SYSCOLUMNS through OPENQUERY on the linked server, filtered by
# TABLE_SCHEMA and TABLE_NAME and ordered by ORDINAL_POSITION.
# The three {} placeholders are filled, in order, with the linked server,
# the library and the table name; the doubled single quotes are needed
# because the pass-through text is itself a single-quoted string.
sql_table_fields = '''

SELECT * from OPENQUERY ({}, '
	SELECT
		*
	FROM
		QSYS2.SYSCOLUMNS
	WHERE
        TABLE_SCHEMA = ''{}'' AND
		TABLE_NAME in( ''{}'')
    ORDER BY 
        ORDINAL_POSITION
')

''' .format(sql_link_server, sql_lib, sql_table)

In [40]:
#print (sql_table_fields)

In [41]:
# Run the catalog query and load the column list of the selected table.
df_table_fields = pd.read_sql_query(sql=sql_table_fields, con=engine)

#### Join table fields with data dictionary meta-data

In [42]:
#df_table_fields

In [43]:
# Catalog columns that are needed for the mapping.
table_field_cols = ['ORDINAL_POSITION', 'COLUMN_NAME', 'COLUMN_TEXT', 'DATA_TYPE', 'LENGTH', 'NUMERIC_PRECISION']

# .copy() makes the narrowed frame independent of the full catalog result, so
# adding columns to it later is an ordinary assignment rather than a write to
# a slice (which pandas reports as SettingWithCopyWarning).
df_table_fields = df_table_fields[table_field_cols].copy()

In [44]:
# Join key for the metadata lookup: the column name without its first two
# characters (for example QTITM -> ITM).
df_table_fields['data_item'] = df_table_fields['COLUMN_NAME'].str.slice(start=2)

In [45]:
df_table_fields.head()

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item
0,1,QT$RTP,Record Type. . . . . . . . . . . . . . .,CHAR,2,,$RTP
1,2,QTITM,Item Number (Short). . . . . . . . . . .,NUMERIC,8,8.0,ITM
2,3,QTLITM,2nd Item Number. . . . . . . . . . . . .,CHAR,25,,LITM
3,4,QT$AJP,Adjustment Percent . . . . . . . . . . .,DECIMAL,5,5.0,$AJP
4,5,QT$AJD,Adjustment Dollar. . . . . . . . . . . .,NUMERIC,15,15.0,$AJD


In [46]:
df_table_fields.data_item.unique()

array(['$RTP', 'ITM', 'LITM', '$AJP', '$AJD', '$USG', 'EFFF', 'EXDE',
       '$FLD', 'AC10', '$SEQ', 'SIC', 'AC01', 'AC02', 'AC03', 'AC04',
       'AC05', 'AC06', '$FT1', '$FT2', '$FT3', '$FT4', '$FT5', '$FT6',
       '$FT7', '$FT8', '$FT9', '$FT0', 'CRTU', 'PGM', 'CRDJ', 'CRTM',
       'USER', 'PID', 'JOBN', 'UPMJ', 'TDAY'], dtype=object)

In [47]:
# Left join: every table column is kept, and metadata is attached wherever
# the data_item key has a match.
dff = df_table_fields.merge(df_field_meta, how='left', on='data_item')

#### Cleanup final field name
Remove trailing dots and special characters, and convert to lower_case.

In [48]:
# Turn the dictionary description into a friendly snake_case field name.
# Every replace states regex= explicitly: the default changed between pandas
# versions, and with regex=False the character class below would be treated
# as literal text and silently remove nothing.
dff['row_description_final'] = (
    dff['row_description']
    .str.rstrip('. ')                                        # drop the trailing ". . ." filler
    .str.replace('%', 'pct', regex=False)                    # literal percent sign
    .str.replace('$', 'amt', regex=False)                    # literal dollar sign, not an end-of-string anchor
    .str.replace(r'[^0-9a-z ]', '', case=False, regex=True)  # keep only letters, digits and spaces
    .str.replace(r' {2,}', ' ', regex=True)                  # collapse any run of spaces to one
    .str.lower()
    .str.replace(' ', '_', regex=False)
)

#### Override with Defaults

In [49]:
# Preferred friendly names for specific data items; these win over the
# name derived from the dictionary description.
FIELD_NAME_OVERRIDES = {
    'LITM': 'item_number',
    'AN8': 'billto',
    'SHAN': 'shipto',
    'DOCO': 'salesorder_number',
}

# map() yields the override where the data item is listed and NaN elsewhere;
# fillna() then restores the existing name for every other row. This is a
# single whole-column assignment, so it does not depend on chained indexing
# (dff[col][mask] = ...), which may write to a temporary copy.
dff['row_description_final'] = (
    dff['data_item'].map(FIELD_NAME_OVERRIDES).fillna(dff['row_description_final'])
)

### 3. Create SQL mapping pull with data-conversion

In [50]:
dff.groupby(['DATA_TYPE', 'data_type','display_decimals'])['ORDINAL_POSITION'].count()

DATA_TYPE  data_type  display_decimals
CHAR       1.0        0.0                  1
           2.0        0.0                 21
DECIMAL    9.0        0.0                  1
                      4.0                  1
NUMERIC    9.0        0.0                  9
           11.0       0.0                  4
Name: ORDINAL_POSITION, dtype: int64

In [51]:
dff

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,QT$RTP,Record Type. . . . . . . . . . . . . . .,CHAR,2,,$RTP,A,2.0,2.0,0.0,Record Type. . . . . . . . . . . . . . .,record_type
1,2,QTITM,Item Number (Short). . . . . . . . . . .,NUMERIC,8,8.0,ITM,S,9.0,8.0,0.0,Item Number (Short). . . . . . . . . . .,item_number_short
2,3,QTLITM,2nd Item Number. . . . . . . . . . . . .,CHAR,25,,LITM,A,2.0,25.0,0.0,2nd Item Number. . . . . . . . . . . . .,item_number
3,4,QT$AJP,Adjustment Percent . . . . . . . . . . .,DECIMAL,5,5.0,$AJP,P,9.0,5.0,4.0,Adjustment Percent . . . . . . . . . . .,adjustment_percent
4,5,QT$AJD,Adjustment Dollar. . . . . . . . . . . .,NUMERIC,15,15.0,$AJD,S,9.0,15.0,0.0,Adjustment Dollar. . . . . . . . . . . .,adjustment_dollar
5,6,QT$USG,Adjustment Usage . . . . . . . . . . . .,CHAR,1,,$USG,A,1.0,1.0,0.0,Adjustment Usage . . . . . . . . . . . .,adjustment_usage
6,7,QTEFFF,Effective From Date . . . . . . . . . .,NUMERIC,6,6.0,EFFF,S,11.0,6.0,0.0,Effective From Date . . . . . . . . . .,effective_from_date
7,8,QTEXDE,Expiration Date. . . . . . . . . . . . .,NUMERIC,6,6.0,EXDE,S,11.0,6.0,0.0,Expiration Date. . . . . . . . . . . . .,expiration_date
8,9,QT$FLD,Data Field Name. . . . . . . . . . . . .,CHAR,6,,$FLD,A,2.0,6.0,0.0,Data Field Name. . . . . . . . . . . . .,data_field_name
9,10,QTAC10,Division Code . . . . . . . .. . . . ..,CHAR,3,,AC10,A,2.0,3.0,0.0,Division Code . . . . . . . . . . . . ..,division_code


In [52]:
def field_format_sel(col_name, col_type, col_dec, convert_julian=None):
    """Return the pass-through SELECT expression for one source column.

    Parameters
    ----------
    col_name : str
        Source column name; also used as the alias of any generated expression.
    col_type : number
        Value of the ``data_type`` metadata column. 9 and 11 get special
        handling; any other value selects the column unchanged.
    col_dec : number
        Value of the ``display_decimals`` metadata column.
    convert_julian : bool, optional
        Whether ``data_type`` 11 columns are wrapped in the DATE(...)
        conversion. When omitted, the notebook-level ``convert_julian_date``
        setting is used.

    Returns
    -------
    str
        * type 9 with decimals > 0: the column divided by 10**col_dec and cast
          to DEC(15, col_dec), aliased to the column name;
        * type 11: the DATE(...) conversion (or ``<col> as <col>`` when
          conversion is off);
        * anything else: the bare column name.
    """
    if col_type == 9:
        if col_dec > 0:
            # int() replaces np.int, which is no longer available in current
            # NumPy releases. The divisor is deliberately left as 10**col_dec
            # so the generated text is unchanged (e.g. 10000.0 for 4.0).
            return 'CAST(({})/{} AS DEC({},{})) AS {}'.format(
                col_name, 10**col_dec, 15, int(col_dec), col_name)
        return col_name

    if col_type == 11:
        if convert_julian is None:
            # Fall back to the notebook setting only when the caller did not decide.
            convert_julian = convert_julian_date
        if convert_julian:
            # NOTE(review): a stored value of 0 is not NULL and would reach
            # DATE() as '1900000' - confirm how blank dates are stored.
            return 'CASE WHEN {} IS NOT NULL THEN DATE(DIGITS(DEC({}+ 1900000,7,0))) ELSE NULL END AS {}'.format(
                col_name, col_name, col_name)
        return '{} as {}'.format(col_name, col_name)

    return col_name

def field_format_map(col_name, col_descr, is_etl = False):
    """Return one ``<source> AS <alias>`` item for the generated SELECT list.

    Parameters
    ----------
    col_name : str
        Source column name. For the alias it is left-justified and padded
        with ``_`` to at least 6 characters (``QTITM`` -> ``QTITM_``).
    col_descr : str
        Friendly field name. A missing value (empty string, None or NaN, as a
        left merge produces for unmatched rows) is replaced by ``'zna'``, the
        lower-cased form of the ``'zNA'`` placeholder used for missing
        descriptions in the metadata query.
    is_etl : bool, default False
        False: ``"<col_name>" AS <padded>_<descr>``.
        True:  ``<padded>_<descr> AS <descr>``.

    Returns
    -------
    str
        The formatted mapping. A description starting with a digit gets a
        leading ``_`` wherever it is used.
    """
    padded_name = '{:_<6}'.format(col_name)

    # Guard against missing descriptions; indexing [0] below would otherwise
    # raise IndexError ('' input) or TypeError (NaN / None input).
    if not isinstance(col_descr, str) or not col_descr:
        col_descr = 'zna'

    friendly = '_' + col_descr if col_descr[0].isdigit() else col_descr

    if is_etl:
        return '{}_{} AS {}'.format(padded_name, friendly, friendly)
    return '"{}" AS {}_{}'.format(col_name, padded_name, friendly)



# Outer SELECT list: quoted source column -> prefixed friendly alias.
sql_field_map = ', '.join(
    field_format_map(name, descr)
    for name, descr in zip(dff['COLUMN_NAME'], dff['row_description_final'])
)

# ETL variant of the same list: prefixed alias -> plain friendly name.
sql_field_etl = ', '.join(
    field_format_map(name, descr, is_etl=True)
    for name, descr in zip(dff['COLUMN_NAME'], dff['row_description_final'])
)

# Pass-through SELECT list, with decimal scaling and date conversion applied
# according to each column's data_type and display_decimals.
sql_field_sel = ', '.join(
    field_format_sel(name, dtype, decimals)
    for name, dtype, decimals in zip(dff['COLUMN_NAME'], dff['data_type'], dff['display_decimals'])
)


In [53]:
sql_field_sel

'QT$RTP, QTITM, QTLITM, CAST((QT$AJP)/10000.0 AS DEC(15,4)) AS QT$AJP, QT$AJD, QT$USG, CASE WHEN QTEFFF IS NOT NULL THEN DATE(DIGITS(DEC(QTEFFF+ 1900000,7,0))) ELSE NULL END AS QTEFFF, CASE WHEN QTEXDE IS NOT NULL THEN DATE(DIGITS(DEC(QTEXDE+ 1900000,7,0))) ELSE NULL END AS QTEXDE, QT$FLD, QTAC10, QT$SEQ, QTSIC, QTAC01, QTAC02, QTAC03, QTAC04, QTAC05, QTAC06, QT$FT1, QT$FT2, QT$FT3, QT$FT4, QT$FT5, QT$FT6, QT$FT7, QT$FT8, QT$FT9, QT$FT0, QTCRTU, QTPGM, CASE WHEN QTCRDJ IS NOT NULL THEN DATE(DIGITS(DEC(QTCRDJ+ 1900000,7,0))) ELSE NULL END AS QTCRDJ, QTCRTM, QTUSER, QTPID, QTJOBN, CASE WHEN QTUPMJ IS NOT NULL THEN DATE(DIGITS(DEC(QTUPMJ+ 1900000,7,0))) ELSE NULL END AS QTUPMJ, QTTDAY'

In [54]:
# Final generated statement. Named placeholders replace the eleven positional
# {} fields so that each value is passed once and cannot drift out of order.
# The DROP TABLE comment now uses the same stage_db_schema prefix as the INTO
# comment instead of a hard-coded 'Integration.'.
#   row_limit   - text placed directly after SELECT (a row cap for previewing)
#   field_map   - outer SELECT list (source column -> friendly alias)
#   field_sel   - pass-through SELECT list executed on the linked server
sql_table_map = '''

--------------------------------------------------------------------------------
-- DROP TABLE {stage_schema}{lib}_{table}_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    {row_limit} 
    {field_map} 

-- INTO {stage_schema}{lib}_{table}_<instert_friendly_name_here>

FROM 
    OPENQUERY ({link_server}, '

	SELECT
		{field_sel}

	FROM
		{lib}.{table}
--    WHERE
--        <insert custom code here>
--    ORDER BY
--        <insert custom code here>
')

--------------------------------------------------------------------------------

'''.format(
    stage_schema=stage_db_schema,
    lib=sql_lib,
    table=sql_table,
    row_limit='Top 5',
    field_map=sql_field_map,
    link_server=sql_link_server,
    field_sel=sql_field_sel,
)




### Output Table
1. Use the generated SQL to create the STAGE table via the SQL linked server.
2. Note that some Julian (_JDT) date fields will still need to be converted after the pull.

In [55]:
print(sql_table_map)



--------------------------------------------------------------------------------
-- DROP TABLE Integration.ARCPDTA71_F5698_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    Top 5 
    "QT$RTP" AS QT$RTP_record_type, "QTITM" AS QTITM__item_number_short, "QTLITM" AS QTLITM_item_number, "QT$AJP" AS QT$AJP_adjustment_percent, "QT$AJD" AS QT$AJD_adjustment_dollar, "QT$USG" AS QT$USG_adjustment_usage, "QTEFFF" AS QTEFFF_effective_from_date, "QTEXDE" AS QTEXDE_expiration_date, "QT$FLD" AS QT$FLD_data_field_name, "QTAC10" AS QTAC10_division_code, "QT$SEQ" AS QT$SEQ_sequence_id, "QTSIC" AS QTSIC__speciality, "QTAC01" AS QTAC01_customer_profession, "QTAC02" AS QTAC02_customer_sub_profession, "QTAC03" AS QTAC03_type_of_paying_customer, "QTAC04" AS QTAC04_practice_type, "QTAC05" AS QTAC05_ap_check_routing_code, "QTAC06" AS QTAC06_category_code_address_06, "QT$FT1" AS QT$FT1_future_use_1, "QT$FT2" AS QT$FT2_future_use_2, "Q

In [56]:
dff

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,QT$RTP,Record Type. . . . . . . . . . . . . . .,CHAR,2,,$RTP,A,2.0,2.0,0.0,Record Type. . . . . . . . . . . . . . .,record_type
1,2,QTITM,Item Number (Short). . . . . . . . . . .,NUMERIC,8,8.0,ITM,S,9.0,8.0,0.0,Item Number (Short). . . . . . . . . . .,item_number_short
2,3,QTLITM,2nd Item Number. . . . . . . . . . . . .,CHAR,25,,LITM,A,2.0,25.0,0.0,2nd Item Number. . . . . . . . . . . . .,item_number
3,4,QT$AJP,Adjustment Percent . . . . . . . . . . .,DECIMAL,5,5.0,$AJP,P,9.0,5.0,4.0,Adjustment Percent . . . . . . . . . . .,adjustment_percent
4,5,QT$AJD,Adjustment Dollar. . . . . . . . . . . .,NUMERIC,15,15.0,$AJD,S,9.0,15.0,0.0,Adjustment Dollar. . . . . . . . . . . .,adjustment_dollar
5,6,QT$USG,Adjustment Usage . . . . . . . . . . . .,CHAR,1,,$USG,A,1.0,1.0,0.0,Adjustment Usage . . . . . . . . . . . .,adjustment_usage
6,7,QTEFFF,Effective From Date . . . . . . . . . .,NUMERIC,6,6.0,EFFF,S,11.0,6.0,0.0,Effective From Date . . . . . . . . . .,effective_from_date
7,8,QTEXDE,Expiration Date. . . . . . . . . . . . .,NUMERIC,6,6.0,EXDE,S,11.0,6.0,0.0,Expiration Date. . . . . . . . . . . . .,expiration_date
8,9,QT$FLD,Data Field Name. . . . . . . . . . . . .,CHAR,6,,$FLD,A,2.0,6.0,0.0,Data Field Name. . . . . . . . . . . . .,data_field_name
9,10,QTAC10,Division Code . . . . . . . .. . . . ..,CHAR,3,,AC10,A,2.0,3.0,0.0,Division Code . . . . . . . . . . . . ..,division_code


In [57]:
%time df_table_map = pd.read_sql_query(sql_table_map, engine);

CPU times: user 16.4 ms, sys: 0 ns, total: 16.4 ms
Wall time: 7.46 s


### Next steps...
Add SQL to SQL Tools data package 

In [58]:
df_table_map

Unnamed: 0,QT$RTP_record_type,QTITM__item_number_short,QTLITM_item_number,QT$AJP_adjustment_percent,QT$AJD_adjustment_dollar,QT$USG_adjustment_usage,QTEFFF_effective_from_date,QTEXDE_expiration_date,QT$FLD_data_field_name,QTAC10_division_code,...,QT$FT0_future_use_0,QTCRTU_created_by_user,QTPGM__program_name,QTCRDJ_creation_date,QTCRTM_creation_time,QTUSER_user_id,QTPID__program_id,QTJOBN_work_station_id,QTUPMJ_date_updated,QTTDAY_time_of_day
0,S1,768589.0,1044648,1.075,0.0,P,2018-01-16,2040-12-31,AC10,ALL,...,,LJAGGA,P5698001,2018-01-15,130232.0,LJAGGA,P5698001,J569800002,2018-01-15,130232.0
1,S1,768589.0,1044648,1.07,0.0,P,2010-01-18,2018-01-15,AC10,ALL,...,,LJAGGA,P5698001,2010-01-18,160159.0,LJAGGA,P5698001,J569800002,2010-01-18,160159.0
2,S1,768589.0,1044648,1.07,0.0,P,2009-12-17,2010-01-17,AC10,ALL,...,,LJAGGA,P5698001,2009-12-17,165436.0,LJAGGA,P5698001,J569800002,2009-12-17,165436.0
3,S1,769078.0,1081158,1.0,0.0,P,2020-03-26,2020-12-31,AC10,ALL,...,,LJAGGA,P5698001,2014-12-19,172613.0,LJAGGA,P5698001,J569800002,2014-12-19,172613.0
4,S1,769078.0,1081158,1.0,0.0,P,2014-12-20,2040-12-31,AC10,ALL,...,,LJAGGA,P5698001,2014-12-19,172613.0,LJAGGA,P5698001,J569800002,2014-12-19,172613.0
