# JDE ETL Source Design
## Goal:  Generate source SQL with friendly names and built-in data Conversion
1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals
2. Pull *Specific* Table fields
3. Create SQL mapiing pull with data-conversion

In [2]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import os, sys
import warnings

warnings.filterwarnings('ignore')

In [4]:
from decouple import config
config.search_path = '/home/jovyan/work'

### Connect to SQL DB

In [6]:
engine = create_engine(config('DATABASE_URL'))

### 1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals

In [4]:
sql_field_meta_server = 'ESYS_PROD'
sql_field_meta_lib = 'ARCPCOM71'

In [5]:
sql_field_meta = '''
SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY ({}, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		{}.F9210 t
		LEFT JOIN {}.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')

'''.format(sql_field_meta_server, sql_field_meta_lib, sql_field_meta_lib)

In [6]:
print(sql_field_meta)


SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY (ESYS_PROD, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		ARCPCOM71.F9210 t
		LEFT JOIN ARCPCOM71.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')




In [8]:
df_field_meta = pd.read_sql_query(sql_field_meta, engine);

In [9]:
df_field_meta.iloc[:,[2,3,4]] = df_field_meta.iloc[:,[2,3,4]].apply(lambda x: pd.to_numeric(x, errors='coerce'))
df_field_meta.fillna(value=0,inplace=True)

### 2. Pull *Specific* Table fields

#  Set Table Name HERE

In [10]:
sql_table = 'NPFWRS'
#sql_table = 'F060116'
#sql_table = 'RPTRULES'

sql_link_server = 'ESYS_PROD'
sql_lib = 'HSIPCORDTA'
#sql_lib = 'ARCPDTA71'
#sql_lib = 'ARCPTEL'
#sql_lib = 'ESEND'
#sql_lib = 'HSIPDTA71'



stage_db_schema = 'Integration.'
convert_julian_date = True

# [ARC | HSI] [P | D] [DTA | CDC] [ 71 | 94]


In [11]:
sql_table_fields = '''

SELECT * from OPENQUERY ({}, '
	SELECT
		*
	FROM
		QSYS2.SYSCOLUMNS
	WHERE
        TABLE_SCHEMA = ''{}'' AND
		TABLE_NAME in( ''{}'')
    ORDER BY 
        ORDINAL_POSITION
')

''' .format(sql_link_server, sql_lib, sql_table)

In [12]:
#print (sql_table_fields)

In [13]:
df_table_fields = pd.read_sql_query(sql_table_fields, engine);

#### Join table fields with data dictionary meta-data

In [14]:
#df_table_fields

In [15]:
df_table_fields = df_table_fields[['ORDINAL_POSITION', 'COLUMN_NAME', 'COLUMN_TEXT', 'DATA_TYPE','LENGTH', 'NUMERIC_PRECISION']]

In [16]:
df_table_fields['data_item'] = df_table_fields.COLUMN_NAME.str[2:]

In [17]:
df_table_fields.head()

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item
0,1,WRSCMP,COMPANY NUMBER,DECIMAL,2,2.0,SCMP
1,2,WRSWHS,WAREHOUSE NUMBER,DECIMAL,2,2.0,SWHS
2,3,WRSITM,ITEM CODE,CHAR,7,,SITM
3,4,WRSAVL,PRODUCT AVAILABILITY CODE,CHAR,1,,SAVL
4,5,WRSQRC,LAST RECEIVED QUANTITY,DECIMAL,7,7.0,SQRC


In [18]:
df_table_fields.data_item.unique()

array(['SCMP', 'SWHS', 'SITM', 'SAVL', 'SQRC', 'SDRC', 'SSUP', 'SONR',
       'SONH', 'SVAR', 'SVSM', 'SVSD', 'SVSY', 'SVCT', 'SIMP', 'SIMS',
       'SPOH', 'SPOR', 'SPAL', 'SPOA', 'SMSA', 'SROH', 'STFW', 'STOW',
       'SAVI', 'SLMD', 'SMDM', 'SWDM', 'SHWK', 'SDMC', 'SEOQ', 'SBOQ',
       'SPMV', 'SLCM', 'SLCD', 'SLCY', 'SBOD', 'SPOD', 'SVND', 'SAVC',
       'SBUY', 'SPUC', 'SSTK', 'SCSU', 'SMNQ', 'SMLP', 'SSAC', 'SLEQ',
       'STGC', 'SCRC', 'SVSC', 'SLCC'], dtype=object)

In [19]:
dff = pd.merge(df_table_fields,df_field_meta,on='data_item', how='left')

#### Cleanup final field name
remove trailing dots, special characters, and converto to lower_case

In [20]:
dff['row_description_final'] = dff.row_description.str.rstrip('. ').str.replace(r'%','pct').str.replace(r'$','amt').str.replace(r'[^0-9|a-z|" "]','', case=False).str.replace('  ',' ').str.lower().str.replace(' ','_')

#### Override with Defaults

In [21]:
dff['row_description_final'][dff['data_item']=='LITM'] = 'item_number'
dff['row_description_final'][dff['data_item']=='AN8'] = 'billto'
dff['row_description_final'][dff['data_item']=='SHAN'] = 'shipto'
dff['row_description_final'][dff['data_item']=='DOCO'] = 'salesorder_number'

### 3. Create SQL mapping pull with data-conversion

In [22]:
dff.groupby(['DATA_TYPE', 'data_type','display_decimals'])['ORDINAL_POSITION'].count()

DATA_TYPE  data_type  display_decimals
CHAR       2.0        0.0                 2
DECIMAL    2.0        0.0                 2
           9.0        2.0                 1
                      3.0                 1
Name: ORDINAL_POSITION, dtype: int64

In [23]:
dff

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,WRSCMP,COMPANY NUMBER,DECIMAL,2,2.0,SCMP,,,,,,
1,2,WRSWHS,WAREHOUSE NUMBER,DECIMAL,2,2.0,SWHS,,,,,,
2,3,WRSITM,ITEM CODE,CHAR,7,,SITM,A,2.0,8.0,0.0,Substitute Item Number . . . . . . . . .,substitute_item_number
3,4,WRSAVL,PRODUCT AVAILABILITY CODE,CHAR,1,,SAVL,A,2.0,10.0,0.0,Save File Library. . . . . . . . . . . .,save_file_library
4,5,WRSQRC,LAST RECEIVED QUANTITY,DECIMAL,7,7.0,SQRC,,,,,,
5,6,WRSDRC,LAST RECEIVED DATE,DECIMAL,6,6.0,SDRC,,,,,,
6,7,WRSSUP,SUPPLIER CODE,CHAR,6,,SSUP,,,,,,
7,8,WRSONR,QUANTITY ON ORDER,DECIMAL,7,7.0,SONR,,,,,,
8,9,WRSONH,QUANTITY ON HAND,DECIMAL,7,7.0,SONH,,,,,,
9,10,WRSVAR,QUANTITY VARIANCE,DECIMAL,7,7.0,SVAR,A,2.0,3.0,0.0,Runtime Substitution Variables . . . . .,runtime_substitution_variables


In [24]:
def field_format_sel(col_name, col_type, col_dec):
    val =''

    if col_type == 9 :
        if col_dec > 0 :
            val = 'CAST(({})/{} AS DEC({},{})) AS {}'.format(col_name,10**col_dec,15,np.int(col_dec),col_name)
        else :
            val = col_name
    elif  col_type == 11 : 
        if convert_julian_date :
            val = 'CASE WHEN {} IS NOT NULL THEN DATE(DIGITS(DEC({}+ 1900000,7,0))) ELSE NULL END AS {}'.format(col_name, col_name, col_name)
        else :
            val = '{} as {}'.format(col_name,col_name)
    else : 
        val = col_name
    return val;

def field_format_map(col_name, col_descr, is_etl = False):
    val =''

    col_name_format = '{message:{fill}{align}{width}}'.format(message=col_name, fill='_', align='<', width=6)
    
    col_descr_format = col_descr
    if str.isdigit(col_descr_format[0]) :
        col_descr_format = "_" + col_descr_format
        
    if is_etl :
        val = '{}_{} AS {}'.format(col_name_format, col_descr_format, col_descr_format)
    else :
        val = '"{}" AS {}_{}'.format(col_name, col_name_format, col_descr_format)

    return val;



sql_field_map = ', '.join([field_format_map(x,y)  \
                           for x, y in zip(dff['COLUMN_NAME'], dff['row_description_final'])])

sql_field_etl = ', '.join([field_format_map(x,y,is_etl=True)  \
                           for x, y in zip(dff['COLUMN_NAME'], dff['row_description_final'])])
#
# ok
sql_field_sel = ', '.join([ field_format_sel(col_name, col_type, col_dec) \
                           for col_name, col_type, col_dec in zip(dff['COLUMN_NAME'], dff['data_type'], dff['display_decimals'])])


TypeError: 'float' object is not subscriptable

In [None]:
sql_field_sel

In [None]:
sql_table_map = '''

--------------------------------------------------------------------------------
-- DROP TABLE Integration.{}_{}_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    {} 
    {} 

-- INTO {}{}_{}_<instert_friendly_name_here>

FROM 
    OPENQUERY ({}, '

	SELECT
		{}

	FROM
		{}.{}
--    WHERE
--        <insert custom code here>
--    ORDER BY
--        <insert custom code here>
')

--------------------------------------------------------------------------------

'''.format(sql_lib, sql_table, 'Top 5', sql_field_map, stage_db_schema, sql_lib, sql_table, sql_link_server, sql_field_sel, sql_lib, sql_table)




### Output Table
1. Use to create STAGE via SQL link table 
1. Note that some Julian _JDT conversion will need to be converted Post 

In [None]:
print(sql_table_map)

In [None]:
dff

In [None]:
%time df_table_map = pd.read_sql_query(sql_table_map, engine);

### Next steps...
Add SQL to SQL Tools data package 

In [None]:
df_table_map