# JDE ETL Source Design
## Goal:  Generate source SQL with friendly names and built-in data Conversion
1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals
2. Pull *Specific* Table fields
3. Create SQL mapiing pull with data-conversion

In [30]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import os, sys
import warnings

warnings.filterwarnings('ignore')

In [31]:
from decouple import config
config.search_path = '/home/jovyan/work'

### Connect to SQL DB

In [32]:
engine = create_engine(config('DATABASE_URL'))

### 1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals

In [33]:
sql_field_meta_server = 'ESYS_PROD'
sql_field_meta_lib = 'ARCPCOM71'

In [34]:
sql_field_meta = '''
SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY ({}, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		{}.F9210 t
		LEFT JOIN {}.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')

'''.format(sql_field_meta_server, sql_field_meta_lib, sql_field_meta_lib)

In [35]:
print(sql_field_meta)


SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY (ESYS_PROD, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		ARCPCOM71.F9210 t
		LEFT JOIN ARCPCOM71.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')




In [36]:
df_field_meta = pd.read_sql_query(sql_field_meta, engine);

In [37]:
df_field_meta.iloc[:,[2,3,4]] = df_field_meta.iloc[:,[2,3,4]].apply(lambda x: pd.to_numeric(x, errors='coerce'))
df_field_meta.fillna(value=0,inplace=True)

### 2. Pull *Specific* Table fields

#  Set Table Name HERE

In [38]:
sql_table = 'F554509'
#sql_table = 'F060116'
#sql_table = 'RPTRULES'

sql_link_server = 'ESYS_PROD'
#sql_lib = 'ARCPTEL'
sql_lib = 'ARCPDTA71'
#sql_lib = 'ESEND'
#sql_lib = 'HSIPDTA71'


stage_db_schema = 'Integration.'
convert_julian_date = True

# [ARC | HSI] [P | D] [DTA | CDC] [ 71 | 94]


In [39]:
sql_table_fields = '''

SELECT * from OPENQUERY ({}, '
	SELECT
		*
	FROM
		QSYS2.SYSCOLUMNS
	WHERE
        TABLE_SCHEMA = ''{}'' AND
		TABLE_NAME in( ''{}'')
    ORDER BY 
        ORDINAL_POSITION
')

''' .format(sql_link_server, sql_lib, sql_table)

In [40]:
#print (sql_table_fields)

In [41]:
df_table_fields = pd.read_sql_query(sql_table_fields, engine);

#### Join table fields with data dictionary meta-data

In [42]:
#df_table_fields

In [43]:
df_table_fields = df_table_fields[['ORDINAL_POSITION', 'COLUMN_NAME', 'COLUMN_TEXT', 'DATA_TYPE','LENGTH', 'NUMERIC_PRECISION']]

In [44]:
df_table_fields['data_item'] = df_table_fields.COLUMN_NAME.str[2:]

In [45]:
df_table_fields.head()

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item
0,1,QXLITM,2nd Item Number. . . . . . . . . . . . .,CHAR,25,,LITM
1,2,QXCYLN,Cylinder # . . . . . . . . . . . . . . .,CHAR,10,,CYLN
2,3,QXURRF,User Reserved Reference . . . . . . . .,CHAR,15,,URRF
3,4,QX$BID,Bid Number . . . . . . . . . . . . . . .,CHAR,10,,$BID
4,5,QXREF,Reference. . . . . . . . . . . . . . . .,CHAR,15,,REF


In [46]:
df_table_fields.data_item.unique()

array(['LITM', 'CYLN', 'URRF', '$BID', 'REF', 'AST', 'AC10', '$FSI',
       'NADR', 'UOM1', 'CRCD', 'VBT', 'REF1', 'URCD', 'URDT', 'URAT',
       'URAB', 'USER'], dtype=object)

In [47]:
dff = pd.merge(df_table_fields,df_field_meta,on='data_item', how='left')

#### Cleanup final field name
remove trailing dots, special characters, and converto to lower_case

In [48]:
dff['row_description_final'] = dff.row_description.str.rstrip('. ').str.replace(r'%','pct').str.replace(r'$','amt').str.replace(r'[^0-9|a-z|" "]','', case=False).str.replace('  ',' ').str.lower().str.replace(' ','_')

#### Override with Defaults

In [49]:
dff['row_description_final'][dff['data_item']=='LITM'] = 'item_number'
dff['row_description_final'][dff['data_item']=='AN8'] = 'billto'
dff['row_description_final'][dff['data_item']=='SHAN'] = 'shipto'
dff['row_description_final'][dff['data_item']=='DOCO'] = 'salesorder_number'

### 3. Create SQL mapping pull with data-conversion

In [50]:
dff.groupby(['DATA_TYPE', 'data_type','display_decimals'])['ORDINAL_POSITION'].count()

DATA_TYPE  data_type  display_decimals
CHAR       1.0        0.0                  1
           2.0        0.0                 13
           9.0        0.0                  1
DECIMAL    9.0        2.0                  1
NUMERIC    9.0        0.0                  1
           11.0       0.0                  1
Name: ORDINAL_POSITION, dtype: int64

In [51]:
dff

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,QXLITM,2nd Item Number. . . . . . . . . . . . .,CHAR,25,,LITM,A,2.0,25.0,0.0,2nd Item Number. . . . . . . . . . . . .,item_number
1,2,QXCYLN,Cylinder # . . . . . . . . . . . . . . .,CHAR,10,,CYLN,A,2.0,10.0,0.0,Cylinder # . . . . . . . . . . . . . . .,cylinder_
2,3,QXURRF,User Reserved Reference . . . . . . . .,CHAR,15,,URRF,A,2.0,15.0,0.0,User Reserved Reference . . . . . . . .,user_reserved_reference
3,4,QX$BID,Bid Number . . . . . . . . . . . . . . .,CHAR,10,,$BID,A,2.0,10.0,0.0,Bid Number . . . . . . . . . . . . . . .,bid_number
4,5,QXREF,Reference. . . . . . . . . . . . . . . .,CHAR,15,,REF,A,2.0,15.0,0.0,Reference. . . . . . . . . . . . . . . .,reference
5,6,QXAST,Adjustment Name. . . . . . . . . . . . .,CHAR,8,,AST,A,2.0,8.0,0.0,Adjustment Name. . . . . . . . . . . . .,adjustment_name
6,7,QXAC10,Division Code . . . . . . . .. . . . ..,CHAR,3,,AC10,A,2.0,3.0,0.0,Division Code . . . . . . . . . . . . ..,division_code
7,8,QX$FSI,Family Set Indicator . . . . . . . . . .,CHAR,1,,$FSI,A,1.0,1.0,0.0,Family Set Indicator . . . . . . . . . .,family_set_indicator
8,9,QXNADR,Network Address. . . . . . . . . . . . .,CHAR,8,,NADR,A,2.0,8.0,0.0,Network Address. . . . . . . . . . . . .,network_address
9,10,QXUOM1,Unit of Measure. . . . . . . . . . . . .,CHAR,2,,UOM1,A,2.0,2.0,0.0,Unit of Measure. . . . . . . . . . . . .,unit_of_measure


In [52]:
def field_format_sel(col_name, col_type, col_dec):
    val =''

    if col_type == 9 :
        if col_dec > 0 :
            val = 'CAST(({})/{} AS DEC({},{})) AS {}'.format(col_name,10**col_dec,15,np.int(col_dec),col_name)
        else :
            val = col_name
    elif  col_type == 11 : 
        if convert_julian_date :
            val = 'CASE WHEN {} IS NOT NULL THEN DATE(DIGITS(DEC({}+ 1900000,7,0))) ELSE NULL END AS {}'.format(col_name, col_name, col_name)
        else :
            val = '{} as {}'.format(col_name,col_name)
    else : 
        val = col_name
    return val;

def field_format_map(col_name, col_descr, is_etl = False):
    val =''

    col_name_format = '{message:{fill}{align}{width}}'.format(message=col_name, fill='_', align='<', width=6)
    
    col_descr_format = col_descr
    if str.isdigit(col_descr_format[0]) :
        col_descr_format = "_" + col_descr_format
        
    if is_etl :
        val = '{}_{} AS {}'.format(col_name_format, col_descr_format, col_descr_format)
    else :
        val = '"{}" AS {}_{}'.format(col_name, col_name_format, col_descr_format)

    return val;



sql_field_map = ', '.join([field_format_map(x,y)  \
                           for x, y in zip(dff['COLUMN_NAME'], dff['row_description_final'])])

sql_field_etl = ', '.join([field_format_map(x,y,is_etl=True)  \
                           for x, y in zip(dff['COLUMN_NAME'], dff['row_description_final'])])
#
# ok
sql_field_sel = ', '.join([ field_format_sel(col_name, col_type, col_dec) \
                           for col_name, col_type, col_dec in zip(dff['COLUMN_NAME'], dff['data_type'], dff['display_decimals'])])


In [53]:
sql_field_sel

'QXLITM, QXCYLN, QXURRF, QX$BID, QXREF, QXAST, QXAC10, QX$FSI, QXNADR, QXUOM1, QXCRCD, QXVBT, QXREF1, QXURCD, CASE WHEN QXURDT IS NOT NULL THEN DATE(DIGITS(DEC(QXURDT+ 1900000,7,0))) ELSE NULL END AS QXURDT, CAST((QXURAT)/100.0 AS DEC(15,2)) AS QXURAT, QXURAB, QXUSER'

In [54]:
sql_table_map = '''

--------------------------------------------------------------------------------
-- DROP TABLE Integration.{}_{}_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    {} 
    {} 

-- INTO {}{}_{}_<instert_friendly_name_here>

FROM 
    OPENQUERY ({}, '

	SELECT
		{}

	FROM
		{}.{}
--    WHERE
--        <insert custom code here>
--    ORDER BY
--        <insert custom code here>
')

--------------------------------------------------------------------------------

'''.format(sql_lib, sql_table, 'Top 5', sql_field_map, stage_db_schema, sql_lib, sql_table, sql_link_server, sql_field_sel, sql_lib, sql_table)




### Output Table
1. Use to create STAGE via SQL link table 
1. Note that some Julian _JDT conversion will need to be converted Post 

In [55]:
print(sql_table_map)



--------------------------------------------------------------------------------
-- DROP TABLE Integration.ARCPDTA71_F554509_<instert_friendly_name_here>
--------------------------------------------------------------------------------

SELECT 

    Top 5 
    "QXLITM" AS QXLITM_item_number, "QXCYLN" AS QXCYLN_cylinder_, "QXURRF" AS QXURRF_user_reserved_reference, "QX$BID" AS QX$BID_bid_number, "QXREF" AS QXREF__reference, "QXAST" AS QXAST__adjustment_name, "QXAC10" AS QXAC10_division_code, "QX$FSI" AS QX$FSI_family_set_indicator, "QXNADR" AS QXNADR_network_address, "QXUOM1" AS QXUOM1_unit_of_measure, "QXCRCD" AS QXCRCD_currency_code, "QXVBT" AS QXVBT__variable_table, "QXREF1" AS QXREF1_reference_height, "QXURCD" AS QXURCD_user_reserved_code, "QXURDT" AS QXURDT_user_reserved_date, "QXURAT" AS QXURAT_user_reserved_amount, "QXURAB" AS QXURAB_user_reserved_number, "QXUSER" AS QXUSER_user_id 

-- INTO Integration.ARCPDTA71_F554509_<instert_friendly_name_here>

FROM 
    OPENQUERY (ESYS_PR

In [56]:
dff

Unnamed: 0,ORDINAL_POSITION,COLUMN_NAME,COLUMN_TEXT,DATA_TYPE,LENGTH,NUMERIC_PRECISION,data_item,data_item_type,data_type,data_item_size,display_decimals,row_description,row_description_final
0,1,QXLITM,2nd Item Number. . . . . . . . . . . . .,CHAR,25,,LITM,A,2.0,25.0,0.0,2nd Item Number. . . . . . . . . . . . .,item_number
1,2,QXCYLN,Cylinder # . . . . . . . . . . . . . . .,CHAR,10,,CYLN,A,2.0,10.0,0.0,Cylinder # . . . . . . . . . . . . . . .,cylinder_
2,3,QXURRF,User Reserved Reference . . . . . . . .,CHAR,15,,URRF,A,2.0,15.0,0.0,User Reserved Reference . . . . . . . .,user_reserved_reference
3,4,QX$BID,Bid Number . . . . . . . . . . . . . . .,CHAR,10,,$BID,A,2.0,10.0,0.0,Bid Number . . . . . . . . . . . . . . .,bid_number
4,5,QXREF,Reference. . . . . . . . . . . . . . . .,CHAR,15,,REF,A,2.0,15.0,0.0,Reference. . . . . . . . . . . . . . . .,reference
5,6,QXAST,Adjustment Name. . . . . . . . . . . . .,CHAR,8,,AST,A,2.0,8.0,0.0,Adjustment Name. . . . . . . . . . . . .,adjustment_name
6,7,QXAC10,Division Code . . . . . . . .. . . . ..,CHAR,3,,AC10,A,2.0,3.0,0.0,Division Code . . . . . . . . . . . . ..,division_code
7,8,QX$FSI,Family Set Indicator . . . . . . . . . .,CHAR,1,,$FSI,A,1.0,1.0,0.0,Family Set Indicator . . . . . . . . . .,family_set_indicator
8,9,QXNADR,Network Address. . . . . . . . . . . . .,CHAR,8,,NADR,A,2.0,8.0,0.0,Network Address. . . . . . . . . . . . .,network_address
9,10,QXUOM1,Unit of Measure. . . . . . . . . . . . .,CHAR,2,,UOM1,A,2.0,2.0,0.0,Unit of Measure. . . . . . . . . . . . .,unit_of_measure


In [57]:
%time df_table_map = pd.read_sql_query(sql_table_map, engine);

OperationalError: (pymssql.OperationalError) (7342, b'An unexpected NULL value was returned for column "[MSDASQL].QXURAT" from OLE DB provider "MSDASQL" for linked server "ESYS_PROD". This column cannot be NULL.DB-Lib error message 20018, severity 16:\nGeneral SQL Server error: Check messages from the SQL Server\n') [SQL: '\n\n--------------------------------------------------------------------------------\n-- DROP TABLE Integration.ARCPDTA71_F554509_<instert_friendly_name_here>\n--------------------------------------------------------------------------------\n\nSELECT \n\n    Top 5 \n    "QXLITM" AS QXLITM_item_number, "QXCYLN" AS QXCYLN_cylinder_, "QXURRF" AS QXURRF_user_reserved_reference, "QX$BID" AS QX$BID_bid_number, "QXREF" AS QXREF__reference, "QXAST" AS QXAST__adjustment_name, "QXAC10" AS QXAC10_division_code, "QX$FSI" AS QX$FSI_family_set_indicator, "QXNADR" AS QXNADR_network_address, "QXUOM1" AS QXUOM1_unit_of_measure, "QXCRCD" AS QXCRCD_currency_code, "QXVBT" AS QXVBT__variable_table, "QXREF1" AS QXREF1_reference_height, "QXURCD" AS QXURCD_user_reserved_code, "QXURDT" AS QXURDT_user_reserved_date, "QXURAT" AS QXURAT_user_reserved_amount, "QXURAB" AS QXURAB_user_reserved_number, "QXUSER" AS QXUSER_user_id \n\n-- INTO Integration.ARCPDTA71_F554509_<instert_friendly_name_here>\n\nFROM \n    OPENQUERY (ESYS_PROD, \'\n\n\tSELECT\n\t\tQXLITM, QXCYLN, QXURRF, QX$BID, QXREF, QXAST, QXAC10, QX$FSI, QXNADR, QXUOM1, QXCRCD, QXVBT, QXREF1, QXURCD, CASE WHEN QXURDT IS NOT NULL THEN DATE(DIGITS(DEC(QXURDT+ 1900000,7,0))) ELSE NULL END AS QXURDT, CAST((QXURAT)/100.0 AS DEC(15,2)) AS QXURAT, QXURAB, QXUSER\n\n\tFROM\n\t\tARCPDTA71.F554509\n--    WHERE\n--        <insert custom code here>\n--    ORDER BY\n--        <insert custom code here>\n\')\n\n--------------------------------------------------------------------------------\n\n'] (Background on this error at: http://sqlalche.me/e/e3q8)

### Next steps...
Add SQL to SQL Tools data package 

In [58]:
df_table_map

Unnamed: 0,QCDOCO_salesorder_number,QCDCTO_order_type,QCKCOO_order_number_document_company,QC$XRN_cross_reference_number,QC$AC1_address_code_future_1,QC$OSC_order_source_code,QC$FA__total_amount_freight,QC$ASC_apply_small_order_charges,QC$AHC_apply_hazardous_charges,QC$FMC_form_222c,...,QCAC19_category_code_19,QCAC20_category_code_20,QCAC21_category_code_21,QCAC22_category_code_22,QCAC23_category_code_23,QCUSER_user_id,QCPID__program_id,QCJOBN_work_station_id,QCUPMJ_date_updated,QCTDAY_time_of_day
0,2051.0,SO,2000,,0,T,0.0,Y,Y,,...,,EFC,,,DEN,MMORIN,P551101,EAGLEHIST,2007-03-13,132436.0
1,2.0,CM,2000,1220151001.0,0,T,0.0,Y,Y,,...,,EFC,,,DEN,MMORIN,XS5713,EAGLEHIST,2007-03-08,163852.0
2,9022.0,CM,2000,1220151001.0,0,T,0.0,Y,Y,,...,,EFC,,,DEN,BMAR04,XS5714,EAGLEHIST,2007-03-13,133259.0
3,1.0,CO,2000,1220151001.0,0,T,0.0,Y,Y,,...,,EFC,,,DEN,MMORIN,XS5713,EAGLEHIST,2007-03-08,114821.0
4,90276.0,SO,2000,,0,T,0.0,Y,Y,,...,,EFC,,,DEN,MMORIN,P551101,EAGLEHIST,2007-03-06,171557.0
