# JDE ETL Source Design
## Goal:  Generate source SQL with friendly names and built-in data Conversion
1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals
2. Pull *Specific* Table fields
3. Create SQL mapping pull with data-conversion

In [171]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine
import os, sys
import warnings

warnings.filterwarnings('ignore')

In [172]:
# NOTE: best practice is to keep all imports in the first cell of the notebook.
# In the cells visible here, qgrid is only referenced by commented-out show_grid calls.
import qgrid
qgrid.nbinstall(overwrite=True)

### Connect to SQL DB

In [173]:
# The connection URL carries a login and password, so it is read from the
# environment instead of being stored in the notebook.
# Expected form: mssql+pymssql://<user>:<password>@<host>:<port>/<database>
sql_connection_str = os.environ.get('JDE_ETL_SQL_CONNECTION_STR')
if not sql_connection_str:
    raise RuntimeError(
        'Set the JDE_ETL_SQL_CONNECTION_STR environment variable to the '
        'SQLAlchemy URL of the SQL Server that hosts the linked servers.'
    )
engine = create_engine(sql_connection_str)

## 1. Pull *ALL* Field metadata based on QA 9.3:  Name, Datatype, Decimals

In [174]:
# Field-metadata query, two levels:
#   * inner pass-through query (run on the ESYS_QA linked server): reads F9210
#     and left-joins F9202 on FRDTAI, keeping only F9202 rows whose FRLNGP and
#     FRSYR are a single blank;
#   * outer T-SQL SELECT: trims the data item, gives the columns friendly
#     names, and substitutes 0 / 'zNA' for missing decimals / descriptions.
# The linked server and library (ESYS_QA, ARCPCOM93) are hard-coded here.
sql_field_meta = '''
SELECT 
	RTRIM("FRDTAI")				AS data_item
	,"FRDTAT"					AS data_item_type
	,"FROWTP"					AS data_type
	,"FRDTAS"					AS data_item_size
	,ISNULL("FRCDEC", 0)		AS display_decimals
	,ISNULL("FRDSCR", 'zNA')	AS row_description 
    
FROM 

    OPENQUERY (ESYS_QA, '

	SELECT
		t.FRDTAI
		,FRDTAT
		,FROWTP
		,FRDTAS
		,FRCDEC
		,FRDSCR
	FROM
		ARCPCOM93.F9210 t
		LEFT JOIN ARCPCOM93.F9202 d
		ON t.FRDTAI = d.FRDTAI AND
			d.FRLNGP = '' '' AND
			d.FRSYR = '' ''  
')

'''

In [175]:
df_field_meta = pd.read_sql_query(sql_field_meta, engine);

In [176]:
# Convert the numeric data-dictionary attributes, addressed by name rather
# than by position so a change in the SELECT column order cannot silently
# convert the wrong columns. Assigning through df[cols] replaces the columns
# (and therefore their dtype); an .iloc[:, ...] assignment may write in place
# and leave them as object dtype on recent pandas versions.
numeric_meta_cols = ['data_type', 'data_item_size', 'display_decimals']
df_field_meta[numeric_meta_cols] = df_field_meta[numeric_meta_cols].apply(
    pd.to_numeric, errors='coerce'
)
# Values that could not be parsed (now NaN) and any other missing values become 0.
df_field_meta = df_field_meta.fillna(value=0)

In [178]:
print(df_field_meta.dtypes)
df_field_meta.head().T

data_item            object
data_item_type       object
data_type           float64
data_item_size      float64
display_decimals    float64
row_description      object
dtype: object


Unnamed: 0,0,1,2,3,4
data_item,$$,$$RA,$$SC,$$TP,$$01
data_item_type,A,A,S,A,A
data_type,2,2,9,1,1
data_item_size,12,20,15,1,1
display_decimals,0,0,4,0,0
row_description,Score Type . . . . . . . . . . . . . . .,D1 EXTERNAL RMA NUMBER . . . . . . . . .,Inventory Unit Cost. . . . . . . . . . .,Return Type. . . . . . . . . . . . . . .,Score Type 01. . . . . . . . . . . . . .


In [179]:
# qgrid.show_grid(df_field_meta.iloc[:,:], remote_js=True)

### 2. Pull *Specific* Table fields

####  Set Table Name HERE

In [180]:
# Table whose column list and SQL mapping are generated by the cells below.
sql_table = 'F4072' 

# Linked-server name passed to OPENQUERY for the table-level queries.
# NOTE(review): the field-metadata query above hard-codes ESYS_QA and does not
# use this setting.
sql_link_server = 'ESYS_PROD'
#sql_link_server = 'ESYS_QA'
# Library used as TABLE_SCHEMA in the catalog lookup and as the qualifier of
# the table in the generated SELECT.
sql_lib = 'ARCPDTA71'
#sql_lib = 'ARCPDTA93'

# F4072 F4101 F5613 F4072 F4094 F5830 F5831 F5832 F8444 F4211, F5503 

# array(['ARCPDTA93', 'ARCPCOM93', 'ARCPDTA71'], dtype=object)
# [ARC | HSI] [P | D] [DTA | CDC] [ 71 | 94]


In [181]:
# Catalog lookup: lists every QSYS2.SYSCOLUMNS row for the configured
# library/table, ordered by ORDINAL_POSITION. The three placeholders are
# filled, in order, with the linked server, the library and the table name.
# Quotes are doubled because the inner query is itself a T-SQL string literal.
sql_table_fields = '''

SELECT * from OPENQUERY ({}, '
	SELECT
		*
	FROM
		QSYS2.SYSCOLUMNS
	WHERE
        TABLE_SCHEMA = ''{}'' AND
		TABLE_NAME in( ''{}'')
    ORDER BY 
        ORDINAL_POSITION
')

''' .format(sql_link_server, sql_lib, sql_table)

In [182]:
df_table_fields = pd.read_sql_query(sql_table_fields, engine);

In [183]:
df_table_fields.head()

Unnamed: 0,COLUMN_NAME,TABLE_NAME,TABLE_OWNER,ORDINAL_POSITION,DATA_TYPE,LENGTH,NUMERIC_SCALE,IS_NULLABLE,IS_UPDATABLE,LONG_COMMENT,...,IDENTITY_START,IDENTITY_INCREMENT,IDENTITY_MINIMUM,IDENTITY_MAXIMUM,IDENTITY_CYCLE,IDENTITY_CACHE,IDENTITY_ORDER,COLUMN_EXPRESSION,HIDDEN,HAS_FLDPROC
0,ADAST,F4072,ARCONA,1,CHAR,8,,N,Y,,...,,,,,,,,,N,N
1,ADITM,F4072,ARCONA,2,NUMERIC,8,0.0,N,Y,,...,,,,,,,,,N,N
2,ADLITM,F4072,ARCONA,3,CHAR,25,,N,Y,,...,,,,,,,,,N,N
3,ADAITM,F4072,ARCONA,4,CHAR,25,,N,Y,,...,,,,,,,,,N,N
4,ADAN8,F4072,ARCONA,5,NUMERIC,8,0.0,N,Y,,...,,,,,,,,,N,N


#### Join table fields with data dictionary meta-data

In [184]:
# Keep only the catalog columns needed downstream. `.copy()` makes the result
# an independent frame, so adding columns to it later is a plain assignment
# rather than a write to a slice of the original query result.
df_table_fields = df_table_fields[
    ['ORDINAL_POSITION', 'COLUMN_NAME', 'COLUMN_TEXT', 'DATA_TYPE', 'LENGTH', 'NUMERIC_PRECISION']
].copy()

In [185]:
# Drop the first two characters of each column name to obtain the key used to
# look up the data dictionary; in the F4072 sample shown in this notebook that
# turns ADAST into AST.
df_table_fields['data_item'] = df_table_fields.COLUMN_NAME.str[2:]

In [186]:
df_table_fields.data_item.unique()

array(['AST', 'ITM', 'LITM', 'AITM', 'AN8', 'ICID', 'SDGR', 'SDV1', 'SDV2',
       'SDV3', 'CRCD', 'UOM', 'MNQ', 'EFTJ', 'EXDJ', 'BSCD', 'LEDG',
       'FRMN', 'FVTR', 'FGY', 'ATID', 'URCD', 'URDT', 'URAT', 'URAB',
       'URRF', 'USER', 'PID', 'JOBN', 'UPMJ', 'TDAY'], dtype=object)

In [187]:
# Attach the data-dictionary attributes to each table column via `data_item`.
# pd.merge defaults to an inner join, so a table column without a matching
# data_item in df_field_meta is dropped from dff.
dff = pd.merge(df_table_fields,df_field_meta,on='data_item')

#### Cleanup final field name
Remove trailing dots and special characters, and convert to lower_case.

In [199]:
# Build a SQL-safe lower_snake_case alias from each data-dictionary description.
# `regex=` is spelled out on every replace: the default differs between pandas
# versions, and `case=False` is only accepted together with regex=True.
dff['row_description_final'] = (
    dff.row_description
    .str.rstrip('. ')                                        # trailing dot leaders
    .str.replace('%', 'pct', regex=False)                    # literal percent sign
    .str.replace('$', 'amt', regex=False)                    # literal dollar sign
    .str.replace(r'[^0-9a-z ]', '', case=False, regex=True)  # keep letters, digits, blanks
    .str.replace(r' +', ' ', regex=True)                     # collapse runs of blanks
    .str.strip()                                             # no leading/trailing underscore
    .str.lower()
    .str.replace(' ', '_', regex=False)
)

#### Override with Defaults

In [201]:
# Preferred aliases for specific data items; these replace the generated name.
# The update is a single column assignment instead of chained indexing
# (dff[col][mask] = value), which writes to a temporary object and is not
# guaranteed to modify dff.
alias_overrides = {
    'LITM': 'item_number',
    'AN8': 'billto',
    'SHAN': 'shipto',
    'DOCO': 'salesorder_number',
}
dff['row_description_final'] = (
    dff['data_item'].map(alias_overrides).fillna(dff['row_description_final'])
)

In [202]:
print(dff.dtypes)
dff.head().T

ORDINAL_POSITION           int64
COLUMN_NAME               object
COLUMN_TEXT               object
DATA_TYPE                 object
LENGTH                     int64
NUMERIC_PRECISION        float64
data_item                 object
data_item_type            object
data_type                float64
data_item_size           float64
display_decimals         float64
row_description           object
row_description_final     object
dtype: object


Unnamed: 0,0,1,2,3,4
ORDINAL_POSITION,1,2,3,4,5
COLUMN_NAME,ADAST,ADITM,ADLITM,ADAITM,ADAN8
COLUMN_TEXT,Adjustment Name. . . . . . . . . . . . .,Item Number (Short). . . . . . . . . . .,2nd Item Number. . . . . . . . . . . . .,3rd Item Number. . . . . . . . . . . . .,Address Number . . . . . . . . . . . . .
DATA_TYPE,CHAR,NUMERIC,CHAR,CHAR,NUMERIC
LENGTH,8,8,25,25,8
NUMERIC_PRECISION,,8,,,8
data_item,AST,ITM,LITM,AITM,AN8
data_item_type,A,S,A,A,S
data_type,2,9,2,2,9
data_item_size,8,8,25,25,8


### 3. Create SQL mapping pull with data-conversion

In [203]:
dff.groupby(['DATA_TYPE', 'data_type','display_decimals'])['ORDINAL_POSITION'].count()

DATA_TYPE  data_type  display_decimals
CHAR       1.0        0.0                  2
           2.0        0.0                 16
DECIMAL    9.0        0.0                  2
                      2.0                  1
                      4.0                  1
NUMERIC    9.0        0.0                  5
           11.0       0.0                  4
Name: ORDINAL_POSITION, dtype: int64

In [204]:
#qgrid.show_grid(dff.iloc[:,:], remote_js=True)

In [205]:
def field_format(col_name, col_type, col_dec):
    """Return the select-list expression for one column of the pass-through query.

    Parameters
    ----------
    col_name : str
        Physical column name; also used as the alias of a converted expression.
    col_type : number
        Data-dictionary type code (the `data_type` column of the metadata).
        9 is handled as a numeric that may carry implied decimals, 11 as a
        7-digit numeric date; every other code is passed through unchanged.
    col_dec : number
        Number of implied decimals (`display_decimals`).

    Returns
    -------
    str
        * type 9 with col_dec > 0: the value divided by 10**col_dec, cast to
          DEC(15, col_dec);
        * type 11: the value plus 1900000, rendered as 7 digits and converted
          with DATE(); a stored 0 is turned into NULL first so that an empty
          date yields NULL instead of an invalid date string;
        * anything else: the bare column name.
    """
    if col_type == 9 and col_dec > 0:
        # int() replaces the removed NumPy alias np.int; the scale must be
        # rendered as an integer (DEC(15,4), not DEC(15,4.0)).
        return 'CAST(({})/{} AS DEC({},{})) AS {}'.format(
            col_name, 10**col_dec, 15, int(col_dec), col_name
        )
    if col_type == 11:
        # The guard value is a plain numeric 0. Doubled single quotes inside a
        # single-quoted Python literal are adjacent-string concatenation and
        # would vanish from the generated SQL.
        return 'DATE(DIGITS(DEC( NULLIF({}, 0) + 1900000,7,0))) AS {}'.format(
            col_name, col_name
        )
    return col_name

def _sql_alias(column_name, friendly_name):
    """Return a usable alias: the friendly name, `_`-prefixed if it starts with a digit.

    Falls back to the lower-cased physical column name when the friendly name
    is empty or not a string, so the generated SQL never contains `AS ` with
    nothing after it.
    """
    if not isinstance(friendly_name, str) or not friendly_name:
        return column_name.lower()
    if friendly_name[0].isdigit():
        return '_' + friendly_name
    return friendly_name


# Outer select list: quoted physical column name -> friendly alias.
sql_field_map = ', '.join(
    '"{}" AS {}'.format(column_name, _sql_alias(column_name, friendly_name))
    for column_name, friendly_name in zip(dff['COLUMN_NAME'], dff['row_description_final'])
)

# Inner (pass-through) select list: one expression per column, with the
# conversion chosen from the data-dictionary type and decimals.
sql_field_sel = ', '.join(
    field_format(col_name, col_type, col_dec)
    for col_name, col_type, col_dec in zip(
        dff['COLUMN_NAME'], dff['data_type'], dff['display_decimals']
    )
)


In [206]:
# sql_field_sel

In [207]:
# Final mapping query. Placeholders, in order:
#   1. row limiter for the outer SELECT ('Top 5' here, i.e. a five-row preview),
#   2. outer select list mapping physical columns to friendly aliases,
#   3. linked server passed to OPENQUERY,
#   4. inner select list with the per-column conversions,
#   5-6. library and table of the source.
sql_table_map = '''

SELECT 

    {} {} 
    
FROM 
    OPENQUERY ({}, '

	SELECT
		{}
        
	FROM
		{}.{}
')

'''.format('Top 5', sql_field_map, sql_link_server, sql_field_sel, sql_lib, sql_table)

In [208]:
print(sql_table_map)



SELECT 

    Top 5 "ADAST" AS adjustment_name, "ADITM" AS item_number_short, "ADLITM" AS item_number, "ADAITM" AS _3rd_item_number, "ADAN8" AS billto, "ADICID" AS itemcustomer_key_id, "ADSDGR" AS order_detail_group, "ADSDV1" AS sales_detail_value_01, "ADSDV2" AS sales_detail_value_02, "ADSDV3" AS sales_detail_value_03, "ADCRCD" AS currency_code, "ADUOM" AS um, "ADMNQ" AS quantity_from, "ADEFTJ" AS effective_date, "ADEXDJ" AS expired_date, "ADBSCD" AS basis, "ADLEDG" AS cost_method, "ADFRMN" AS formula_name, "ADFVTR" AS factor_value, "ADFGY" AS free_goods_yn, "ADATID" AS price_adjustment_key_id, "ADURCD" AS user_reserved_code, "ADURDT" AS user_reserved_date, "ADURAT" AS user_reserved_amount, "ADURAB" AS user_reserved_number, "ADURRF" AS user_reserved_reference, "ADUSER" AS user_id, "ADPID" AS program_id, "ADJOBN" AS work_station_id, "ADUPMJ" AS date_updated, "ADTDAY" AS time_of_day 
    
FROM 
    OPENQUERY (ESYS_PROD, '

	SELECT
		ADAST, ADITM, ADLITM, ADAITM, ADAN8, ADICID, ADSDGR, 

### Output Table

In [209]:
%time df_table_map = pd.read_sql_query(sql_table_map, engine);

CPU times: user 11.4 ms, sys: 3.44 ms, total: 14.8 ms
Wall time: 1.33 s


In [210]:
df_table_map.dtypes

adjustment_name             object
item_number_short          float64
item_number                 object
_3rd_item_number            object
billto                     float64
itemcustomer_key_id        float64
order_detail_group          object
sales_detail_value_01       object
sales_detail_value_02       object
sales_detail_value_03       object
currency_code               object
um                          object
quantity_from              float64
effective_date              object
expired_date                object
basis                       object
cost_method                 object
formula_name                object
factor_value               float64
free_goods_yn               object
price_adjustment_key_id    float64
user_reserved_code          object
user_reserved_date          object
user_reserved_amount       float64
user_reserved_number       float64
user_reserved_reference     object
user_id                     object
program_id                  object
work_station_id     

In [211]:
df_table_map

Unnamed: 0,adjustment_name,item_number_short,item_number,_3rd_item_number,billto,itemcustomer_key_id,order_detail_group,sales_detail_value_01,sales_detail_value_02,sales_detail_value_03,...,user_reserved_code,user_reserved_date,user_reserved_amount,user_reserved_number,user_reserved_reference,user_id,program_id,work_station_id,date_updated,time_of_day
0,CORPRICE,0.0,,,0.0,1839062.0,,,,,...,,,874.0,0.0,1044648,ARCPRC,P56013,QPADEV000W,2007-03-26,173924.0
1,CORPRICE,0.0,,,0.0,1839425.0,,,,,...,,,5052.0,0.0,1081158,ARCPRC,P56013,QPADEV000W,2007-03-26,174006.0
2,CORPRICE,0.0,,,0.0,1839446.0,,,,,...,,,512.0,0.0,1081163,ARCPRC,P56013,QPADEV000W,2007-03-26,174006.0
3,CORPRICE,0.0,,,0.0,1840693.0,,,,,...,,,1.0,0.0,100FRTC,ARCSLS,P56411E,QPADEV0013,2007-02-08,102622.0
4,CORPRICE,0.0,,,0.0,1840754.0,,,,,...,,,5400.0,0.0,1642445,ARCPRC,P56013,QPADEV000W,2007-03-26,174156.0


### Next steps...
Add SQL to SQL Tools data package 