# Predictive Analytics

Identify items that are purchased together, using association rules (market basket analysis).

## Oracle AutoML notebook - best run in Zeppelin using Oracle OCI AutoML

In [None]:
%python

import oml
import pandas as pd

# Raise the pandas display limits: up to 500 rows, 500 columns and a
# 1000-character line width when frames are rendered as text.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [None]:
%python

# Load the product id, name and category columns of SH.PRODUCTS through oml.sync.
PRODUCT_QUERY = 'SELECT PROD_ID, PROD_NAME, PROD_CATEGORY FROM SH.PRODUCTS'
PRODUCT_DF = oml.sync(query=PRODUCT_QUERY)

# Display PRODUCT_DF.head() through the notebook context object `z`.
product_preview = PRODUCT_DF.head()
z.show(product_preview)

In [None]:
%python

# SALES_DF is used below, but no cell in this notebook creates it, so on a
# fresh interpreter this cell fails with NameError. Create it only when it is
# missing, so that a SALES_DF defined elsewhere is left untouched.
# NOTE(review): assumes the transactions come from SH.SALES (same SH schema as
# SH.PRODUCTS) and that only CUST_ID / PROD_ID are needed here - confirm.
if 'SALES_DF' not in globals():
    SALES_DF = oml.sync(query = 'SELECT CUST_ID, PROD_ID FROM SH.SALES')

# Keep only sales rows whose CUST_ID lies in [100001, 104500], then attach the
# product name/category by joining on PROD_ID.
SALES_TRANS_CUST = PRODUCT_DF.merge(SALES_DF[(SALES_DF['CUST_ID'] >= 100001) & (SALES_DF['CUST_ID'] <= 104500)],
                                    how = "inner", on = 'PROD_ID', suffixes = ["", ""])

# Reduce to the three columns used for rule mining and drop repeated rows.
SALES_TRANS_CUST = SALES_TRANS_CUST[['CUST_ID', 'PROD_NAME', 'PROD_CATEGORY']].drop_duplicates()

In [None]:
%python

# Settings forwarded to oml.ar as keyword arguments. All values are passed as
# strings; PROD_NAME is named as the item-id column.
setting = dict(
    ASSO_MIN_SUPPORT='0.04',
    ASSO_MIN_CONFIDENCE='0.1',
    ASSO_MAX_RULE_LENGTH='2',
    ODMS_ITEM_ID_COLUMN_NAME='PROD_NAME',
)

# Build the association-rules model and fit it, with CUST_ID as the case id.
ar_mod = oml.ar(**setting).fit(SALES_TRANS_CUST, case_id='CUST_ID')

In [None]:
%python

# Itemsets sorted by NUMBER_OF_ITEMS, then SUPPORT (both descending); show the first 10.
itemsets_sorted = ar_mod.itemsets.sort_values(['NUMBER_OF_ITEMS', 'SUPPORT'], ascending=False)
z.show(itemsets_sorted.head(10))

In [None]:
%python

# NOTE(review): this cell produces the same output as the preceding itemsets
# cell - confirm whether a different view (e.g. ar_mod.rules) was intended.
# Itemsets sorted by NUMBER_OF_ITEMS, then SUPPORT (both descending); first 10.
itemsets_by_size = ar_mod.itemsets.sort_values(['NUMBER_OF_ITEMS', 'SUPPORT'], ascending=False)
z.show(itemsets_by_size.head(10))

In [None]:
%python

# Rules whose left-hand side is 'Standard Mouse', highest CONFIDENCE first; show 10.
lhs_is_mouse = ar_mod.rules['LHS_NAME'] == 'Standard Mouse'
mouse_rules = ar_mod.rules[lhs_is_mouse]
z.show(mouse_rules.sort_values(['CONFIDENCE'], ascending=False).head(10))

RULE_ID	NUMBER_OF_ITEMS	LHS_NAME	LHS_VALUE	RHS_NAME	RHS_VALUE	SUPPORT	CONFIDENCE	REVCONFIDENCE	LIFT
91	2	Standard Mouse	None	Mouse Pad	None	0.184042553	0.572847682	0.570957096	1.777151225
53	2	Standard Mouse	None	Extension Cable	None	0.180851064	0.562913907	0.580204778	1.805935402
19	2	Standard Mouse	None	18" Flat Panel Graphics Monitor	None	0.095744681	0.298013245	0.308219178	0.959357707
103	2	Standard Mouse	None	SIMM- 16MB PCMCIAII card	None	0.091489362	0.284768212	0.287625418	0.895257924
83	2	Standard Mouse	None	Model SM26273 Black Ink Cartridge	None	0.079787234	0.248344371	0.367647059	1.144331905
67	2	Standard Mouse	None	External 8X CD-ROM	None	0.075531915	0.235099338	0.24738676	0.770011768
37	2	Standard Mouse	None	CD-RW, High Speed Pack of 5	None	0.070212766	0.218543046	0.24	0.747019868
97	2	Standard Mouse	None	Multimedia speakers- 3" cones	None	0.057446809	0.178807947	0.293478261	0.913475382
101	2	Standard Mouse	None	O/S Documentation Set - English	None	0.055319149	0.17218543	0.284153005	0.884449752
73	2	Standard Mouse	None	Keyboard Wrist Rest	None	0.054255319	0.168874172	0.288135593	0.896845886


#

## Anomaly Detection

## Oracle AutoML notebook - best run in Zeppelin using Oracle OCI AutoML

In [None]:
%python

import pandas as pd
import oml

# Raise the pandas display limits: up to 500 rows, 500 columns and a
# 1000-character line width when frames are rendered as text.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [None]:
%python


# Customer attributes selected from SH.CUSTOMERS.
CUSTOMERS_QUERY = 'SELECT CUST_ID, CUST_GENDER, CUST_MARITAL_STATUS, CUST_YEAR_OF_BIRTH, CUST_INCOME_LEVEL, CUST_CREDIT_LIMIT FROM SH.CUSTOMERS'
CUSTOMERS = oml.sync(query=CUSTOMERS_QUERY)

# Additional demographic attributes selected from SH.SUPPLEMENTARY_DEMOGRAPHICS.
SUPP_DEM_QUERY = """SELECT CUST_ID, EDUCATION, AFFINITY_CARD, HOUSEHOLD_SIZE, OCCUPATION, YRS_RESIDENCE, Y_BOX_GAMES FROM SH.SUPPLEMENTARY_DEMOGRAPHICS"""
SUPP_DEM = oml.sync(query=SUPP_DEM_QUERY)
 


In [None]:
%python

# Inner-join the two customer frames on CUST_ID; both suffixes are empty strings.
CUST_DF = CUSTOMERS.merge(
    SUPP_DEM,
    on='CUST_ID',
    how="inner",
    suffixes=["", ""],
)


In [None]:
%python

# Cross-tabulate on birth year and marital status, aggregating YRS_RESIDENCE
# with oml.DataFrame.sum, then display the result.
crosstab_keys = ['CUST_YEAR_OF_BIRTH', 'CUST_MARITAL_STATUS']
DF = CUST_DF.crosstab(crosstab_keys, values='YRS_RESIDENCE', aggfunc=oml.DataFrame.sum)
z.show(DF)


In [None]:
%python

# Best-effort cleanup of a model left over from an earlier run (presumably so
# the fit below can reuse the name). A failed drop is deliberately ignored.
# Only Exception is caught so that KeyboardInterrupt / SystemExit still
# propagate instead of being silently swallowed.
try:
    oml.drop(model="ANOMALY_DETECTION_MODEL1")
except Exception:
    pass

# Single model setting: outlier rate of 0.01.
odm_settings = {'svms_outlier_rate' : 0.01}

# Create an SVM in "anomaly_detection" mode and fit it on CUST_DF with no
# target (second argument is None), storing it as ANOMALY_DETECTION_MODEL1
# and using CUST_ID as the case id.
svm_mod = oml.svm("anomaly_detection", **odm_settings)
svm_mod.fit(CUST_DF, None, model_name = 'ANOMALY_DETECTION_MODEL1', case_id = 'CUST_ID')

In [None]:
%python

# Best-effort cleanup of a model left over from an earlier run (presumably so
# the fit below can reuse the name). A failed drop is deliberately ignored.
# Only Exception is caught so that KeyboardInterrupt / SystemExit still
# propagate instead of being silently swallowed.
try:
    oml.drop(model="ANOMALY_DETECTION_MODEL1")
except Exception:
    pass

# Model settings, all passed as strings: outlier rate, L1 regularizer and
# convergence tolerance.
odm_settings = {'SVMS_OUTLIER_RATE' : '0.01',
                'SVMS_REGULARIZER' : 'SVMS_REGULARIZER_L1',
                'SVMS_CONV_TOLERANCE': '0.001'
                }

# Create an SVM in "anomaly_detection" mode and fit it on CUST_DF with no
# target (second argument is None), storing it as ANOMALY_DETECTION_MODEL1
# and using CUST_ID as the case id.
svm_mod = oml.svm("anomaly_detection", **odm_settings)
svm_mod.fit(CUST_DF, None, model_name = 'ANOMALY_DETECTION_MODEL1', case_id = 'CUST_ID')

## Predict

In [None]:
%python

# Score CUST_DF with the fitted model, requesting probabilities and passing
# CUST_DF itself as the supplemental columns.
RES_DF = svm_mod.predict(
    CUST_DF,
    proba=True,
    supplemental_cols=CUST_DF,
)

In [None]:
%python

# Show the head of the rows with PREDICTION == 0, with the prediction columns
# first and the CUST_DF columns after them.
# NOTE(review): 0 is presumably the anomaly label - confirm against the OML4Py docs.
flagged_rows = RES_DF[RES_DF['PREDICTION'] == 0]
display_cols = ["PREDICTION", "PROBABILITY"] + CUST_DF.columns
z.show(flagged_rows[display_cols].head())