### Google.colab
Only execute this cell when running on the Google Colab platform.

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://github.com/Nak007/AssoruleMining">
    <img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

In [None]:
# Mount with google drive.
from google.colab import drive
drive.mount('/content/dirve')
# Import other libraries required. All *.py will be 
# stored under the following location i.e. '/content/example.py'.
!git clone 'http://github.com/Nak007/AssoruleMining.git'

## Example

In [1]:
import sys

import numpy as np
import pandas as pd

# Make the cloned repository importable when running on Colab. Appending to
# `sys.path` cannot fail, so no try/except is needed; the membership check
# keeps re-runs of this cell from adding duplicate entries.
_REPO_DIR = '/content/AssoruleMining'
if _REPO_DIR not in sys.path:
    sys.path.append(_REPO_DIR)

# Names this notebook uses from the module: define_dtype, discretize,
# AssoRuleMining, from_conditons.
from AssoruleMining import *

Find rules in a **cascading manner** (all rules are mutually exclusive).

In [2]:
# Load the pipe-delimited transactions file.
X = pd.read_csv('card_transdata_10K.txt', sep="|")
# `pop` removes the "fraud" column from X in place, so X keeps only the
# features and y holds the target as a NumPy array.
y = X.pop("fraud").values

In [3]:
# Recode the binary indicator columns as "yes"/"no" labels
# (any value other than 1 becomes "no").
binary_columns = ("repeat_retailer", "used_chip", "used_pin_number", "online_order")
for column in binary_columns:
    is_set = X[column] == 1
    X[column] = np.where(is_set, "yes", "no")

In [4]:
# Replace X with the output of the project helper `define_dtype`.
# NOTE(review): judging by the `X.info()` output that follows, it appears to
# store numeric columns as float32 and the yes/no columns as `category` —
# confirm against the helper's implementation.
X = define_dtype(X)

In [5]:
# Print the column dtypes, non-null counts and memory usage of X.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 7 columns):
 #   Column                          Non-Null Count  Dtype   
---  ------                          --------------  -----   
 0   distance_from_home              10000 non-null  float32 
 1   distance_from_last_transaction  10000 non-null  float32 
 2   ratio_to_median_purchase_price  10000 non-null  float32 
 3   repeat_retailer                 10000 non-null  category
 4   used_chip                       10000 non-null  category
 5   used_pin_number                 10000 non-null  category
 6   online_order                    10000 non-null  category
dtypes: category(4), float32(3)
memory usage: 156.9 KB


In [6]:
# Discretize X with the project helper, passing `n_cutoffs=20`.
# It returns a pair: the discretized frame and `conditions1`, which is later
# handed to `from_conditons` to rebuild a rule on the original X.
# NOTE(review): presumably `n_cutoffs` applies per numeric feature — confirm.
discr_X1, conditions1 = discretize(X, n_cutoffs=20)

Discretized `X`.

In [7]:
# Preview the first five rows of the discretized frame.
discr_X1.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,124,125,126,127,128,129,130,131,132,133
0,0,0,0,1,1,1,1,1,1,1,...,0,0,1,0,0,1,1,0,0,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,0,1,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,1,0,0,1
3,0,0,1,1,1,1,1,1,1,1,...,0,0,1,0,1,0,1,0,0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,0,1,0


In [8]:
# Fit the rule miner on the discretized features against target y, configured
# with metric="f1" and operator="and"; `fit` returns the fitted model.
# NOTE(review): presumably features are combined with AND and ranked by F1 —
# confirm against the AssoRuleMining documentation.
model1 = AssoRuleMining(metric="f1", operator="and").fit(discr_X1, y)

HBox(children=(HTMLMath(value='Calculating . . .'), HTMLMath(value='')))

In [9]:
# Tabulate the mined rules and show the five with the highest F1 score.
rule_summary1 = pd.DataFrame(model1.info)
rule_summary1.sort_values(by="f1_score", ascending=False).head(5)

Unnamed: 0,start_with,variable,n_features,p_target,p_sample,f1_score,recall,precision,entropy
133,,133,3,0.680572,0.0571,0.809929,0.680572,1.0,0.175776
113,,113,4,0.680572,0.0571,0.809929,0.680572,1.0,0.175776
62,,62,4,0.680572,0.0571,0.809929,0.680572,1.0,0.175776
63,,63,4,0.680572,0.0571,0.809929,0.680572,1.0,0.175776
105,,105,4,0.680572,0.0571,0.809929,0.680572,1.0,0.175776


Create $1^{st}$ condition

In [10]:
# Rebuild the selected rule on the original X. The result is indexed below as
# cond1[0] (boolean row mask) and cond1[1] (list of condition strings).
# NOTE(review): rule index 123 is hard-coded and is not among the five rows
# displayed in the summary table; presumably it is one of the rules tied on
# F1 — confirm the choice, or select the index programmatically.
cond1 = from_conditons(X, conditions1, model1.asso_results_[123].features)

In [11]:
# Show the first rule, one condition per line, joined by "&".
rule1_text = " & \n".join(cond1[1])
print(rule1_text)

('ratio_to_median_purchase_price'>=4.059) & 
('online_order'=='yes') & 
('used_pin_number'=='no')


Determine the next rule, using only the samples not captured by the first condition.

In [12]:
# Keep only the rows that the first rule did NOT capture.
not_rule1 = ~cond1[0]
X2, y2 = X.loc[not_rule1], y[not_rule1]

In [13]:
# Re-discretize the remaining rows (those not captured by the first rule)
# with the same `n_cutoffs=20` setting as the first pass.
discr_X2, conditions2 = discretize(X2, n_cutoffs=20)

In [14]:
# Fit a second rule miner on the remaining rows, with the same
# metric="f1" / operator="and" configuration as model1.
model2 = AssoRuleMining(metric="f1", operator="and").fit(discr_X2, y2)

HBox(children=(HTMLMath(value='Calculating . . .'), HTMLMath(value='')))

In [15]:
# Tabulate the second-pass rules and show the five with the highest F1 score.
rule_summary2 = pd.DataFrame(model2.info)
rule_summary2.sort_values(by="f1_score", ascending=False).head(5)

Unnamed: 0,start_with,variable,n_features,p_target,p_sample,f1_score,recall,precision,entropy
133,,133,4,0.58209,0.017393,0.722222,0.58209,0.95122,0.097591
29,,29,5,0.58209,0.017393,0.722222,0.58209,0.95122,0.097591
22,,22,5,0.58209,0.017393,0.722222,0.58209,0.95122,0.097591
23,,23,5,0.58209,0.017393,0.722222,0.58209,0.95122,0.097591
24,,24,5,0.58209,0.017393,0.722222,0.58209,0.95122,0.097591


Create $2^{nd}$ condition

In [16]:
# Rebuild the selected second-pass rule. It is evaluated on the full X (not
# X2), so cond2[0] lines up row-for-row with cond1[0] and y.
# NOTE(review): rule index 40 is hard-coded and is not among the five rows
# displayed in the summary table — confirm the choice.
cond2 = from_conditons(X, conditions2, model2.asso_results_[40].features)

In [17]:
# Print the second cascading rule: NOT (rule 1) AND (rule 2),
# with one condition per line inside each parenthesised group.
rule1_group = "(" + " & \n".join(cond1[1]) + ")"
rule2_group = "(" + " & \n".join(cond2[1]) + ")"
print("\n".join(["NOT", rule1_group, "AND", rule2_group]))

NOT
(('ratio_to_median_purchase_price'>=4.059) & 
('online_order'=='yes') & 
('used_pin_number'=='no'))
AND
(('distance_from_home'>=95.5273) & 
('used_chip'=='no') & 
('online_order'=='yes') & 
('used_pin_number'=='no'))


Summary

In [18]:
# A row is flagged when it matches rule 1, or when it misses rule 1 but
# matches rule 2 (the cascading definition of the second rule).
hit_rule1 = cond1[0]
hit_rule2_only = ~hit_rule1 & cond2[0]
final = hit_rule1 | hit_rule2_only

In [19]:
# Counts behind the summary: targets captured by the combined rule, all
# targets, and the number of flagged rows.
captured_targets = y[final].sum()
total_targets = sum(y)
flagged_rows = len(y[final])

target_share = captured_targets / total_targets
sample_share = flagged_rows / len(y)
precision = captured_targets / sum(final)

print("Total % target : {:.2%}".format(target_share))
print("Total % sample : {:.2%}".format(sample_share))
print("Precision : {:.2%}".format(precision))
# Recall is the same ratio as "Total % target".
print("Recall : {:.2%}".format(target_share))

Total % target : 86.65%
Total % sample : 7.35%
Precision : 98.91%
Recall : 86.65%
