<a href="https://colab.research.google.com/github/J1gsy/machine_learning/blob/main/testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import transactionencoder

In [5]:
# Toy market-basket data: each inner list is one transaction, i.e. the items
# that were bought together.
data_raw = [
    ['pen', 'book', 'pencil', 'eraser', 'board marker', 'sharpener'],
    ['color pencil', 'drawing book', 'eraser', 'pencil', 'sharpener'],
    ['book', 'pen', 'pencil', 'eraser'],
    ['eraser', 'pencil', 'color pencil', 'sharpener'],
    ['pen', 'book', 'correction fluid', 'board marker', 'pencil'],
]

In [6]:
# Fix the data formatting before converting to a DataFrame: fit the encoder
# on the transactions first, then transform those same transactions with it.
te = transactionencoder.TransactionEncoder()
te.fit(data_raw)
te_ary = te.transform(data_raw)

In [7]:
# Wrap the encoded array in a DataFrame, using the encoder's `columns_` as the
# column labels, then preview the first rows.
dataset = pd.DataFrame(data=te_ary, columns=te.columns_)
dataset.head(n=5)

Unnamed: 0,board marker,book,color pencil,correction fluid,drawing book,eraser,pen,pencil,sharpener
0,True,True,False,False,False,True,True,True,True
1,False,False,True,False,True,True,False,True,True
2,False,True,False,False,False,True,True,True,False
3,False,False,True,False,False,True,False,True,True
4,True,True,False,True,False,False,True,True,False


In [8]:
# Mine the frequent itemsets: keep every item combination whose support is at
# least 0.6. use_colnames=True reports items by their column names.
frequent_itemset = apriori(
    dataset,
    min_support=0.6,
    use_colnames=True,
)
frequent_itemset

Unnamed: 0,support,itemsets
0,0.6,(book)
1,0.8,(eraser)
2,0.6,(pen)
3,1.0,(pencil)
4,0.6,(sharpener)
5,0.6,"(book, pen)"
6,0.6,"(book, pencil)"
7,0.8,"(pencil, eraser)"
8,0.6,"(sharpener, eraser)"
9,0.6,"(pencil, pen)"


In [9]:
# Derive association rules from the frequent itemsets, keeping only the rules
# whose confidence is at least 0.5, and show the resulting DataFrame.
result = association_rules(
    frequent_itemset,
    metric="confidence",
    min_threshold=0.5,
)
result

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(book),(pen),0.6,0.6,0.6,1.0,1.666667,0.24,inf
1,(pen),(book),0.6,0.6,0.6,1.0,1.666667,0.24,inf
2,(book),(pencil),0.6,1.0,0.6,1.0,1.0,0.0,inf
3,(pencil),(book),1.0,0.6,0.6,0.6,1.0,0.0,1.0
4,(pencil),(eraser),1.0,0.8,0.8,0.8,1.0,0.0,1.0
5,(eraser),(pencil),0.8,1.0,0.8,1.0,1.0,0.0,inf
6,(sharpener),(eraser),0.6,0.8,0.6,1.0,1.25,0.12,inf
7,(eraser),(sharpener),0.8,0.6,0.6,0.75,1.25,0.12,1.6
8,(pencil),(pen),1.0,0.6,0.6,0.6,1.0,0.0,1.0
9,(pen),(pencil),0.6,1.0,0.6,1.0,1.0,0.0,inf


In [10]:
# Simplify the rules table: keep only the rule itself (antecedents and
# consequents) plus its support, confidence and lift.
result_simplify = result.loc[
    :, ['antecedents', 'consequents', 'support', 'confidence', 'lift']
]
result_simplify

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(book),(pen),0.6,1.0,1.666667
1,(pen),(book),0.6,1.0,1.666667
2,(book),(pencil),0.6,1.0,1.0
3,(pencil),(book),0.6,0.6,1.0
4,(pencil),(eraser),0.8,0.8,1.0
5,(eraser),(pencil),0.8,1.0,1.0
6,(sharpener),(eraser),0.6,1.0,1.25
7,(eraser),(sharpener),0.6,0.75,1.25
8,(pencil),(pen),0.6,0.6,1.0
9,(pen),(pencil),0.6,1.0,1.0
