<a href="https://colab.research.google.com/github/riomukhtarom/machine-learning/blob/master/apriori_mlxtend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# for basic operations
import numpy as np
import pandas as pd

# for visualizations
import matplotlib.pyplot as plt

# for market basket analysis
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset path read by the following cell resolves.
from google.colab import drive
drive.mount('/content/drive')

In [4]:
# The CSV is read with header=None, so the 20 columns are named explicitly
# as item_1 .. item_20.
item_columns = [f'item_{n}' for n in range(1, 21)]
data = pd.read_csv(
    '/content/drive/My Drive/dataset/Market_Basket_Optimisation.csv',
    header=None,
    names=item_columns,
)
data.head()

In [5]:
# Turn every basket (row of `data`) into a list of item labels, all of the
# same length. Every cell goes through str(), so a missing slot (NaN) ends up
# as the literal label 'nan'.
# The row/column counts come from the frame itself rather than the hard-coded
# 7501 x 20, and the underlying array is fetched once instead of once per cell.
basket_rows = data.to_numpy()
trans = [[str(item) for item in row] for row in basket_rows]

# converting it into a numpy array
trans = np.array(trans)

# checking the shape of the array
print(trans.shape)

(7501, 20)


In [6]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

# Encode the transaction array into a table with one column per distinct
# item label (column names are taken from the fitted encoder).
te = TransactionEncoder()
encoded_array = te.fit_transform(trans)
data = pd.DataFrame(encoded_array, columns=te.columns_)

# previewing the last rows of the encoded frame
data.tail()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
7496,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7497,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7498,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7499,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
7500,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False


In [7]:
# Remove the 'nan' pseudo-item column (the label that stringified missing
# slots receive). errors='ignore' keeps this cell safe to re-run once the
# column is already gone, or when the data had no missing slots at all.
data = data.drop(columns=['nan'], errors='ignore')

In [72]:
from mlxtend.frequent_patterns import apriori
from datetime import datetime

# Mine frequent itemsets with Apriori and report the wall-clock time of the
# call, measured with timestamps taken right before and right after it.
time_start = datetime.now()
frequent_itemsets_ap = apriori(
    data,
    min_support=0.001,   # minimum support of 0.1% of transactions
    use_colnames=True,   # itemsets are shown with item names (see output)
)
time_finish = datetime.now()

duration = time_finish - time_start
print('Apriori Time', duration)

frequent_itemsets_ap

Apriori Time 0:00:35.158663


Unnamed: 0,support,itemsets
0,0.020397,(almonds)
1,0.008932,(antioxydant juice)
2,0.004666,(asparagus)
3,0.033329,(avocado)
4,0.004533,(babies food)
...,...,...
6773,0.001466,"(tomatoes, frozen vegetables, spaghetti, miner..."
6774,0.001200,"(frozen vegetables, spaghetti, mineral water, ..."
6775,0.001067,"(olive oil, tomatoes, frozen vegetables, spagh..."
6776,0.001067,"(spaghetti, pancakes, mineral water, herb & pe..."


In [73]:
from mlxtend.frequent_patterns import fpgrowth
from datetime import datetime

# Mine frequent itemsets with FP-Growth using the same support threshold as
# the Apriori cell, and report the wall-clock time of the call.
time_start_fp = datetime.now()
frequent_itemsets_fp = fpgrowth(
    data,
    min_support=0.001,   # minimum support of 0.1% of transactions
    use_colnames=True,   # itemsets are shown with item names (see output)
)
time_finish_fp = datetime.now()

duration_fp = time_finish_fp - time_start_fp
print('FP-Tree Time', duration_fp)

frequent_itemsets_fp

FP-Tree Time 0:00:00.471270


Unnamed: 0,support,itemsets
0,0.238368,(mineral water)
1,0.132116,(green tea)
2,0.076523,(low fat yogurt)
3,0.071457,(shrimp)
4,0.065858,(olive oil)
...,...,...
6773,0.001733,"(oatmeal, mineral water)"
6774,0.001333,"(oatmeal, chocolate)"
6775,0.001067,"(oatmeal, ground beef)"
6776,0.001067,"(green tea, oatmeal)"


In [74]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from both itemset tables with identical settings:
# keep rules whose confidence is at least 0.4.
rule_settings = dict(metric="confidence", min_threshold=0.4)

rules_ap = association_rules(frequent_itemsets_ap, **rule_settings)
rules_fp = association_rules(frequent_itemsets_fp, **rule_settings)

In [75]:
# Display the rules derived from the Apriori itemsets (confidence >= 0.4).
rules_ap

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(asparagus),(mineral water),0.004666,0.238368,0.002133,0.457143,1.917801,0.001021,1.403006
1,(burger sauce),(mineral water),0.005866,0.238368,0.002400,0.409091,1.716214,0.001001,1.288915
2,(burger sauce),(spaghetti),0.005866,0.174110,0.002400,0.409091,2.349610,0.001378,1.397660
3,(chocolate bread),(mineral water),0.004266,0.238368,0.001866,0.437500,1.835396,0.000850,1.354012
4,(chutney),(mineral water),0.004133,0.238368,0.001733,0.419355,1.759273,0.000748,1.311699
...,...,...,...,...,...,...,...,...,...
2661,"(frozen vegetables, chocolate, spaghetti, mine...",(milk),0.001733,0.129583,0.001067,0.615385,4.748971,0.000842,2.263085
2662,"(frozen vegetables, chocolate, spaghetti, milk...",(mineral water),0.001600,0.238368,0.001067,0.666667,2.796793,0.000685,2.284895
2663,"(frozen vegetables, chocolate, mineral water, ...",(spaghetti),0.002000,0.174110,0.001067,0.533333,3.063196,0.000718,1.769764
2664,"(frozen vegetables, spaghetti, mineral water, ...",(chocolate),0.001866,0.163845,0.001067,0.571429,3.487621,0.000761,1.951029


In [76]:
# Display the rules derived from the FP-Growth itemsets (confidence >= 0.4).
rules_fp

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,"(spaghetti, low fat yogurt, french fries)",(mineral water),0.003600,0.238368,0.001600,0.444444,1.864529,0.000742,1.370937
1,"(low fat yogurt, mineral water, french fries)",(spaghetti),0.003733,0.174110,0.001600,0.428571,2.461496,0.000950,1.445307
2,"(low fat yogurt, burgers, mineral water)",(spaghetti),0.002533,0.174110,0.001067,0.421053,2.418312,0.000626,1.426537
3,"(low fat yogurt, burgers, eggs)",(spaghetti),0.002133,0.174110,0.001067,0.500000,2.871746,0.000695,1.651780
4,"(low fat yogurt, burgers, eggs)",(milk),0.002133,0.129583,0.001067,0.500000,3.858539,0.000790,1.740835
...,...,...,...,...,...,...,...,...,...
2661,(burger sauce),(spaghetti),0.005866,0.174110,0.002400,0.409091,2.349610,0.001378,1.397660
2662,"(spaghetti, burger sauce)",(mineral water),0.002400,0.238368,0.001333,0.555556,2.330661,0.000761,1.713672
2663,"(burger sauce, mineral water)",(spaghetti),0.002400,0.174110,0.001333,0.555556,3.190829,0.000915,1.858252
2664,"(chicken, burger sauce)",(mineral water),0.001600,0.238368,0.001067,0.666667,2.796793,0.000685,2.284895


In [89]:
# Record how many items each rule has on its antecedent (left-hand) side.
rules_fp['length'] = rules_fp['antecedents'].apply(len)

# Show the rules with exactly three antecedent items and confidence >= 0.3.
is_confident = rules_fp['confidence'] >= 0.3
has_three_antecedents = rules_fp['length'] == 3
rules_fp[is_confident & has_three_antecedents]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,length
0,"(spaghetti, low fat yogurt, french fries)",(mineral water),0.003600,0.238368,0.001600,0.444444,1.864529,0.000742,1.370937,3
1,"(low fat yogurt, mineral water, french fries)",(spaghetti),0.003733,0.174110,0.001600,0.428571,2.461496,0.000950,1.445307,3
2,"(low fat yogurt, burgers, mineral water)",(spaghetti),0.002533,0.174110,0.001067,0.421053,2.418312,0.000626,1.426537,3
3,"(low fat yogurt, burgers, eggs)",(spaghetti),0.002133,0.174110,0.001067,0.500000,2.871746,0.000695,1.651780,3
4,"(low fat yogurt, burgers, eggs)",(milk),0.002133,0.129583,0.001067,0.500000,3.858539,0.000790,1.740835,3
...,...,...,...,...,...,...,...,...,...,...
2585,"(spaghetti, milk, french wine)",(mineral water),0.001866,0.238368,0.001200,0.642857,2.696908,0.000755,2.132569,3
2586,"(french wine, mineral water, milk)",(spaghetti),0.002800,0.174110,0.001200,0.428571,2.461496,0.000712,1.445307,3
2589,"(spaghetti, mineral water, french wine)",(ground beef),0.003333,0.098254,0.001466,0.440000,4.478209,0.001139,1.610261,3
2590,"(spaghetti, ground beef, french wine)",(mineral water),0.002400,0.238368,0.001466,0.611111,2.563727,0.000894,1.958482,3
