<a href="https://colab.research.google.com/github/AbrahamKong/CMPE255-Apriori_and_FP_Growth/blob/main/CMPE_255_Apriori_and_FP_Growth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install apyori
!pip install mlxtend



In [2]:
# Upgrade mlxtend to the current release, since Google Colab sometimes has an older version

%pip install mlxtend --upgrade



In [3]:
# import necessary library

import pandas as pd
import numpy as np
from apyori import apriori
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns.fpgrowth import fpgrowth

In [4]:
# Load the training transactions (data reused from an earlier CMPE 256 assignment)

TRAIN_CSV_URL = "https://ghcdn.rawgit.org/AbrahamKong/CMPE256-Apriori_Assignment/main/TRAIN-ARULES.csv"
df = pd.read_csv(TRAIN_CSV_URL)
df.head()

Unnamed: 0,order_id,user_id,product_name
0,1483,90,Organic Pink Lemonade Bunny Fruit Snacks
1,1483,90,Dark Chocolate Minis
2,1483,90,"Sparkling Water, Natural Mango Essenced"
3,1483,90,Peach-Pear Sparkling Water
4,1483,90,Organic Heritage Flakes Cereal


In [5]:
# Reshape the data set into the transaction format the apriori function requires

# Each order_id is one transaction: collect its product names into a single list
products_by_order = df.groupby(by=['order_id'])['product_name']
df_grouped = products_by_order.apply(list).reset_index(name='product_name')

In [6]:
# Spread each order's product list across columns, one product per column
product_lists = df_grouped['product_name']
df_grouped = product_lists.apply(pd.Series)

In [7]:
# Transform the dataframe into a list of rows to meet the apriori requirements;
# every cell is cast to str first
as_strings = df_grouped.astype(str)
data = as_strings.values.tolist()

In [8]:
# Remove the null placeholders (the string 'nan') from every transaction
data = [
    [item for item in basket if item != 'nan']
    for basket in data
]

**Apriori Algorithm**

In [9]:
# Apply the Apriori algorithm (apyori) with a minimum support of 0.005.
# The generator returned by apriori() is consumed directly into a list instead of
# being bound to the global name `association_rules`: that name is also used for
# the mlxtend function later in this notebook, and rebinding it to a generator
# leads to "'generator' object is not callable" when cells are re-run out of order.
association_results = list(apriori(transactions=data, min_support=0.005))

# One row per frequent itemset: items, support, ordered_statistics
apriori_result = pd.DataFrame(association_results)

In [10]:
# Preview the first rows of the Apriori result table
apriori_result.head()

Unnamed: 0,items,support,ordered_statistics
0,(0% Greek Strained Yogurt),0.009873,"[((), (0% Greek Strained Yogurt), 0.0098730606..."
1,(100% Premium Select Not From Concentrate Pure...,0.009168,"[((), (100% Premium Select Not From Concentrat..."
2,(100% Recycled Paper Towels),0.005642,"[((), (100% Recycled Paper Towels), 0.00564174..."
3,(1500 Pale Ale),0.01481,"[((), (1500 Pale Ale), 0.014809590973201692, 1..."
4,(2% Reduced Fat Milk),0.007757,"[((), (2% Reduced Fat Milk), 0.007757404795486..."


In [11]:
# Inspect the ordered statistics recorded for the first itemset
apriori_result.loc[0, 'ordered_statistics']

[OrderedStatistic(items_base=frozenset(), items_add=frozenset({'0% Greek Strained Yogurt'}), confidence=0.009873060648801129, lift=1.0)]

**FP Growth**

In [12]:
# Keep the cleaned transactions under their own name for the FP-Growth section
# (shallow copy of the outer list).
# NOTE(review): the original comment says this separate name avoids a
# "'generator' object is not callable" error caused by a name clash - confirm it is still needed.
transactions = data.copy()

# Create the transaction encoder, fit it on the transactions, then
# transform those same transactions into an array of encoded rows
te = TransactionEncoder()
trans_encoded = te.fit(transactions).transform(transactions)

# Wrap the encoded array in a dataframe with one column per item
df_encoded = pd.DataFrame(data=trans_encoded, columns=te.columns_)
df_encoded

Unnamed: 0,0% Fat Free Organic Milk,0% Greek Strained Yogurt,"0% Greek, Blueberry on the Bottom Yogurt",1 Apple + 1 Pear Fruit Bar,1 Liter,1 Step Kashmir Spinach Indian Cuisine,1% Lowfat Milk,1% Milk,100 Calorie Per Bag Popcorn,100% Australian Tea Tree Oil,100% Carrot Juice,100% Florida Orange Juice,100% Grated Parmesan Cheese,100% Guava Juice,100% Juice,100% Juice Apple Juice,"100% Juice, Variety Pack",100% Lactose Free Milk,100% Lactose Free Reduced Fat Calcium Enriched Milk,100% Mango Juice,100% Mighty Mango Juice Smoothie,100% Natural Diced Tomatoes,100% Natural Spring Water,100% Natural Tomato Sauce,100% Orange Juice No Pulp,100% Organic Diced Tomatoes,100% Plant Protein Beastley Sliders,100% Pomegranate Juice,100% Premium Select Not From Concentrate Pure Prune Juice,100% Pure Apple Juice,100% Pure Corn Starch,100% Pure Pumpkin,100% Pure Vegetable Oil,100% Raw Coconut Water,100% Recycled 2 Ply Jumbo Paper Towel Roll,100% Recycled Aluminum Foil,100% Recycled Bath Tissue Rolls,100% Recycled Bathroom Tissue,100% Recycled Paper Towels,100% Whole Grain Corn Meal,...,XL Emerald White Seedless Grapes,XL Pick-A-Size Paper Towel Rolls,Yellow Bell Pepper,Yellow Corn Meal,Yellow Corn Taco Shells,Yellow Corn Tortilla Chips,Yellow Corn Tortillas,Yellow Enriched & Degerminated Corn Meal,Yellow Grape Tomatoes,Yellow Onions,Yellow Potato,Yellow Straightneck Squash,YoBaby Blueberry Apple Yogurt,YoKids Blueberry & Strawberry/Vanilla Yogurt,YoKids Squeeze! 
Organic Strawberry Flavor Yogurt,"YoKids Squeezers Organic Low-Fat Yogurt, Strawberry",YoKids Strawberry Banana/Strawberry Yogurt,Yobaby Organic Plain Yogurt,Yoghurt Blueberry,Yogurt Pretzels,"Yogurt, Lowfat, Strawberry","Yogurt, Organic, Lowfat, Strawberry","Yogurt, Strained Low-Fat, Coconut",Yotoddler Organic Pear Spinach Mango Yogurt,Yukon Gold Potatoes 5lb Bag,Z Bar Protein Peanut Butter Chocolate Protein Snack Bar,ZBar Organic Chocolate Brownie Energy Snack,ZBar Protein Chocolate Mint Protein Bar,Zen Tea,Zero Calorie Cola,Zero Calorie Cola Soda,Zero Calorie Lemon Lime Soda,Zero Calorie Tonic Water,Zero Go-Go Mixed Berry Vitamin Water,Zero Soda,Zero Vitamin Water,Zero XXX Acai Blueberry Pomegranate,Zucchini Noodles,Zucchini Squash,smartwater® Electrolyte Enhanced Water
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1413,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1414,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1415,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1416,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [13]:
# Our minimum support is 7 transactions; mlxtend takes it relative to the
# total number of transactions, so divide the count by len(transactions)
min_support_count = 7
min_support = min_support_count / len(transactions)

# Compute the frequent itemsets with fpgrowth from mlxtend
frequent_itemsets = fpgrowth(
    df_encoded,
    min_support=min_support,
    use_colnames=True,
)

# Display the frequent itemsets
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.031030,(Organic Heritage Flakes Cereal)
1,0.021157,(Dark Chocolate Minis)
2,0.020451,(Organic Pink Lemonade Bunny Fruit Snacks)
3,0.016925,(Peach-Pear Sparkling Water)
4,0.013399,"(Sparkling Water, Natural Mango Essenced)"
...,...,...
1487,0.005642,"(Jalapeno Pepper, Mixed Vegetables)"
1488,0.005642,"(80 Vodka Holiday Edition, Mixed Vegetables)"
1489,0.005642,"(Jalapeno Pepper, 80 Vodka Holiday Edition, M..."
1490,0.004937,"(Hibiscus Organic Raw Kombucha, Passionberry B..."


In [14]:
# Derive association rules from the frequent itemsets found by FP-Growth
from mlxtend.frequent_patterns import association_rules

# Keep rules whose confidence is at least 0.5, then display them
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.5,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Dark Chocolate Minis),(Organic Heritage Flakes Cereal),0.021157,0.031030,0.014104,0.666667,21.484848,0.013448,2.906911
1,(Dark Chocolate Minis),(Dark & Mint Filled Chocolate Squares),0.021157,0.024683,0.011989,0.566667,22.958095,0.011467,2.250732
2,"(Dark Chocolate Minis, Dark & Mint Filled Choc...",(Organic Heritage Flakes Cereal),0.011989,0.031030,0.008463,0.705882,22.748663,0.008091,3.294499
3,"(Dark Chocolate Minis, Organic Heritage Flakes...",(Dark & Mint Filled Chocolate Squares),0.014104,0.024683,0.008463,0.600000,24.308571,0.008114,2.438293
4,"(Organic Heritage Flakes Cereal, Dark & Mint F...",(Dark Chocolate Minis),0.013399,0.021157,0.008463,0.631579,29.852632,0.008179,2.656861
...,...,...,...,...,...,...,...,...,...
3529,(Mixed Vegetables),"(80 Vodka Holiday Edition, Jalapeno Pepper)",0.005642,0.008463,0.005642,1.000000,118.166667,0.005594,inf
3530,(Hibiscus Organic Raw Kombucha),(Passionberry Bliss Kombucha Drink),0.009168,0.006347,0.004937,0.538462,84.837607,0.004878,2.152915
3531,(Passionberry Bliss Kombucha Drink),(Hibiscus Organic Raw Kombucha),0.006347,0.009168,0.004937,0.777778,84.837607,0.004878,4.458745
3532,(Chocolate Peppermint Stick Bar),(Imported Mineral Water),0.004937,0.004937,0.004937,1.000000,202.571429,0.004912,inf


**Reference**


[The FP Growth algorithm](https://towardsdatascience.com/the-fp-growth-algorithm-1ffa20e839b8)